np_codeocean 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- np_codeocean/upload.py +101 -63
- {np_codeocean-0.1.6.dist-info → np_codeocean-0.1.8.dist-info}/METADATA +2 -2
- {np_codeocean-0.1.6.dist-info → np_codeocean-0.1.8.dist-info}/RECORD +5 -5
- {np_codeocean-0.1.6.dist-info → np_codeocean-0.1.8.dist-info}/WHEEL +0 -0
- {np_codeocean-0.1.6.dist-info → np_codeocean-0.1.8.dist-info}/entry_points.txt +0 -0
np_codeocean/upload.py
CHANGED
|
@@ -4,9 +4,7 @@ import argparse
|
|
|
4
4
|
import contextlib
|
|
5
5
|
import csv
|
|
6
6
|
import json
|
|
7
|
-
from multiprocessing import context
|
|
8
7
|
import pathlib
|
|
9
|
-
import sys
|
|
10
8
|
import datetime
|
|
11
9
|
from pathlib import Path
|
|
12
10
|
from typing import NamedTuple
|
|
@@ -20,9 +18,8 @@ import np_tools
|
|
|
20
18
|
import doctest
|
|
21
19
|
import numpy as np
|
|
22
20
|
import polars as pl
|
|
23
|
-
|
|
24
21
|
import requests
|
|
25
|
-
|
|
22
|
+
from pydantic import ValidationError # may be returned from aind-data-transfer-service
|
|
26
23
|
|
|
27
24
|
logger = np_logging.get_logger(__name__)
|
|
28
25
|
|
|
@@ -46,13 +43,16 @@ class CodeOceanUpload(NamedTuple):
|
|
|
46
43
|
"""Directory of symlinks to behavior video files and video info jsons in
|
|
47
44
|
top-level of session folder on np-exp."""
|
|
48
45
|
|
|
49
|
-
ephys: Path
|
|
46
|
+
ephys: Path | None
|
|
50
47
|
"""Directory of symlinks to raw ephys data files on np-exp, with only one
|
|
51
48
|
`recording` per `Record Node` folder."""
|
|
52
49
|
|
|
53
50
|
job: Path
|
|
54
51
|
"""File containing job parameters for `aind-data-transfer`"""
|
|
55
52
|
|
|
53
|
+
force_cloud_sync: bool = False
|
|
54
|
+
"""If True, re-upload and re-make raw asset even if data exists on S3."""
|
|
55
|
+
|
|
56
56
|
def as_posix(path: pathlib.Path) -> str:
    """Return the posix-style string for `path` with its first character dropped.

    The forward-slash form of `path` is computed first, then everything from
    index 1 onwards is returned (an empty string if the posix form has fewer
    than two characters).
    """
    posix_str = path.as_posix()
    return posix_str[1:]
|
|
58
58
|
|
|
@@ -83,6 +83,37 @@ def create_ephys_symlinks(session: np_session.Session, dest: Path,
|
|
|
83
83
|
if not abs_path.is_dir():
|
|
84
84
|
np_tools.symlink(as_posix(abs_path), dest / rel_path)
|
|
85
85
|
logger.debug(f'Finished creating symlinks to raw ephys data files in {root_path}')
|
|
86
|
+
correct_structure(dest)
|
|
87
|
+
|
|
88
|
+
def correct_structure(dest: Path) -> None:
    """
    In case some probes are missing, remove device entries from structure.oebin
    files for devices with folders that have not been preserved.

    `dest` is searched recursively for `recording[0-9]` directories. For each
    one that contains a `structure.oebin` (symlink or regular file), every
    entry in its `events` and `continuous` lists whose `folder_name` has no
    matching path in the corresponding subfolder next to the oebin is dropped.
    If any entry was dropped, the `structure.oebin` in `dest` is deleted and
    re-created as a regular file holding the corrected json. Recording
    directories without a `structure.oebin` are skipped with a warning.
    """
    logger.debug('Checking structure.oebin for missing folders...')
    for recording_dir in dest.rglob('recording[0-9]'):
        if not recording_dir.is_dir():
            continue
        oebin_path = recording_dir / 'structure.oebin'
        is_link = oebin_path.is_symlink()
        if not (is_link or oebin_path.exists()):
            logger.warning(f'No structure.oebin found in {recording_dir}')
            continue
        logger.debug(f'Examining oebin: {oebin_path} for correction')
        # `readlink()` raises OSError on a regular file, so only follow the
        # link when there actually is one; otherwise read the file in place
        source_path = oebin_path.readlink() if is_link else oebin_path
        oebin_obj = np_tools.read_oebin(np_config.normalize_path(source_path))
        any_removed = False
        for subdir_name in ('events', 'continuous'):
            subdir = recording_dir / subdir_name
            # iterate over copy of list so as to not disrupt iteration when elements are removed
            for device in list(oebin_obj[subdir_name]):
                if (subdir / device['folder_name']).exists():
                    continue
                logger.info(f'{device["folder_name"]} not found in {subdir}, removing from structure.oebin')
                oebin_obj[subdir_name].remove(device)
                any_removed = True
        if any_removed:
            # remove the existing entry first so the corrected json is written
            # to a new regular file in `dest`, never through a symlink to the source
            oebin_path.unlink()
            oebin_path.write_text(json.dumps(oebin_obj, indent=4))
            logger.debug('Overwrote symlink to structure.oebin with corrected strcuture.oebin')
|
|
86
117
|
|
|
87
118
|
def is_behavior_video_file(path: Path) -> bool:
|
|
88
119
|
if path.is_dir() or path.suffix not in ('.mp4', '.avi', '.json'):
|
|
@@ -92,13 +123,10 @@ def is_behavior_video_file(path: Path) -> bool:
|
|
|
92
123
|
return True
|
|
93
124
|
return False
|
|
94
125
|
|
|
95
|
-
def create_behavior_symlinks(session: np_session.Session, dest: Path
|
|
126
|
+
def create_behavior_symlinks(session: np_session.Session, dest: Path) -> None:
|
|
96
127
|
"""Create symlinks in `dest` pointing to files in top-level of session
|
|
97
128
|
folder on np-exp, plus all files in `exp` subfolder, if present.
|
|
98
129
|
"""
|
|
99
|
-
if dest is None:
|
|
100
|
-
logger.debug(f"No behavior folder supplied for {session}")
|
|
101
|
-
return
|
|
102
130
|
subfolder_names = ('exp', 'qc')
|
|
103
131
|
logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
|
|
104
132
|
for src in session.npexp_path.glob('*'):
|
|
@@ -115,13 +143,10 @@ def create_behavior_symlinks(session: np_session.Session, dest: Path | None) ->
|
|
|
115
143
|
np_tools.symlink(as_posix(src), dest / src.relative_to(session.npexp_path))
|
|
116
144
|
logger.debug(f'Finished creating symlinks to {name!r} files')
|
|
117
145
|
|
|
118
|
-
def create_behavior_videos_symlinks(session: np_session.Session, dest: Path
|
|
146
|
+
def create_behavior_videos_symlinks(session: np_session.Session, dest: Path) -> None:
|
|
119
147
|
"""Create symlinks in `dest` pointing to MVR video files and info jsons in top-level of session
|
|
120
148
|
folder on np-exp.
|
|
121
149
|
"""
|
|
122
|
-
if dest is None:
|
|
123
|
-
logger.debug(f"No behavior_videos folder supplied for {session}")
|
|
124
|
-
return
|
|
125
150
|
logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
|
|
126
151
|
for src in session.npexp_path.glob('*'):
|
|
127
152
|
if is_behavior_video_file(src):
|
|
@@ -130,7 +155,7 @@ def create_behavior_videos_symlinks(session: np_session.Session, dest: Path | No
|
|
|
130
155
|
|
|
131
156
|
def is_surface_channel_recording(path_name: str) -> bool:
|
|
132
157
|
"""
|
|
133
|
-
>>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/
|
|
158
|
+
>>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
|
|
134
159
|
>>> is_surface_channel_recording(session.npexp_path.as_posix())
|
|
135
160
|
True
|
|
136
161
|
"""
|
|
@@ -138,9 +163,9 @@ def is_surface_channel_recording(path_name: str) -> bool:
|
|
|
138
163
|
|
|
139
164
|
def get_surface_channel_start_time(session: np_session.Session) -> datetime.datetime:
|
|
140
165
|
"""
|
|
141
|
-
>>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/
|
|
166
|
+
>>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
|
|
142
167
|
>>> get_surface_channel_start_time(session)
|
|
143
|
-
datetime.datetime(2023,
|
|
168
|
+
datetime.datetime(2023, 11, 29, 14, 56, 25, 219000)
|
|
144
169
|
"""
|
|
145
170
|
sync_messages_paths = tuple(session.npexp_path.glob('*/*/*/sync_messages.txt'))
|
|
146
171
|
if not sync_messages_paths:
|
|
@@ -154,43 +179,42 @@ def get_surface_channel_start_time(session: np_session.Session) -> datetime.date
|
|
|
154
179
|
timestamp = datetime.datetime.fromtimestamp(timestamp_value / 1e3)
|
|
155
180
|
return timestamp
|
|
156
181
|
|
|
157
|
-
def
|
|
182
|
+
def get_upload_csv_for_session(upload: CodeOceanUpload) -> dict[str, str | int | bool]:
    """Return the job parameters for `upload`, keyed by csv column name.

    The mapping always starts with `platform`, `subject-id` and
    `force_cloud_sync`. These are followed by one `modality{N}` /
    `modality{N}.source` pair for each of the `ephys`, `behavior` and
    `behavior_videos` attributes of `upload` that is not None, numbered
    consecutively from 0 in that order. The last key is `acq-datetime`,
    formatted as `%Y-%m-%d %H:%M:%S`: for surface channel recordings it is the
    session date combined with the time-of-day returned by
    `get_surface_channel_start_time`, otherwise it is the session start time.

    >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels"
    >>> is_surface_channel_recording(path)
    True
    >>> upload = create_codeocean_upload(path)
    >>> ephys_upload_csv = get_upload_csv_for_session(upload)
    >>> ephys_upload_csv['modality0.source']
    '//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_690706_20231129_surface_channels/ephys'
    >>> ephys_upload_csv.keys()
    dict_keys(['platform', 'subject-id', 'force_cloud_sync', 'modality0', 'modality0.source', 'acq-datetime'])
    """
    params: dict[str, str | int | bool] = {
        'platform': 'ecephys',
        'subject-id': str(upload.session.mouse),
        'force_cloud_sync': upload.force_cloud_sync,
    }

    # modality name written to the csv -> attribute on `upload` holding its folder
    modality_to_attr = {
        'ecephys': 'ephys',
        'behavior': 'behavior',
        'behavior-videos': 'behavior_videos',
    }
    idx = 0
    for modality_name, attr_name in modality_to_attr.items():
        source = getattr(upload, attr_name)
        if source is None:
            # no folder for this modality: skip it without consuming an index
            continue
        params[f'modality{idx}'] = modality_name
        params[f'modality{idx}.source'] = np_config.normalize_path(source).as_posix()
        idx += 1

    if is_surface_channel_recording(upload.session.npexp_path.as_posix()):
        # `combine` is a classmethod, so no throwaway datetime instance is needed
        acq_datetime = datetime.datetime.combine(
            upload.session.date,
            get_surface_channel_start_time(upload.session).time(),
        )
    else:
        acq_datetime = upload.session.start
    params['acq-datetime'] = acq_datetime.strftime("%Y-%m-%d %H:%M:%S")

    return params
|
|
194
218
|
|
|
195
219
|
|
|
196
220
|
def is_in_hpc_upload_queue(csv_path: pathlib.Path) -> bool:
|
|
@@ -230,7 +254,8 @@ def put_csv_for_hpc_upload(csv_path: pathlib.Path) -> None:
|
|
|
230
254
|
to get the real error class + message."""
|
|
231
255
|
if response.status_code != 200:
|
|
232
256
|
try:
|
|
233
|
-
|
|
257
|
+
x = response.json()['data']['errors']
|
|
258
|
+
import pdb; pdb.set_trace()
|
|
234
259
|
except (KeyError, IndexError, requests.exceptions.JSONDecodeError, SyntaxError) as exc1:
|
|
235
260
|
try:
|
|
236
261
|
response.raise_for_status()
|
|
@@ -261,10 +286,13 @@ def put_csv_for_hpc_upload(csv_path: pathlib.Path) -> None:
|
|
|
261
286
|
),
|
|
262
287
|
)
|
|
263
288
|
_raise_for_status(post_csv_response)
|
|
264
|
-
|
|
289
|
+
|
|
290
|
+
def is_ephys_session(session: np_session.Session) -> bool:
    """Return True if a `settings.xml` exists anywhere below `session.npexp_path`.

    The recursive search stops at the first match.
    """
    for _ in session.npexp_path.rglob('settings.xml'):
        return True
    return False
|
|
292
|
+
|
|
265
293
|
def create_upload_job(upload: CodeOceanUpload) -> None:
|
|
266
294
|
logger.info(f'Creating upload job file {upload.job} for session {upload.session}...')
|
|
267
|
-
job: dict =
|
|
295
|
+
job: dict = get_upload_csv_for_session(upload)
|
|
268
296
|
with open(upload.job, 'w') as f:
|
|
269
297
|
w = csv.writer(f, lineterminator='')
|
|
270
298
|
w.writerow(job.keys())
|
|
@@ -273,28 +301,33 @@ def create_upload_job(upload: CodeOceanUpload) -> None:
|
|
|
273
301
|
w.writerow(job.values())
|
|
274
302
|
|
|
275
303
|
def create_codeocean_upload(session: str | int | np_session.Session,
|
|
276
|
-
recording_dirs: Iterable[str] | None = None
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
>>> upload.behavior is None
|
|
280
|
-
True
|
|
281
|
-
>>> upload.ephys.exists()
|
|
282
|
-
True
|
|
283
|
-
"""
|
|
304
|
+
recording_dirs: Iterable[str] | None = None,
|
|
305
|
+
force_cloud_sync: bool = False,
|
|
306
|
+
) -> CodeOceanUpload:
|
|
284
307
|
"""Create directories of symlinks to np-exp files with correct structure
|
|
285
308
|
for upload to CodeOcean.
|
|
286
309
|
|
|
287
310
|
- only one `recording` per `Record Node` folder (largest if multiple found)
|
|
288
311
|
- job file for feeding into `aind-data-transfer`
|
|
289
|
-
"""
|
|
290
312
|
|
|
291
|
-
|
|
313
|
+
>>> upload = create_codeocean_upload("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
|
|
314
|
+
>>> upload.behavior is None
|
|
315
|
+
True
|
|
316
|
+
>>> upload.ephys.exists()
|
|
317
|
+
True
|
|
318
|
+
"""
|
|
292
319
|
|
|
293
|
-
if is_surface_channel_recording(session
|
|
320
|
+
if is_surface_channel_recording(str(session)):
|
|
321
|
+
session = np_session.Session(session)
|
|
322
|
+
if not is_surface_channel_recording(session.npexp_path.name):
|
|
323
|
+
# manually assign surface channel path
|
|
324
|
+
session = np_session.Session(session.npexp_path.parent / f'{session.folder}_surface_channels')
|
|
325
|
+
assert session.npexp_path.exists(), f"Surface channel path {session.npexp_path} does not exist in same folder as main session recording"
|
|
294
326
|
root = np_session.NPEXP_PATH / 'codeocean' / f'{session.folder}_surface_channels'
|
|
295
327
|
behavior = None
|
|
296
328
|
behavior_videos = None
|
|
297
329
|
else:
|
|
330
|
+
session = np_session.Session(session)
|
|
298
331
|
root = np_session.NPEXP_PATH / 'codeocean' / session.folder
|
|
299
332
|
behavior = np_config.normalize_path(root / 'behavior')
|
|
300
333
|
behavior_videos = behavior.with_name('behavior-videos')
|
|
@@ -305,19 +338,24 @@ def create_codeocean_upload(session: str | int | np_session.Session,
|
|
|
305
338
|
session = session,
|
|
306
339
|
behavior = behavior,
|
|
307
340
|
behavior_videos = behavior_videos,
|
|
308
|
-
ephys = np_config.normalize_path(root / 'ephys'),
|
|
341
|
+
ephys = np_config.normalize_path(root / 'ephys') if is_ephys_session(session) else None,
|
|
309
342
|
job = np_config.normalize_path(root / 'upload.csv'),
|
|
343
|
+
force_cloud_sync=force_cloud_sync,
|
|
310
344
|
)
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
345
|
+
if upload.ephys:
|
|
346
|
+
create_ephys_symlinks(upload.session, upload.ephys, recording_dirs=recording_dirs)
|
|
347
|
+
if upload.behavior:
|
|
348
|
+
create_behavior_symlinks(upload.session, upload.behavior)
|
|
349
|
+
if upload.behavior_videos:
|
|
350
|
+
create_behavior_videos_symlinks(upload.session, upload.behavior_videos)
|
|
315
351
|
create_upload_job(upload)
|
|
316
352
|
return upload
|
|
317
353
|
|
|
318
354
|
def upload_session(session: str | int | pathlib.Path | np_session.Session,
|
|
319
|
-
recording_dirs: Iterable[str] | None = None
|
|
320
|
-
|
|
355
|
+
recording_dirs: Iterable[str] | None = None,
|
|
356
|
+
force: bool = False,
|
|
357
|
+
) -> None:
|
|
358
|
+
upload = create_codeocean_upload(str(session), recording_dirs=recording_dirs, force_cloud_sync=force)
|
|
321
359
|
np_logging.web('np_codeocean').info(f'Submitting {upload.session} to hpc upload queue')
|
|
322
360
|
put_csv_for_hpc_upload(upload.job)
|
|
323
361
|
logger.debug(f'Submitted {upload.session} to hpc upload queue')
|
|
@@ -336,6 +374,7 @@ def main() -> None:
|
|
|
336
374
|
def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments for uploading a session to CodeOcean.

    Returns a namespace with:
    - `session`: the single required positional argument, as a str
    - `force`: True if `--force` was given, otherwise False
    - `recording_dirs`: list of any further positional arguments, each as a
      str (empty list if none were given)
    """
    parser = argparse.ArgumentParser(description="Upload a session to CodeOcean")
    parser.add_argument('session', help="session ID (lims or np-exp foldername) or path to session folder")
    parser.add_argument('--force', action='store_true', help="enable `force_cloud_sync` option, re-uploading and re-making raw asset even if data exists on S3")
    # `type` is applied to each argument individually: `type=list` would split
    # every directory name into a list of single characters, so keep them as str
    parser.add_argument('recording_dirs', nargs='*', type=str, help="[optional] specific recording directories to upload - for use with split recordings only.")
    return parser.parse_args()
|
|
341
380
|
|
|
@@ -343,6 +382,5 @@ if __name__ == '__main__':
|
|
|
343
382
|
import doctest
|
|
344
383
|
|
|
345
384
|
doctest.testmod(
|
|
346
|
-
optionflags=(doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE)
|
|
347
|
-
)
|
|
348
|
-
main()
|
|
385
|
+
optionflags=(doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE),
|
|
386
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: np_codeocean
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: Tools for uploading and interacting with Mindscope Neuropixels experiments on Code Ocean
|
|
5
5
|
Author-Email: Ben Hardcastle <ben.hardcastle@alleninstitute.org>
|
|
6
6
|
License: MIT
|
|
@@ -19,7 +19,7 @@ Requires-Dist: np-tools>=0.1.21
|
|
|
19
19
|
Requires-Dist: np-config>=0.4.24
|
|
20
20
|
Requires-Dist: requests>=2.31.0
|
|
21
21
|
Requires-Dist: npc-session>=0.1.34
|
|
22
|
-
Requires-Dist: polars>=0.
|
|
22
|
+
Requires-Dist: polars>=0.20.16
|
|
23
23
|
Requires-Dist: bump>=1.3.2; extra == "dev"
|
|
24
24
|
Requires-Dist: pdm>=2.4.9; extra == "dev"
|
|
25
25
|
Provides-Extra: dev
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
np_codeocean-0.1.
|
|
2
|
-
np_codeocean-0.1.
|
|
3
|
-
np_codeocean-0.1.
|
|
1
|
+
np_codeocean-0.1.8.dist-info/METADATA,sha256=hvXhhe5Fv5FEzz17K1RK6wtpJcyKhJKL_qS9JT_OeZM,2488
|
|
2
|
+
np_codeocean-0.1.8.dist-info/WHEEL,sha256=N2J68yzZqJh3mI_Wg92rwhw0rtJDFpZj9bwQIMJgaVg,90
|
|
3
|
+
np_codeocean-0.1.8.dist-info/entry_points.txt,sha256=T3Is83nShuWFYg7bTLxVhRWi15OVxO99WYcUg3-xURM,113
|
|
4
4
|
np_codeocean/__init__.py,sha256=BYXXoFDa1J_Lv-YG52Ch6k5L4DMCEPXtfHsrMmMeST4,66
|
|
5
5
|
np_codeocean/scripts/upload_sessions.py,sha256=1_aqoBxAkB_VpRKYqyPsEQBDGvgyAHXAkIJA0ZT2Vb0,1490
|
|
6
|
-
np_codeocean/upload.py,sha256=
|
|
6
|
+
np_codeocean/upload.py,sha256=qSxujYOMjuaz_NMJa_v9ZhuuTj9IiSs2qIxy8zWXeB4,17487
|
|
7
7
|
np_codeocean/upload_one.py,sha256=-egSjXvA0bBfshbY3D2TZ0M0GfLokFBZ3mSCm_gOGXE,7367
|
|
8
8
|
np_codeocean/utils.py,sha256=p0pmljaH4j7RjRsc4TPYXPpLhq-2ScvnfyXOYFSFBTM,3375
|
|
9
|
-
np_codeocean-0.1.
|
|
9
|
+
np_codeocean-0.1.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|