np_codeocean 0.1.6__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {np_codeocean-0.1.6 → np_codeocean-0.1.7}/PKG-INFO +2 -2
- {np_codeocean-0.1.6 → np_codeocean-0.1.7}/pyproject.toml +2 -2
- {np_codeocean-0.1.6 → np_codeocean-0.1.7}/src/np_codeocean/upload.py +85 -51
- {np_codeocean-0.1.6 → np_codeocean-0.1.7}/README.md +0 -0
- {np_codeocean-0.1.6 → np_codeocean-0.1.7}/src/np_codeocean/__init__.py +0 -0
- {np_codeocean-0.1.6 → np_codeocean-0.1.7}/src/np_codeocean/scripts/upload_sessions.py +0 -0
- {np_codeocean-0.1.6 → np_codeocean-0.1.7}/src/np_codeocean/upload_one.py +0 -0
- {np_codeocean-0.1.6 → np_codeocean-0.1.7}/src/np_codeocean/utils.py +0 -0
{np_codeocean-0.1.6 → np_codeocean-0.1.7}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: np_codeocean
-Version: 0.1.6
+Version: 0.1.7
 Summary: Tools for uploading and interacting with Mindscope Neuropixels experiments on Code Ocean
 Author-Email: Ben Hardcastle <ben.hardcastle@alleninstitute.org>
 License: MIT
@@ -19,7 +19,7 @@ Requires-Dist: np-tools>=0.1.21
 Requires-Dist: np-config>=0.4.24
 Requires-Dist: requests>=2.31.0
 Requires-Dist: npc-session>=0.1.34
-Requires-Dist: polars>=0.
+Requires-Dist: polars>=0.20.16
 Requires-Dist: bump>=1.3.2; extra == "dev"
 Requires-Dist: pdm>=2.4.9; extra == "dev"
 Provides-Extra: dev
```
{np_codeocean-0.1.6 → np_codeocean-0.1.7}/pyproject.toml

```diff
@@ -40,7 +40,7 @@ composite = [
 
 [project]
 name = "np_codeocean"
-version = "0.1.6"
+version = "0.1.7"
 description = "Tools for uploading and interacting with Mindscope Neuropixels experiments on Code Ocean"
 authors = [
     { name = "Ben Hardcastle", email = "ben.hardcastle@alleninstitute.org" },
@@ -51,7 +51,7 @@ dependencies = [
     "np-config>=0.4.24",
     "requests>=2.31.0",
     "npc-session>=0.1.34",
-    "polars>=0.
+    "polars>=0.20.16",
 ]
 requires-python = ">=3.9"
 readme = "README.md"
```
{np_codeocean-0.1.6 → np_codeocean-0.1.7}/src/np_codeocean/upload.py

```diff
@@ -4,9 +4,7 @@ import argparse
 import contextlib
 import csv
 import json
-from multiprocessing import context
 import pathlib
-import sys
 import datetime
 from pathlib import Path
 from typing import NamedTuple
```
```diff
@@ -20,9 +18,8 @@ import np_tools
 import doctest
 import numpy as np
 import polars as pl
-
 import requests
-
+from pydantic import ValidationError # may be returned from aind-data-transfer-service
 
 logger = np_logging.get_logger(__name__)
 
```
```diff
@@ -46,13 +43,16 @@ class CodeOceanUpload(NamedTuple):
     """Directory of symlinks to behavior video files and video info jsons in
     top-level of session folder on np-exp."""
 
-    ephys: Path
+    ephys: Path | None
     """Directory of symlinks to raw ephys data files on np-exp, with only one
     `recording` per `Record Node` folder."""
 
     job: Path
     """File containing job parameters for `aind-data-transfer`"""
 
+    force_cloud_sync: bool = False
+    """If True, re-upload and re-make raw asset even if data exists on S3."""
+
 def as_posix(path: pathlib.Path) -> str:
     return path.as_posix()[1:]
 
```
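Note that `force_cloud_sync` is added with a default value, which for a `NamedTuple` means it must trail all non-default fields; existing callers that construct the tuple without the new field are unaffected. A toy illustration (the `Upload` class here is a made-up stand-in, not the real `CodeOceanUpload`):

```python
from pathlib import Path
from typing import NamedTuple, Optional

class Upload(NamedTuple):  # hypothetical stand-in for CodeOceanUpload
    job: Path
    ephys: Optional[Path] = None      # Path | None, as in the diff above
    force_cloud_sync: bool = False    # defaulted fields must come last

u = Upload(job=Path("upload.csv"))
print(u.force_cloud_sync)  # False: older call sites keep working unchanged
```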
```diff
@@ -83,6 +83,29 @@ def create_ephys_symlinks(session: np_session.Session, dest: Path,
         if not abs_path.is_dir():
             np_tools.symlink(as_posix(abs_path), dest / rel_path)
     logger.debug(f'Finished creating symlinks to raw ephys data files in {root_path}')
+    correct_structure(dest)
+
+def correct_structure(dest: Path) -> None:
+    """
+    In case some probes are missing, remove device entries from structure.oebin files with folders that don't actually exist.
+    """
+    logger.debug('Creating modified structure.oebin')
+    oebin_paths = dest.rglob('recording[0-9]*/structure.oebin')
+    for oebin_path in oebin_paths:
+        logger.debug(f'Examining oebin: {oebin_path} for correction')
+        oebin_obj = np_tools.read_oebin(np_config.normalize_path(oebin_path.readlink()))
+
+        for subdir_name in ('events', 'continuous'):
+            subdir = oebin_path.parent / subdir_name
+            # iterate over copy of list so as to not disrupt iteration when elements are removed
+            for device in [device for device in oebin_obj[subdir_name]]:
+                if not (subdir / device['folder_name']).exists():
+                    logger.info(f'{device["folder_name"]} not found in {subdir}, removing from structure.oebin')
+                    oebin_obj[subdir_name].remove(device)
+
+        oebin_path.unlink()
+        oebin_path.write_text(json.dumps(oebin_obj, indent=4))
+        logger.debug('Overwrote symlink to structure.oebin with corrected structure.oebin')
 
 def is_behavior_video_file(path: Path) -> bool:
     if path.is_dir() or path.suffix not in ('.mp4', '.avi', '.json'):
```
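The new `correct_structure` pass prunes device entries from `structure.oebin` whose data folders were never symlinked (e.g. a probe that wasn't inserted). A minimal standalone sketch of the same pruning logic, using a temp directory and made-up probe folder names rather than np-exp data:

```python
import json
import tempfile
from pathlib import Path

# Toy structure.oebin content: each device entry references a data folder
# (probe names are hypothetical, for illustration only)
oebin = {
    "continuous": [
        {"folder_name": "Neuropix-PXI-100.ProbeA"},
        {"folder_name": "Neuropix-PXI-100.ProbeB"},
    ],
    "events": [],
}

with tempfile.TemporaryDirectory() as tmp:
    recording = Path(tmp) / "recording1"
    # only ProbeA's folder actually exists on disk
    (recording / "continuous" / "Neuropix-PXI-100.ProbeA").mkdir(parents=True)

    for subdir_name in ("continuous", "events"):
        subdir = recording / subdir_name
        # iterate over a copy so removals don't disturb the loop
        for device in list(oebin[subdir_name]):
            if not (subdir / device["folder_name"]).exists():
                oebin[subdir_name].remove(device)

print(json.dumps(oebin, indent=4))  # the ProbeB entry has been pruned
```

Iterating over a copy (`list(...)`) is what makes the in-place `remove` safe here; mutating the list being iterated would skip elements.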
```diff
@@ -130,7 +153,7 @@ def create_behavior_videos_symlinks(session: np_session.Session, dest: Path | No
 
 def is_surface_channel_recording(path_name: str) -> bool:
     """
-    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/
+    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
     >>> is_surface_channel_recording(session.npexp_path.as_posix())
     True
     """
```
```diff
@@ -138,9 +161,9 @@ def is_surface_channel_recording(path_name: str) -> bool:
 
 def get_surface_channel_start_time(session: np_session.Session) -> datetime.datetime:
     """
-    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/
+    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
     >>> get_surface_channel_start_time(session)
-    datetime.datetime(2023,
+    datetime.datetime(2023, 11, 29, 14, 56, 25, 219000)
     """
     sync_messages_paths = tuple(session.npexp_path.glob('*/*/*/sync_messages.txt'))
    if not sync_messages_paths:
```
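For reference, the start time in the doctest above comes from a millisecond epoch parsed out of `sync_messages.txt`; the conversion, `datetime.datetime.fromtimestamp(timestamp_value / 1e3)`, is visible as context in the next hunk and is simply ms to s. A quick illustration with a made-up value:

```python
import datetime

# Hypothetical millisecond epoch, the format found in sync_messages.txt
timestamp_value = 1_701_298_585_219

# fromtimestamp expects seconds (interpreted in the local time zone),
# so divide the millisecond value by 1e3; sub-second precision survives
timestamp = datetime.datetime.fromtimestamp(timestamp_value / 1e3)
print(timestamp)  # e.g. 2023-11-29 ...  .219000, depending on local TZ
```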
```diff
@@ -154,43 +177,42 @@ def get_surface_channel_start_time(session: np_session.Session) -> datetime.date
     timestamp = datetime.datetime.fromtimestamp(timestamp_value / 1e3)
     return timestamp
 
-def 
+def get_upload_csv_for_session(upload: CodeOceanUpload) -> dict[str, str | int | bool]:
     """
-    >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/
+    >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels"
     >>> is_surface_channel_recording(path)
     True
     >>> upload = create_codeocean_upload(path)
-    >>> ephys_upload_csv = 
+    >>> ephys_upload_csv = get_upload_csv_for_session(upload)
     >>> ephys_upload_csv['modality0.source']
-    '//allen/programs/mindscope/workgroups/np-exp/codeocean/
+    '//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_690706_20231129_surface_channels/ephys'
     >>> ephys_upload_csv.keys()
-    dict_keys(['
+    dict_keys(['platform', 'subject-id', 'force_cloud_sync', 'modality0', 'modality0.source', 'acq-datetime'])
     """
-
-    ephys_upload = {
-        'modality0.source': np_config.normalize_path(upload.ephys).as_posix(),
-        'modality0': 'ecephys',
-        's3-bucket': CONFIG['s3-bucket'],
-        'subject-id': str(upload.session.mouse),
+    params = {
         'platform': 'ecephys',
+        'subject-id': str(upload.session.mouse),
+        'force_cloud_sync': upload.force_cloud_sync,
     }
-
-
-
-
-
-
-
-
-
+    idx = 0
+    for modality_name, attr_name in {
+        'ecephys': 'ephys',
+        'behavior': 'behavior',
+        'behavior-videos': 'behavior_videos',
+    }.items():
+        if getattr(upload, attr_name) is not None:
+            params[f'modality{idx}'] = modality_name
+            params[f'modality{idx}.source'] = np_config.normalize_path(getattr(upload, attr_name)).as_posix()
+            idx += 1
+
     if is_surface_channel_recording(upload.session.npexp_path.as_posix()):
         date = datetime.datetime(upload.session.date.year, upload.session.date.month, upload.session.date.day)
         session_date_time = date.combine(upload.session.date, get_surface_channel_start_time(upload.session).time())
-
+        params['acq-datetime'] = f'{session_date_time.strftime("%Y-%m-%d %H:%M:%S")}'
     else:
-
+        params['acq-datetime'] = f'{upload.session.start.strftime("%Y-%m-%d %H:%M:%S")}'
 
-    return 
+    return params
 
 
 def is_in_hpc_upload_queue(csv_path: pathlib.Path) -> bool:
```
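The rewritten `get_upload_csv_for_session` no longer hard-codes `modality0` as ephys: each non-`None` source on the upload gets the next `modality{N}` / `modality{N}.source` pair, so a surface-channel session (no behavior data) still produces contiguous indices. A standalone sketch of that indexing scheme, with a plain dataclass standing in for `CodeOceanUpload` and fake paths:

```python
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

@dataclass
class FakeUpload:  # hypothetical stand-in for CodeOceanUpload
    ephys: Optional[Path]
    behavior: Optional[Path]
    behavior_videos: Optional[Path]

upload = FakeUpload(
    ephys=Path("/tmp/session/ephys"),
    behavior=None,  # e.g. a surface-channel session with no behavior data
    behavior_videos=Path("/tmp/session/behavior-videos"),
)

params: dict[str, str] = {}
idx = 0
for modality_name, attr_name in {
    "ecephys": "ephys",
    "behavior": "behavior",
    "behavior-videos": "behavior_videos",
}.items():
    source = getattr(upload, attr_name)
    if source is not None:  # None modalities are skipped, so indices stay contiguous
        params[f"modality{idx}"] = modality_name
        params[f"modality{idx}.source"] = source.as_posix()
        idx += 1

print(params)
# {'modality0': 'ecephys', 'modality0.source': '/tmp/session/ephys',
#  'modality1': 'behavior-videos', 'modality1.source': '/tmp/session/behavior-videos'}
```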
```diff
@@ -230,7 +252,8 @@ def put_csv_for_hpc_upload(csv_path: pathlib.Path) -> None:
     to get the real error class + message."""
     if response.status_code != 200:
         try:
-
+            x = response.json()['data']['errors']
+            import pdb; pdb.set_trace()
         except (KeyError, IndexError, requests.exceptions.JSONDecodeError, SyntaxError) as exc1:
             try:
                 response.raise_for_status()
```
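This helper tries to surface the real error message from the service's JSON body before falling back to `requests`' own exception (the `import pdb; pdb.set_trace()` line appears to be a debugging leftover that shipped in this release). A sketch of that extract-then-fallback pattern, assuming the same `{'data': {'errors': ...}}` body shape the code above expects:

```python
import requests

def error_details(response: requests.Response) -> str:
    """Best-effort extraction of a service error message (assumed body shape)."""
    try:
        # assumed shape: {"data": {"errors": [...]}}
        return str(response.json()["data"]["errors"])
    except (KeyError, IndexError, requests.exceptions.JSONDecodeError):
        try:
            # fall back to the generic HTTP error for this status code
            response.raise_for_status()
        except requests.HTTPError as exc:
            return str(exc)
    return "unknown error"
```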
```diff
@@ -261,10 +284,13 @@ def put_csv_for_hpc_upload(csv_path: pathlib.Path) -> None:
         ),
     )
     _raise_for_status(post_csv_response)
-
+
+def is_ephys_session(session: np_session.Session) -> bool:
+    return bool(next(session.npexp_path.rglob('settings.xml'), None))
+
 def create_upload_job(upload: CodeOceanUpload) -> None:
     logger.info(f'Creating upload job file {upload.job} for session {upload.session}...')
-    job: dict = 
+    job: dict = get_upload_csv_for_session(upload)
     with open(upload.job, 'w') as f:
         w = csv.writer(f, lineterminator='')
         w.writerow(job.keys())
```
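Two small patterns here are worth noting: `is_ephys_session` probes for any Open Ephys `settings.xml` under the session folder, stopping at the first hit, and the job file is a two-row CSV (header row, then values). A sketch of both under a temp directory (folder and field values are made up; the real code writes through `np_config`-normalized paths):

```python
import csv
import tempfile
from pathlib import Path

def has_any(root: Path, pattern: str) -> bool:
    # next(..., None) stops at the first match instead of scanning everything
    return bool(next(root.rglob(pattern), None))

with tempfile.TemporaryDirectory() as tmp:
    root = Path(tmp)
    (root / "Record Node 101").mkdir()
    (root / "Record Node 101" / "settings.xml").touch()
    print(has_any(root, "settings.xml"))  # True -> treated as an ephys session

    # Job file: one header row + one values row, like upload.csv
    job = {"platform": "ecephys", "subject-id": "690706", "force_cloud_sync": False}
    with open(root / "upload.csv", "w", newline="") as f:
        w = csv.writer(f)
        w.writerow(job.keys())
        w.writerow(job.values())
    print((root / "upload.csv").read_text())
```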
```diff
@@ -273,28 +299,33 @@ def create_upload_job(upload: CodeOceanUpload) -> None:
     w.writerow(job.values())
 
 def create_codeocean_upload(session: str | int | np_session.Session,
-    recording_dirs: Iterable[str] | None = None
-
-
-    >>> upload.behavior is None
-    True
-    >>> upload.ephys.exists()
-    True
-    """
+    recording_dirs: Iterable[str] | None = None,
+    force_cloud_sync: bool = False,
+    ) -> CodeOceanUpload:
     """Create directories of symlinks to np-exp files with correct structure
     for upload to CodeOcean.
 
     - only one `recording` per `Record Node` folder (largest if multiple found)
     - job file for feeding into `aind-data-transfer`
-    """
 
-
+    >>> upload = create_codeocean_upload("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
+    >>> upload.behavior is None
+    True
+    >>> upload.ephys.exists()
+    True
+    """
 
-    if is_surface_channel_recording(session
+    if is_surface_channel_recording(str(session)):
+        session = np_session.Session(session)
+        if not is_surface_channel_recording(session.npexp_path.name):
+            # manually assign surface channel path
+            session = np_session.Session(session.npexp_path.parent / f'{session.folder}_surface_channels')
+            assert session.npexp_path.exists(), f"Surface channel path {session.npexp_path} does not exist in same folder as main session recording"
         root = np_session.NPEXP_PATH / 'codeocean' / f'{session.folder}_surface_channels'
         behavior = None
         behavior_videos = None
     else:
+        session = np_session.Session(session)
         root = np_session.NPEXP_PATH / 'codeocean' / session.folder
         behavior = np_config.normalize_path(root / 'behavior')
         behavior_videos = behavior.with_name('behavior-videos')
```
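When handed a main session that has a surface-channel recording, the new branch derives the sibling folder by suffixing the session folder name. A tiny sketch of just that naming rule (the helper is hypothetical; the real code resolves it through `np_session.Session`):

```python
from pathlib import Path

def surface_channels_sibling(npexp_path: Path) -> Path:
    """Derive the surface-channels folder expected next to a main session folder."""
    return npexp_path.parent / f"{npexp_path.name}_surface_channels"

main = Path("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129")
print(surface_channels_sibling(main).name)  # DRpilot_690706_20231129_surface_channels
```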
```diff
@@ -305,8 +336,9 @@ def create_codeocean_upload(session: str | int | np_session.Session,
         session = session,
         behavior = behavior,
         behavior_videos = behavior_videos,
-        ephys = np_config.normalize_path(root / 'ephys'),
+        ephys = np_config.normalize_path(root / 'ephys') if is_ephys_session(session) else None,
         job = np_config.normalize_path(root / 'upload.csv'),
+        force_cloud_sync=force_cloud_sync,
     )
 
     create_ephys_symlinks(upload.session, upload.ephys, recording_dirs=recording_dirs)
```
```diff
@@ -316,8 +348,10 @@ def create_codeocean_upload(session: str | int | np_session.Session,
     return upload
 
 def upload_session(session: str | int | pathlib.Path | np_session.Session,
-    recording_dirs: Iterable[str] | None = None
-
+    recording_dirs: Iterable[str] | None = None,
+    force: bool = False,
+    ) -> None:
+    upload = create_codeocean_upload(str(session), recording_dirs=recording_dirs, force_cloud_sync=force)
     np_logging.web('np_codeocean').info(f'Submitting {upload.session} to hpc upload queue')
     put_csv_for_hpc_upload(upload.job)
     logger.debug(f'Submitted {upload.session} to hpc upload queue')
```
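End to end, the new `force` flag threads through `upload_session` into the job's `force_cloud_sync` field. A hedged usage sketch (the session folder name is illustrative, and assumes the package is installed with access to np-exp):

```python
from np_codeocean.upload import upload_session

# Re-upload and re-make the raw asset even if the data already exists on S3:
# `force=True` becomes `force_cloud_sync=True` in the generated upload.csv
upload_session("DRpilot_690706_20231129", force=True)
```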
```diff
@@ -336,6 +370,7 @@ def main() -> None:
 def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(description="Upload a session to CodeOcean")
     parser.add_argument('session', help="session ID (lims or np-exp foldername) or path to session folder")
+    parser.add_argument('--force', action='store_true', help="enable `force_cloud_sync` option, re-uploading and re-making raw asset even if data exists on S3")
     parser.add_argument('recording_dirs', nargs='*', type=list, help="[optional] specific recording directories to upload - for use with split recordings only.")
     return parser.parse_args()
 
```
```diff
@@ -343,6 +378,5 @@ if __name__ == '__main__':
     import doctest
 
     doctest.testmod(
-        optionflags=(doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE)
-    )
-    main()
+        optionflags=(doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE),
+    )
```