np_codeocean 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- np_codeocean/__init__.py +1 -1
- np_codeocean/np_session_utils.py +367 -0
- np_codeocean/scripts/fix_ephys_data_on_s3.py +20 -0
- np_codeocean/scripts/upload_dynamic_routing_behavior.py +416 -0
- np_codeocean/scripts/upload_dynamic_routing_ecephys.py +215 -0
- np_codeocean/scripts/upload_ethan_analysis_files.py +22 -0
- np_codeocean/utils.py +452 -94
- {np_codeocean-0.2.0.dist-info → np_codeocean-0.3.0.dist-info}/METADATA +16 -7
- np_codeocean-0.3.0.dist-info/RECORD +12 -0
- {np_codeocean-0.2.0.dist-info → np_codeocean-0.3.0.dist-info}/WHEEL +1 -1
- np_codeocean-0.3.0.dist-info/entry_points.txt +7 -0
- np_codeocean/upload.py +0 -359
- np_codeocean/upload_one.py +0 -183
- np_codeocean-0.2.0.dist-info/RECORD +0 -9
- np_codeocean-0.2.0.dist-info/entry_points.txt +0 -4
- /np_codeocean/scripts/{upload_sessions.py → upload_split_recordings_example.py} +0 -0
np_codeocean/upload.py
DELETED
@@ -1,359 +0,0 @@
from __future__ import annotations

import argparse
import contextlib
import csv
import json
import pathlib
import datetime
from pathlib import Path
from typing import NamedTuple
from collections.abc import Iterable

import np_config
import np_logging
import np_session
import npc_session
import np_tools
import doctest
import numpy as np
import polars as pl
import requests
from pydantic import ValidationError  # may be returned from aind-data-transfer-service

logger = np_logging.get_logger(__name__)

CONFIG = np_config.fetch('/projects/np_codeocean')
AIND_DATA_TRANSFER_SERVICE = "http://aind-data-transfer-service"


class CodeOceanUpload(NamedTuple):
    """Objects required for uploading a Mindscope Neuropixels session to CodeOcean.
    Paths are symlinks to files on np-exp.
    """
    session: np_session.Session
    """Session object that the paths belong to."""

    behavior: Path | None
    """Directory of symlinks to files in top-level of session folder on np-exp,
    plus all files in `exp` and `qc` subfolders, if present. Excludes behavior video files
    and video info jsons."""

    behavior_videos: Path | None
    """Directory of symlinks to behavior video files and video info jsons in
    top-level of session folder on np-exp."""

    ephys: Path | None
    """Directory of symlinks to raw ephys data files on np-exp, with only one
    `recording` per `Record Node` folder."""

    job: Path
    """File containing job parameters for `aind-data-transfer`"""

    force_cloud_sync: bool = False
    """If True, re-upload and re-make raw asset even if data exists on S3."""

def as_posix(path: pathlib.Path) -> str:
    return path.as_posix()[1:]

def create_ephys_symlinks(session: np_session.Session, dest: Path,
                          recording_dirs: Iterable[str] | None = None) -> None:
    """Create symlinks in `dest` pointing to raw ephys data files on np-exp, with only one
    `recording` per `Record Node` folder (the largest, if multiple found).

    Relative paths are preserved, so `dest` will essentially be a merge of
    _probeABC / _probeDEF folders.

    Top-level items other than `Record Node *` folders are excluded.
    """
    root_path = session.npexp_path
    if isinstance(session, np_session.PipelineSession) and session.lims_path is not None:
        # if ephys has been uploaded to lims, use lims path, as large raw data may have
        # been deleted from np-exp
        if any(
            np_tools.get_filtered_ephys_paths_relative_to_record_node_parents(
                session.npexp_path, specific_recording_dir_names=recording_dirs
            )
        ):
            root_path = session.lims_path
    logger.info(f'Creating symlinks to raw ephys data files in {root_path}...')
    for abs_path, rel_path in np_tools.get_filtered_ephys_paths_relative_to_record_node_parents(
        root_path, specific_recording_dir_names=recording_dirs
    ):
        if not abs_path.is_dir():
            np_tools.symlink(as_posix(abs_path), dest / rel_path)
    logger.debug(f'Finished creating symlinks to raw ephys data files in {root_path}')

def is_behavior_video_file(path: Path) -> bool:
    if path.is_dir() or path.suffix not in ('.mp4', '.avi', '.json'):
        return False
    with contextlib.suppress(ValueError):
        _ = npc_session.extract_mvr_camera_name(path.as_posix())
        return True
    return False

def create_behavior_symlinks(session: np_session.Session, dest: Path | None) -> None:
    """Create symlinks in `dest` pointing to files in top-level of session
    folder on np-exp, plus all files in `exp` subfolder, if present.
    """
    if dest is None:
        logger.debug(f"No behavior folder supplied for {session}")
        return
    subfolder_names = ('exp', 'qc')
    logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
    for src in session.npexp_path.glob('*'):
        if not src.is_dir() and not is_behavior_video_file(src):
            np_tools.symlink(as_posix(src), dest / src.relative_to(session.npexp_path))
    logger.debug(f'Finished creating symlinks to top-level files in {session.npexp_path}')

    for name in subfolder_names:
        subfolder = session.npexp_path / name
        if not subfolder.exists():
            continue
        for src in subfolder.rglob('*'):
            if not src.is_dir():
                np_tools.symlink(as_posix(src), dest / src.relative_to(session.npexp_path))
        logger.debug(f'Finished creating symlinks to {name!r} files')

def create_behavior_videos_symlinks(session: np_session.Session, dest: Path | None) -> None:
    """Create symlinks in `dest` pointing to MVR video files and info jsons in top-level of session
    folder on np-exp.
    """
    if dest is None:
        logger.debug(f"No behavior_videos folder supplied for {session}")
        return
    logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
    for src in session.npexp_path.glob('*'):
        if is_behavior_video_file(src):
            np_tools.symlink(as_posix(src), dest / src.relative_to(session.npexp_path))
    logger.debug(f'Finished creating symlinks to behavior video files in {session.npexp_path}')

def is_surface_channel_recording(path_name: str) -> bool:
    """
    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
    >>> is_surface_channel_recording(session.npexp_path.as_posix())
    True
    """
    return 'surface_channels' in path_name.lower()

def get_surface_channel_start_time(session: np_session.Session) -> datetime.datetime:
    """
    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
    >>> get_surface_channel_start_time(session)
    datetime.datetime(2023, 11, 29, 14, 56, 25, 219000)
    """
    sync_messages_paths = tuple(session.npexp_path.glob('*/*/*/sync_messages.txt'))
    if not sync_messages_paths:
        raise ValueError(f'No sync messages txt found for surface channel session {session}')
    sync_messages_path = sync_messages_paths[0]

    with open(sync_messages_path, 'r') as f:
        software_time_line = f.readlines()[0]

    timestamp_value = float(software_time_line[software_time_line.index(':')+2:].strip())
    timestamp = datetime.datetime.fromtimestamp(timestamp_value / 1e3)
    return timestamp

def get_upload_csv_for_session(upload: CodeOceanUpload) -> dict[str, str | int | bool]:
    """
    >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels"
    >>> is_surface_channel_recording(path)
    True
    >>> upload = create_codeocean_upload(path)
    >>> ephys_upload_csv = get_upload_csv_for_session(upload)
    >>> ephys_upload_csv['modality0.source']
    '//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_690706_20231129_surface_channels/ephys'
    >>> ephys_upload_csv.keys()
    dict_keys(['platform', 'subject-id', 'force_cloud_sync', 'modality0', 'modality0.source', 'acq-datetime'])
    """
    params = {
        'platform': 'ecephys',
        'subject-id': str(upload.session.mouse),
        'force_cloud_sync': upload.force_cloud_sync,
    }
    idx = 0
    for modality_name, attr_name in {
        'ecephys': 'ephys',
        'behavior': 'behavior',
        'behavior-videos': 'behavior_videos',
    }.items():
        if getattr(upload, attr_name) is not None:
            params[f'modality{idx}'] = modality_name
            params[f'modality{idx}.source'] = np_config.normalize_path(getattr(upload, attr_name)).as_posix()
            idx += 1

    if is_surface_channel_recording(upload.session.npexp_path.as_posix()):
        date = datetime.datetime(upload.session.date.year, upload.session.date.month, upload.session.date.day)
        session_date_time = date.combine(upload.session.date, get_surface_channel_start_time(upload.session).time())
        params['acq-datetime'] = f'{session_date_time.strftime("%Y-%m-%d %H:%M:%S")}'
    else:
        params['acq-datetime'] = f'{upload.session.start.strftime("%Y-%m-%d %H:%M:%S")}'

    return params


def is_in_hpc_upload_queue(csv_path: pathlib.Path) -> bool:
    """Check if an upload job has been submitted to the hpc upload queue.

    - currently assumes one job per csv
    - does not check status (job may be FINISHED rather than RUNNING)

    >>> is_in_hpc_upload_queue("//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_664851_20231114/upload.csv")
    False
    """
    # get subject-id, acq-datetime from csv
    df = pl.read_csv(csv_path, eol_char='\r')
    for col in df.get_columns():
        if col.name.startswith('subject') and col.name.endswith('id'):
            subject = npc_session.SubjectRecord(col[0])
            continue
        if col.name.startswith('acq') and 'datetime' in col.name.lower():
            dt = npc_session.DatetimeRecord(col[0])
            continue
    partial_session_id = f"{subject}_{dt.replace(' ', '_').replace(':', '-')}"

    jobs_response = requests.get(f"{AIND_DATA_TRANSFER_SERVICE}/jobs")
    jobs_response.raise_for_status()
    return partial_session_id in jobs_response.content.decode()

def put_csv_for_hpc_upload(csv_path: pathlib.Path) -> None:
    """Submit a single job upload csv to the aind-data-transfer-service, for
    upload to S3 on the hpc.

    - gets validated version of csv
    - checks session is not already being uploaded
    - submits csv via http request
    """
    def _raise_for_status(response: requests.Response) -> None:
        """pydantic validation errors are returned as strings that can be eval'd
        to get the real error class + message."""
        if response.status_code != 200:
            try:
                x = response.json()['data']['errors']
                import pdb; pdb.set_trace()
            except (KeyError, IndexError, requests.exceptions.JSONDecodeError, SyntaxError) as exc1:
                try:
                    response.raise_for_status()
                except requests.exceptions.HTTPError as exc2:
                    raise exc2 from exc1

    with open(csv_path, 'rb') as f:
        validate_csv_response = requests.post(
            url=f"{AIND_DATA_TRANSFER_SERVICE}/api/validate_csv",
            files=dict(file=f),
        )
    _raise_for_status(validate_csv_response)

    if is_in_hpc_upload_queue(csv_path):
        logger.warning(f"Job already submitted for {csv_path}")
        return

    post_csv_response = requests.post(
        url=f"{AIND_DATA_TRANSFER_SERVICE}/api/submit_hpc_jobs",
        json=dict(
            jobs=[
                dict(
                    hpc_settings=json.dumps({"time_limit": 60 * 15, "mail_user": "arjun.sridhar@alleninstitute.org"}),
                    upload_job_settings=validate_csv_response.json()["data"]["jobs"][0],
                    script="",
                )
            ]
        ),
    )
    _raise_for_status(post_csv_response)

def is_ephys_session(session: np_session.Session) -> bool:
    return bool(next(session.npexp_path.rglob('settings.xml'), None))

def create_upload_job(upload: CodeOceanUpload) -> None:
    logger.info(f'Creating upload job file {upload.job} for session {upload.session}...')
    job: dict = get_upload_csv_for_session(upload)
    with open(upload.job, 'w') as f:
        w = csv.writer(f, lineterminator='')
        w.writerow(job.keys())
        w.writerow('\n')

        w.writerow(job.values())

def create_codeocean_upload(session: str | int | np_session.Session,
                            recording_dirs: Iterable[str] | None = None,
                            force_cloud_sync: bool = False,
                            ) -> CodeOceanUpload:
    """Create directories of symlinks to np-exp files with correct structure
    for upload to CodeOcean.

    - only one `recording` per `Record Node` folder (largest if multiple found)
    - job file for feeding into `aind-data-transfer`

    >>> upload = create_codeocean_upload("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
    >>> upload.behavior is None
    True
    >>> upload.ephys.exists()
    True
    """

    if is_surface_channel_recording(str(session)):
        session = np_session.Session(session)
        if not is_surface_channel_recording(session.npexp_path.name):
            # manually assign surface channel path
            session = np_session.Session(session.npexp_path.parent / f'{session.folder}_surface_channels')
            assert session.npexp_path.exists(), f"Surface channel path {session.npexp_path} does not exist in same folder as main session recording"
        root = np_session.NPEXP_PATH / 'codeocean' / f'{session.folder}_surface_channels'
        behavior = None
        behavior_videos = None
    else:
        session = np_session.Session(session)
        root = np_session.NPEXP_PATH / 'codeocean' / session.folder
        behavior = np_config.normalize_path(root / 'behavior')
        behavior_videos = behavior.with_name('behavior-videos')

    logger.debug(f'Created directory {root} for CodeOcean upload')

    upload = CodeOceanUpload(
        session = session,
        behavior = behavior,
        behavior_videos = behavior_videos,
        ephys = np_config.normalize_path(root / 'ephys') if is_ephys_session(session) else None,
        job = np_config.normalize_path(root / 'upload.csv'),
        force_cloud_sync=force_cloud_sync,
    )

    create_ephys_symlinks(upload.session, upload.ephys, recording_dirs=recording_dirs)
    create_behavior_symlinks(upload.session, upload.behavior)
    create_behavior_videos_symlinks(upload.session, upload.behavior_videos)
    create_upload_job(upload)
    return upload

def upload_session(session: str | int | pathlib.Path | np_session.Session,
                   recording_dirs: Iterable[str] | None = None,
                   force: bool = False,
                   ) -> None:
    upload = create_codeocean_upload(str(session), recording_dirs=recording_dirs, force_cloud_sync=force)
    np_logging.web('np_codeocean').info(f'Submitting {upload.session} to hpc upload queue')
    put_csv_for_hpc_upload(upload.job)
    logger.debug(f'Submitted {upload.session} to hpc upload queue')

    if (is_split_recording :=
        recording_dirs is not None
        and len(tuple(recording_dirs)) > 1
        and isinstance(recording_dirs, str)
    ):
        logger.warning(f"Split recording {upload.session} will need to be sorted manually with `CONCAT=True`")


def main() -> None:
    upload_session(**vars(parse_args()))

def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Upload a session to CodeOcean")
    parser.add_argument('session', help="session ID (lims or np-exp foldername) or path to session folder")
    parser.add_argument('--force', action='store_true', help="enable `force_cloud_sync` option, re-uploading and re-making raw asset even if data exists on S3")
    parser.add_argument('recording_dirs', nargs='*', type=list, help="[optional] specific recording directories to upload - for use with split recordings only.")
    return parser.parse_args()

if __name__ == '__main__':
    import doctest

    doctest.testmod(
        optionflags=(doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE),
    )
np_codeocean/upload_one.py
DELETED
@@ -1,183 +0,0 @@
from __future__ import annotations

import csv
import datetime
import pathlib
import sys
from pathlib import Path
from typing import ClassVar, NamedTuple

from aind_data_transfer.jobs.s3_upload_job import GenericS3UploadJobList
from aind_data_schema.data_description import ExperimentType
from aind_data_transfer.config_loader.base_config import BasicJobEndpoints
from aind_data_transfer.jobs.basic_job import BasicJob, BasicUploadJobConfigs
from aind_data_transfer.util.s3_utils import upload_to_s3, copy_to_s3
import np_config
import np_logging
import np_session
import np_tools

import np_codeocean.utils as utils

logger = np_logging.get_logger(__name__)

CONFIG = np_config.fetch('/projects/np_codeocean')
JOB_LIST_FILENAME_PREFIX = 'codeocean_upload'

class ConfigWithExtendedS3Prefix(BasicUploadJobConfigs):

    _extension: ClassVar[str]
    """Some tag to add to the end of the bucket name, e.g.
    `_curated_<datetime>`"""

    def __init__(self, s3_bucket_name_extension: str, **kwargs) -> None:
        super().__init__(**kwargs)
        self.__class__._extension = s3_bucket_name_extension

    @property
    def s3_prefix(self) -> str:
        extension = f'_{self._extension}' if self._extension[0] != '_' else self._extension
        return super().s3_prefix + extension


class EcephysJobListWithoutRawData(GenericS3UploadJobList):

    def __init__(self, s3_bucket_name_extension: str, *args):
        self.extension = s3_bucket_name_extension
        super().__init__(*args)

    def _create_job_config_list(self) -> list[BasicJob]:
        """Reads in the csv file and outputs a list of Job Configs."""
        job_list = list()
        param_store_name = None
        job_endpoints = {}
        with open(self.configs.jobs_csv_file, newline="") as csvfile:
            reader = csv.DictReader(csvfile, skipinitialspace=True)
            for row in reader:
                cleaned_row = {
                    k.strip().replace("-", "_"): self._clean_csv_entry(
                        k.strip().replace("-", "_"), v
                    )
                    for k, v in row.items()
                }
                cleaned_row["acq_date"] = BasicUploadJobConfigs.parse_date(
                    cleaned_row["acq_date"]
                )
                cleaned_row["acq_time"] = BasicUploadJobConfigs.parse_time(
                    cleaned_row["acq_time"]
                )
                # Override with flags set in command line
                if self.configs.dry_run is True:
                    cleaned_row["dry_run"] = True
                if self.configs.compress_raw_data is True:
                    cleaned_row["compress_raw_data"] = True
                # Avoid downloading endpoints from aws multiple times
                if cleaned_row.get("aws_param_store_name") is not None:
                    # Check if param store is defined in previous row
                    if cleaned_row["aws_param_store_name"] == param_store_name:
                        cleaned_row.update(job_endpoints)
                    # Otherwise, download it from aws
                    else:
                        job_endpoints = BasicJobEndpoints(
                            aws_param_store_name=(
                                cleaned_row["aws_param_store_name"]
                            )
                        ).dict()
                        cleaned_row.update(job_endpoints)
                        param_store_name = cleaned_row["aws_param_store_name"]
                    del cleaned_row["aws_param_store_name"]

                #! the original method switches here to create an EcephysJob:
                # that isn't appropriate for non-raw data, so we just create
                # the default general-purpose BasicJob instead
                configs_from_row = ConfigWithExtendedS3Prefix(self.extension, **cleaned_row)
                new_job = BasicJob(job_configs=configs_from_row)
                job_list.append(new_job)
        return job_list


class OneOffCodeOceanUpload(NamedTuple):
    """Objects required for uploading data associated with a Mindscope Neuropixels to CodeOcean.

    `source` can be a file or folder of data to upload.
    """
    session: np_session.Session
    """Session object that the paths belong to."""

    source: Path
    """Path to file or directory to upload."""

    job: Path
    """File containing job parameters for `aind-data-transfer`"""


def get_ephys_upload_csv_for_session(session: np_session.Session, source: Path) -> dict[str, str | int]:
    return {
        'data-source': np_config.normalize_path(source).as_posix(),
        's3-bucket': CONFIG['s3-bucket'],
        'subject-id': str(session.mouse),
        'experiment-type': 'ecephys',
        'modality': 'ECEPHYS',
        'acq-date': f'{session.date:%Y-%m-%d}',
        'acq-time': f'{session.start:%H-%M-%S}',
        'aws-param-store-name': CONFIG['aws-param-store-name'],
    } # type: ignore


def create_upload_job(session: np_session.Session, job: Path, source: Path) -> None:
    logger.info(f'Creating upload job file {job} for session {session}...')
    _csv = get_ephys_upload_csv_for_session(session, source)
    with open(job, 'w') as f:
        w = csv.writer(f)
        w.writerow(_csv.keys())
        w.writerow(_csv.values())


def create_codeocean_upload(source: str | Path) -> OneOffCodeOceanUpload:
    """Create upload object
    - job file for feeding into `aind-data-transfer`
    """

    session = np_session.Session(source)

    upload = OneOffCodeOceanUpload(
        session = session,
        source = np_config.normalize_path(source),
        job = np_config.normalize_path(source) / get_new_job_list_filename(),
    )
    create_upload_job(upload.session, upload.job, upload.source)
    return upload

def get_new_job_list_filename() -> str:
    return f'{JOB_LIST_FILENAME_PREFIX}_{datetime.datetime.now():%Y-%m-%d_%H-%M-%S}.csv'

def get_timestamp_from_job_list_filename(filename: str) -> str:
    return filename.split(f'{JOB_LIST_FILENAME_PREFIX}_')[-1]

def get_s3_prefix(upload: OneOffCodeOceanUpload) -> str:
    d = get_ephys_upload_csv_for_session(upload.session, upload.source)
    return f"{d['experiment-type']}_{d['subject-id']}_{d['acq-date']}_{d['acq-time']}"

def upload(source: str | pathlib.Path, tag: str) -> None:
    utils.ensure_credentials()
    upload = create_codeocean_upload(source)
    tag = f"{tag}{'_' if tag[-1] != '_' else ''}{get_timestamp_from_job_list_filename(upload.job.stem)}"
    np_logging.web('np_codeocean').info(f'Uploading {upload.source}')
    # EcephysJobListWithoutRawData(tag, ["--jobs-csv-file", upload.job.as_posix()]).run_job()
    if upload.source.is_dir():
        fn = upload_to_s3
    else:
        fn = copy_to_s3
    fn(
        source,
        s3_bucket=get_ephys_upload_csv_for_session(upload.session, upload.source)['s3-bucket'],
        s3_prefix=f"{get_s3_prefix(upload)}_{tag}",
        dryrun=False,
    )
    np_logging.web('np_codeocean').info(f'Finished uploading {upload.source}')

def main() -> None:
    upload(*sys.argv[1:])

if __name__ == '__main__':
    main()
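Similarly, the deleted one-off uploader was driven by `upload(source, tag)`, which `main()` called with two positional command-line arguments. A hedged sketch, assuming valid credentials are available via `utils.ensure_credentials()` and that np_session can resolve the source to a session; the path and tag below are placeholders:

import np_codeocean.upload_one

# Roughly equivalent to `python -m np_codeocean.upload_one <source> <tag>`.
# The source path and tag are hypothetical examples.
np_codeocean.upload_one.upload(
    source="//allen/programs/mindscope/workgroups/np-exp/DRpilot_690706_20231129/exported_data",
    tag="curated",   # combined with the job-file timestamp to extend the S3 prefix
)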
np_codeocean-0.2.0.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
np_codeocean-0.2.0.dist-info/METADATA,sha256=UlrIexZ_aS71qK8-QiUkiGoe3h75zmJBqnplTAVaDbk,2488
np_codeocean-0.2.0.dist-info/WHEEL,sha256=N2J68yzZqJh3mI_Wg92rwhw0rtJDFpZj9bwQIMJgaVg,90
np_codeocean-0.2.0.dist-info/entry_points.txt,sha256=T3Is83nShuWFYg7bTLxVhRWi15OVxO99WYcUg3-xURM,113
np_codeocean/__init__.py,sha256=BYXXoFDa1J_Lv-YG52Ch6k5L4DMCEPXtfHsrMmMeST4,66
np_codeocean/scripts/upload_sessions.py,sha256=1_aqoBxAkB_VpRKYqyPsEQBDGvgyAHXAkIJA0ZT2Vb0,1490
np_codeocean/upload.py,sha256=suP41bMthg0bgkm6FFktv5v95P9bwQyDdXdSKrjQ-YU,15972
np_codeocean/upload_one.py,sha256=-egSjXvA0bBfshbY3D2TZ0M0GfLokFBZ3mSCm_gOGXE,7367
np_codeocean/utils.py,sha256=p0pmljaH4j7RjRsc4TPYXPpLhq-2ScvnfyXOYFSFBTM,3375
np_codeocean-0.2.0.dist-info/RECORD,,
/np_codeocean/scripts/{upload_sessions.py → upload_split_recordings_example.py}
File without changes