np_codeocean 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- np_codeocean/__init__.py +1 -1
- np_codeocean/np_session_utils.py +367 -0
- np_codeocean/scripts/fix_ephys_data_on_s3.py +20 -0
- np_codeocean/scripts/upload_dynamic_routing_behavior.py +416 -0
- np_codeocean/scripts/upload_dynamic_routing_ecephys.py +215 -0
- np_codeocean/scripts/upload_ethan_analysis_files.py +22 -0
- np_codeocean/utils.py +452 -94
- {np_codeocean-0.2.0.dist-info → np_codeocean-0.3.0.dist-info}/METADATA +16 -7
- np_codeocean-0.3.0.dist-info/RECORD +12 -0
- {np_codeocean-0.2.0.dist-info → np_codeocean-0.3.0.dist-info}/WHEEL +1 -1
- np_codeocean-0.3.0.dist-info/entry_points.txt +7 -0
- np_codeocean/upload.py +0 -359
- np_codeocean/upload_one.py +0 -183
- np_codeocean-0.2.0.dist-info/RECORD +0 -9
- np_codeocean-0.2.0.dist-info/entry_points.txt +0 -4
- /np_codeocean/scripts/{upload_sessions.py → upload_split_recordings_example.py} +0 -0
np_codeocean/utils.py
CHANGED
@@ -1,94 +1,452 @@
-from __future__ import annotations
-… (remaining 93 removed lines truncated in source)
+from __future__ import annotations
+
+import contextlib
+import csv
+import datetime
+import functools
+import itertools
+import json
+import logging
+import os
+import pathlib
+from typing import Any, Generator, Iterable, Literal
+import typing_extensions
+
+import aind_data_transfer_models.core
+import aind_slurm_rest.models
+import np_config
+import np_tools
+import npc_ephys
+import npc_sync
+import npc_session
+import numpy as np
+import polars as pl
+import requests
+
+logger = logging.getLogger(__name__)
+
+AINDPlatform = Literal['ecephys', 'behavior']
+
+AIND_DATA_TRANSFER_SERVICE = "http://aind-data-transfer-service"
+DEV_SERVICE = "http://aind-data-transfer-service-dev"
+HPC_UPLOAD_JOB_EMAIL = "ben.hardcastle@alleninstitute.org"
+ACQ_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
+
+AIND_METADATA_NAMES: tuple[str, ...] = ('session', 'data_description', 'procedures', 'processing', 'rig', 'subject')
+
+DEFAULT_EPHYS_SLURM_SETTINGS = aind_slurm_rest.models.V0036JobProperties(
+    environment=dict(), # JonY: set this to an empty dictionary
+    time_limit = 15 * 60,
+    minimum_cpus_per_node=12, # 6 probes * (lfp + ap)
+)
+"""Increased timelimit and cpus for running ephys compression on the hpc"""
+
+class SyncFileNotFoundError(FileNotFoundError):
+    pass
+
+@functools.cache
+def get_project_config() -> dict[str, Any]:
+    """Config for this project"""
+    return np_config.fetch('/projects/np_codeocean')
+
+def set_npc_lims_credentials() -> None:
+    creds = np_config.fetch('/projects/np_codeocean/npc_lims')
+    for k, v in creds.items():
+        os.environ.setdefault(k, v)
+
+def get_home() -> pathlib.Path:
+    if os.name == 'nt':
+        return pathlib.Path(os.environ['USERPROFILE'])
+    return pathlib.Path(os.environ['HOME'])
+
+def is_behavior_video_file(path: pathlib.Path) -> bool:
+    if path.is_dir() or path.suffix not in ('.mp4', '.avi', '.json'):
+        return False
+    with contextlib.suppress(ValueError):
+        _ = npc_session.extract_mvr_camera_name(path.as_posix())
+        return True
+    return False
+
+def is_surface_channel_recording(path_name: str) -> bool:
+    """
+    >>> import np_session
+    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
+    >>> is_surface_channel_recording(session.npexp_path.as_posix())
+    True
+    """
+    return 'surface_channels' in path_name.lower()
+
+def cleanup_ephys_symlinks(toplevel_dir: pathlib.Path) -> None:
+    """After creating symlinks to the ephys data, run this to make any necessary
+    modifications prior to upload.
+
+    Provided dir path should be a directory containing all ephys data in
+    subfolders (e.g. directory containing "Record Node 10x" folders)
+
+    Only deletes symlinks or writes new files in place of symlinks - does not
+    modify original data.
+
+    Rules:
+    - if any continuous.dat files are unreadable: remove them and their containing folders
+    - if any probes were recorded on multiple record nodes: just keep the first
+    - if continuous.dat files are missing (ie. excluded because probes weren't
+      inserted, or we removed symlinks in previous steps): update metadata files
+    """
+    remove_unreadable_ephys_data(toplevel_dir)
+    remove_duplicate_ephys_data(toplevel_dir)
+    cleanup_ephys_metadata(toplevel_dir)
+
+def remove_unreadable_ephys_data(toplevel_dir: pathlib.Path) -> None:
+
+    for continuous_dir in ephys_continuous_dir_generator(toplevel_dir):
+        events_dir = continuous_dir.parent.parent / 'events' / continuous_dir.name / 'TTL'
+        filenames = ('continuous.dat', 'timestamps.npy', 'sample_numbers.npy')
+        dirs = (continuous_dir, ) + ((events_dir,) if events_dir.exists() else ())
+        mark_for_removal = False
+        for d in dirs:
+            if not d.exists():
+                continue
+            for filename in filenames:
+                if filename == 'continuous.dat' and d.name == 'TTL':
+                    continue # no continuous.dat expected in TTL events
+                file = d / filename
+                if not (file.is_symlink() or file.exists()):
+                    logger.warning(f'Critical file not found {file}, insufficient data for processing')
+                    mark_for_removal = True
+                    break
+                try:
+                    data = np.memmap(decode_symlink_path(file), dtype="int16" if 'timestamps' not in file.name else "float64", mode="r")
+                except Exception as exc:
+                    logger.warning(f'Failed to read {file}: {exc!r}')
+                    mark_for_removal = True
+                    break
+                if data.size == 0:
+                    logger.warning(f'Empty file {file}')
+                    mark_for_removal = True
+                    break
+                logger.debug(f'Found readable, non-empty data in {file}')
+            if mark_for_removal:
+                break
+        if mark_for_removal:
+            logger.warning(f'Removing {continuous_dir} and its contents')
+            remove_folder_of_symlinks(continuous_dir)
+            logger.warning(f'Removing {events_dir.parent} and its contents')
+            remove_folder_of_symlinks(events_dir.parent)
+
+def remove_duplicate_ephys_data(toplevel_dir: pathlib.Path) -> None:
+    previous_recording_name = ''
+    for continuous_dir in ephys_continuous_dir_generator(toplevel_dir):
+        recording_name = continuous_dir.parent.parent.name
+        if recording_name != previous_recording_name:
+            # reset probes list for each new recording
+            probes = []
+        try:
+            probe = npc_session.ProbeRecord(continuous_dir.name)
+        except ValueError:
+            continue
+        suffix = continuous_dir.name.split('-')[-1]
+        assert suffix in ('AP', 'LFP')
+        recording_name = f"{probe}-{suffix}"
+        if recording_name in probes:
+            logger.info(f'Duplicate {recording_name = } found in {continuous_dir.parent.parent} - removing')
+            remove_folder_of_symlinks(continuous_dir)
+        else:
+            probes.append(recording_name)
+
+def remove_folder_of_symlinks(folder: pathlib.Path) -> None:
+    """Recursive deletion of all files in dir tree, with a check that each is a
+    symlink."""
+    for path in folder.rglob('*'):
+        if path.is_dir():
+            remove_folder_of_symlinks(path)
+        else:
+            assert path.is_symlink(), f'Expected {path} to be a symlink'
+            path.unlink(missing_ok=True)
+    with contextlib.suppress(FileNotFoundError):
+        folder.rmdir()
+
+def ephys_recording_dir_generator(toplevel_dir: pathlib.Path) -> Generator[pathlib.Path, None, None]:
+    for recording_dir in toplevel_dir.rglob('recording[0-9]*'):
+        if recording_dir.is_dir():
+            yield recording_dir
+
+def ephys_continuous_dir_generator(toplevel_dir: pathlib.Path) -> Generator[pathlib.Path, None, None]:
+    for recording_dir in ephys_recording_dir_generator(toplevel_dir):
+        parent = recording_dir / 'continuous'
+        if not parent.exists():
+            continue
+        for continuous_dir in parent.iterdir():
+            if continuous_dir.is_dir():
+                yield continuous_dir
+
+def ephys_structure_oebin_generator(toplevel_dir: pathlib.Path) -> Generator[pathlib.Path, None, None]:
+    for recording_dir in ephys_recording_dir_generator(toplevel_dir):
+        oebin_path = recording_dir / 'structure.oebin'
+        if not (oebin_path.is_symlink() or oebin_path.exists()):
+            # symlinks that are created for the hpc use posix paths, and aren't
+            # readable on windows, so .exists() returns False: use .is_symlink() instead
+            logger.warning(f'No structure.oebin found in {recording_dir}')
+            continue
+        yield oebin_path
+
+def cleanup_ephys_metadata(toplevel_dir: pathlib.Path) -> None:
+    logger.debug('Checking structure.oebin for missing folders...')
+    for oebin_path in ephys_structure_oebin_generator(toplevel_dir):
+        oebin_obj = np_tools.read_oebin(decode_symlink_path(oebin_path))
+        logger.debug(f'Checking {oebin_path} against actual folders...')
+        any_removed = False
+        for subdir_name in ('events', 'continuous'):
+            subdir = oebin_path.parent / subdir_name
+            # iterate over copy of list so as to not disrupt iteration when elements are removed
+            for device in [device for device in oebin_obj[subdir_name]]:
+                if not (subdir / device['folder_name']).exists():
+                    logger.info(f'{device["folder_name"]} not found in {subdir}, removing from structure.oebin')
+                    oebin_obj[subdir_name].remove(device)
+                    any_removed = True
+        if any_removed:
+            oebin_path.unlink()
+            oebin_path.write_text(json.dumps(oebin_obj, indent=4))
+            logger.debug('Overwrote symlink to structure.oebin with corrected structure.oebin')
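As a rough, illustrative sketch (not part of the package) of how the symlink-cleanup entry point above might be driven: the staging directory below is hypothetical, and is assumed to already contain symlinks to the session's "Record Node" folders.

    import pathlib
    from np_codeocean import utils

    staging_dir = pathlib.Path('C:/scratch/DRpilot_366122_20240101/ephys')  # hypothetical staging dir of symlinks
    utils.cleanup_ephys_symlinks(staging_dir)
    # unreadable or duplicate probe folders are unlinked, and structure.oebin is
    # rewritten in place of its symlink to match the folders that remain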
+
+def write_corrected_ephys_timestamps(
+    ephys_dir: pathlib.Path,
+    behavior_dir: pathlib.Path,
+) -> None:
+    for path in itertools.chain(behavior_dir.glob('*.h5'), behavior_dir.glob('*.sync')):
+        with contextlib.suppress(Exception):
+            sync_dataset = npc_sync.SyncDataset(path)
+            _ = sync_dataset.line_labels
+            logger.info(f'Found valid sync file {path.as_posix()}')
+            break
+    else:
+        raise SyncFileNotFoundError(f'No valid sync file found in {behavior_dir.as_posix()}')
+
+    timing_on_pxi = (
+        timing
+        for timing in npc_ephys.get_ephys_timing_on_pxi(
+            ephys_dir.glob("**/experiment*/recording*"),
+        )
+    )
+    timing_on_sync = (
+        npc_ephys.get_ephys_timing_on_sync(
+            sync=sync_dataset,
+            devices=timing_on_pxi,
+        )
+    )
+    npc_ephys.overwrite_timestamps(timing_on_sync)
+    logger.info(f'Corrected timestamps in {ephys_dir}')
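A minimal sketch of how the timestamp-correction step above might be invoked, assuming a folder of Open Ephys recordings and a behavior folder containing a sync (.h5/.sync) file; both paths are hypothetical.

    import pathlib
    from np_codeocean import utils

    session_root = pathlib.Path('C:/scratch/DRpilot_366122_20240101')  # hypothetical
    try:
        utils.write_corrected_ephys_timestamps(
            ephys_dir=session_root / 'ephys',
            behavior_dir=session_root / 'behavior',
        )
    except utils.SyncFileNotFoundError:
        pass  # no usable sync file: leave the original timestamps untouched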
+
+def decode_symlink_path(oebin_path: pathlib.Path) -> pathlib.Path:
+    if not oebin_path.is_symlink():
+        return oebin_path
+    return np_config.normalize_path(oebin_path.readlink())
+
+def is_csv_in_hpc_upload_queue(csv_path: pathlib.Path, upload_service_url: str = AIND_DATA_TRANSFER_SERVICE) -> bool:
+    """Check if an upload job has been submitted to the hpc upload queue.
+
+    - currently assumes one job per csv
+    - does not check status (job may be FINISHED rather than RUNNING)
+
+    >>> is_csv_in_hpc_upload_queue("//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_664851_20231114/upload.csv")
+    False
+    """
+    # get subject-id, acq-datetime from csv
+    df = pl.read_csv(csv_path, eol_char='\r')
+    for col in df.get_columns():
+        if col.name.startswith('subject') and col.name.endswith('id'):
+            subject = npc_session.SubjectRecord(col[0])
+            continue
+        if col.name.startswith('acq') and 'datetime' in col.name.lower():
+            dt = npc_session.DatetimeRecord(col[0])
+            continue
+        if col.name == 'platform':
+            platform = col[0]
+            continue
+    return is_session_in_hpc_queue(subject=subject, acq_datetime=dt.dt, platform=platform, upload_service_url=upload_service_url)
+
+def is_session_in_hpc_queue(subject: int | str, acq_datetime: str | datetime.datetime, platform: str | None = None, upload_service_url: str = AIND_DATA_TRANSFER_SERVICE) -> bool:
+    """
+    >>> is_session_in_hpc_queue(366122, datetime.datetime(2023, 11, 14, 0, 0, 0))
+    False
+    >>> is_session_in_hpc_queue(702136, datetime.datetime(2024, 3, 4, 13, 21, 35))
+    True
+    """
+    if not isinstance(acq_datetime, datetime.datetime):
+        acq_datetime = datetime.datetime.strptime(acq_datetime, ACQ_DATETIME_FORMAT)
+    partial_session_id = f"{subject}_{acq_datetime.strftime(ACQ_DATETIME_FORMAT).replace(' ', '_').replace(':', '-')}"
+    if platform:
+        partial_session_id = f"{platform}_{partial_session_id}"
+
+    jobs_response = requests.get(f"{upload_service_url}/jobs")
+    jobs_response.raise_for_status()
+    return partial_session_id in jobs_response.content.decode()
+
+def is_job_in_hpc_upload_queue(job: aind_data_transfer_models.core.BasicUploadJobConfigs, upload_service_url: str = AIND_DATA_TRANSFER_SERVICE) -> bool:
+    return is_session_in_hpc_queue(job.subject_id, job.acq_datetime, job.platform, upload_service_url)
+
+def write_upload_csv(
+    content: dict[str, Any],
+    output_path: pathlib.Path,
+) -> pathlib.Path:
+    logger.info(f'Creating upload job file {output_path}')
+    with open(output_path, 'w') as f:
+        w = csv.writer(f, lineterminator='')
+        w.writerow(content.keys())
+        w.writerow('\n')
+        w.writerow(content.values())
+    return output_path
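A sketch of the kind of single-row content dict that write_upload_csv might be given, using the column names that is_csv_in_hpc_upload_queue and get_job_models_from_csv (below) look for ('platform', 'subject-id', 'acq-datetime', 'modality0', 'modality0.source'); all values here are hypothetical.

    import pathlib
    from np_codeocean import utils

    content = {
        'platform': 'ecephys',                      # hypothetical values throughout
        'subject-id': '366122',
        'acq-datetime': '2024-01-01 12:00:00',      # matches ACQ_DATETIME_FORMAT
        'modality0': 'ecephys',
        'modality0.source': '//allen/scratch/DRpilot_366122_20240101/ephys',
    }
    csv_path = utils.write_upload_csv(content, pathlib.Path('upload.csv'))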
+
+def get_job_models_from_csv(
+    path: pathlib.Path,
+    ephys_slurm_settings: aind_slurm_rest.models.V0036JobProperties = DEFAULT_EPHYS_SLURM_SETTINGS,
+    user_email: str = HPC_UPLOAD_JOB_EMAIL,
+    **extra_BasicUploadJobConfigs_params: Any,
+) -> tuple[aind_data_transfer_models.core.BasicUploadJobConfigs, ...]:
+    jobs = pl.read_csv(path, eol_char='\r').with_columns(
+        pl.col('subject-id').cast(str),
+    ).to_dicts()
+    jobs = jobs
+    models = []
+    for job in jobs.copy():
+        modalities = []
+        for modality_column in (k for k in job.keys() if k.startswith('modality') and ".source" not in k):
+            modality_name = job[modality_column]
+            modalities.append(
+                aind_data_transfer_models.core.ModalityConfigs(
+                    modality=modality_name,
+                    source=job[f"{modality_column}.source"],
+                    slurm_settings = ephys_slurm_settings if modality_name == 'ecephys' else None,
+                ),
+            )
+        for k in (k for k in job.copy().keys() if k.startswith('modality')):
+            del job[k]
+        for k, v in job.items():
+            if isinstance(v, str) and '\n' in v:
+                job[k] = v.replace('\n', '')
+        models.append(
+            aind_data_transfer_models.core.BasicUploadJobConfigs(
+                **{k.replace('-', '_'): v for k,v in job.items()},
+                modalities=modalities,
+                user_email=user_email,
+                **extra_BasicUploadJobConfigs_params,
+            )
+        )
+    return tuple(models)
+
+def put_jobs_for_hpc_upload(
+    upload_jobs: aind_data_transfer_models.core.BasicUploadJobConfigs | Iterable[aind_data_transfer_models.core.BasicUploadJobConfigs],
+    upload_service_url: str = AIND_DATA_TRANSFER_SERVICE,
+    user_email: str = HPC_UPLOAD_JOB_EMAIL,
+    email_notification_types: Iterable[str | aind_data_transfer_models.core.EmailNotificationType] = ('fail',),
+    dry_run: bool = False,
+    save_path: pathlib.Path | None = None,
+    **extra_model_kwargs: Any,
+) -> None:
+    """Submit one or more jobs to the aind-data-transfer-service, for
+    upload to S3 on the hpc.
+
+    - accepts one or more aind_data_schema BasicUploadJobConfigs models
+    - assembles a SubmitJobRequest model
+    - excludes jobs for sessions that are already in the upload queue
+    - accepts additional parameters for SubmitHpcJobRequest as kwargs
+    - submits json via http request
+    - optionally saves the json file as a record
+    """
+    if not isinstance(upload_jobs, Iterable):
+        upload_jobs = (upload_jobs, )
+    submit_request = aind_data_transfer_models.core.SubmitJobRequest(
+        upload_jobs=[job for job in upload_jobs if not is_job_in_hpc_upload_queue(job)],
+        user_email=user_email,
+        email_notification_types=email_notification_types,
+        **extra_model_kwargs,
+    )
+    post_request_content = json.loads(
+        submit_request.model_dump_json(round_trip=True, exclude_none=True)
+    ) #! round_trip required for s3 bucket suffix to work correctly
+    if save_path:
+        save_path.write_text(submit_request.model_dump_json(round_trip=True, indent=4), errors='ignore')
+    if dry_run:
+        logger.warning(f'Dry run: not submitting {len(upload_jobs)} upload job(s) to {upload_service_url}')
+        return
+    post_json_response: requests.Response = requests.post(
+        url=f"{upload_service_url}/api/v1/submit_jobs",
+        json=post_request_content,
+    )
+    logger.info(f"Submitted {len(upload_jobs)} upload job(s) to {upload_service_url}")
+    post_json_response.raise_for_status()
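Putting the two helpers above together, a hedged sketch of the CSV-driven submission flow (dry_run=True so nothing is actually posted; the csv path is hypothetical).

    import pathlib
    from np_codeocean import utils

    csv_path = pathlib.Path('//allen/scratch/DRpilot_366122_20240101/upload.csv')  # hypothetical
    jobs = utils.get_job_models_from_csv(csv_path)
    utils.put_jobs_for_hpc_upload(
        jobs,
        dry_run=True,                              # set False to actually submit
        save_path=csv_path.with_suffix('.json'),   # optionally keep a record of the request body
    )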
+
+@typing_extensions.deprecated("Uses old, pre-v1 endpoints: use put_jobs_for_hpc_upload in combination with get_job_models_from_csv")
+def put_csv_for_hpc_upload(
+    csv_path: pathlib.Path,
+    upload_service_url: str = AIND_DATA_TRANSFER_SERVICE,
+    hpc_upload_job_email: str = HPC_UPLOAD_JOB_EMAIL,
+    dry_run: bool = False,
+) -> None:
+    """Submit a single job upload csv to the aind-data-transfer-service, for
+    upload to S3 on the hpc.
+
+    - gets validated version of csv
+    - checks session is not already being uploaded
+    - submits csv via http request
+    """
+    def _raise_for_status(response: requests.Response) -> None:
+        """pydantic validation errors are returned as strings that can be eval'd
+        to get the real error class + message."""
+        if response.status_code != 200:
+            try:
+                response.json()['data']['errors']
+            except (KeyError, IndexError, requests.exceptions.JSONDecodeError, SyntaxError) as exc1:
+                try:
+                    response.raise_for_status()
+                except requests.exceptions.HTTPError as exc2:
+                    raise exc2 from exc1
+
+    with open(csv_path, 'rb') as f:
+        validate_csv_response = requests.post(
+            url=f"{upload_service_url}/api/validate_csv",
+            files=dict(file=f),
+        )
+    _raise_for_status(validate_csv_response)
+    logger.debug(f"Validated response: {validate_csv_response.json()}")
+    if is_csv_in_hpc_upload_queue(csv_path, upload_service_url):
+        logger.warning(f"Job already submitted for {csv_path}")
+        return
+    if dry_run:
+        logger.info(f'Dry run: not submitting {csv_path} to hpc upload queue at {upload_service_url}.')
+        return
+    post_csv_response = requests.post(
+        url=f"{upload_service_url}/api/submit_hpc_jobs",
+        json=dict(
+            jobs=[
+                dict(
+                    hpc_settings=json.dumps({"time_limit": 60 * 15, "mail_user": hpc_upload_job_email}),
+                    upload_job_settings=validate_csv_response.json()["data"]["jobs"][0],
+                    script="",
+                )
+            ]
+        ),
+    )
+    logger.info(f"Submitted {csv_path} to hpc upload queue at {upload_service_url}")
+    _raise_for_status(post_csv_response)
+
+
+def ensure_posix(path: str | pathlib.Path) -> str:
+    posix = pathlib.Path(path).as_posix()
+    if posix.startswith('//'):
+        posix = posix[1:]
+    return posix
+
+
+def convert_symlinks_to_posix(toplevel_dir: str | pathlib.Path) -> None:
+    """Convert all symlinks in `root_dir` (recursively) to POSIX paths. This is a
+    necessary last step before submitting uploads to run on the HPC.
+    """
+    for path in pathlib.Path(toplevel_dir).rglob('*'):
+        if path.is_symlink():
+            posix_target = path.readlink().as_posix().removeprefix('//?/UNC')
+            path.unlink()
+            np_tools.symlink(src=ensure_posix(posix_target), dest=path)
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod(optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.IGNORE_EXCEPTION_DETAIL)
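For completeness, a short sketch of the final staging step described in convert_symlinks_to_posix: converting Windows-style symlink targets to POSIX paths before the upload job runs on the HPC (the staging dir is hypothetical).

    from np_codeocean import utils

    utils.ensure_posix('//allen/programs/mindscope/workgroups/np-exp')  # -> '/allen/programs/mindscope/workgroups/np-exp'
    utils.convert_symlinks_to_posix('C:/scratch/DRpilot_366122_20240101')  # hypothetical staging dir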
{np_codeocean-0.2.0.dist-info → np_codeocean-0.3.0.dist-info}/METADATA
CHANGED
@@ -1,8 +1,8 @@
 Metadata-Version: 2.1
 Name: np_codeocean
-Version: 0.2.0
+Version: 0.3.0
 Summary: Tools for uploading and interacting with Mindscope Neuropixels experiments on Code Ocean
-Author-Email: Ben Hardcastle <ben.hardcastle@alleninstitute.org>
+Author-Email: Ben Hardcastle <ben.hardcastle@alleninstitute.org>, Chris Mochizuki <chrism@alleninstitute.org>, Arjun Sridhar <arjun.sridhar@alleninstitute.org>
 License: MIT
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
@@ -13,16 +13,25 @@ Classifier: Operating System :: Microsoft :: Windows
 Classifier: Operating System :: POSIX :: Linux
 Project-URL: Source, https://github.com/AllenInstitute/np_codeocean
 Project-URL: Issues, https://github.com/AllenInstitute/np_codeocean/issues
-Requires-Python: >=3.
-Requires-Dist: np_session>=0.6.
-Requires-Dist: np-tools>=0.1.
-Requires-Dist: np-config>=0.4.
+Requires-Python: >=3.10
+Requires-Dist: np_session>=0.6.44
+Requires-Dist: np-tools>=0.1.23
+Requires-Dist: np-config>=0.4.33
 Requires-Dist: requests>=2.31.0
 Requires-Dist: npc-session>=0.1.34
 Requires-Dist: polars>=0.20.16
+Requires-Dist: npc-lims>=0.1.168
+Requires-Dist: npc-ephys>=0.1.28
+Provides-Extra: dev
 Requires-Dist: bump>=1.3.2; extra == "dev"
 Requires-Dist: pdm>=2.4.9; extra == "dev"
-Provides-Extra:
+Provides-Extra: dynamic-routing-metadata
+Requires-Dist: np-aind-metadata>=0.1.17; extra == "dynamic-routing-metadata"
+Requires-Dist: npc-lims>=0.1.154; extra == "dynamic-routing-metadata"
+Requires-Dist: npc-sessions>=0.0.253; extra == "dynamic-routing-metadata"
+Requires-Dist: aind-data-transfer-models>=0.13.1; extra == "dynamic-routing-metadata"
+Requires-Dist: aind-codeocean-pipeline-monitor[full]>=0.5.0; extra == "dynamic-routing-metadata"
+Requires-Dist: aind-metadata-mapper==0.18.2; extra == "dynamic-routing-metadata"
 Description-Content-Type: text/markdown

 # np_codeocean
np_codeocean-0.3.0.dist-info/RECORD
ADDED
@@ -0,0 +1,12 @@
+np_codeocean-0.3.0.dist-info/METADATA,sha256=LdDu8xF8-r2bQMp5xf5WbFDa78CRYWB11wByNNPkwfQ,3177
+np_codeocean-0.3.0.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
+np_codeocean-0.3.0.dist-info/entry_points.txt,sha256=p32aRkIjrFa4KtUbq2E6ZMYBVNRUw3U8ZIarvwNkK1E,250
+np_codeocean/__init__.py,sha256=ED7YOu-3AIQIEML9jPT9bQ690lfhYZNiOB4QhnJ8r8U,76
+np_codeocean/np_session_utils.py,sha256=yRz62ybX7k192Qr_viRPOL3saIPh5EH60KakAva5Wck,16659
+np_codeocean/scripts/fix_ephys_data_on_s3.py,sha256=c5jHZmFLjMCRGb3YSmefCZRO_telZ7dB-mXGIG5ncYk,560
+np_codeocean/scripts/upload_dynamic_routing_behavior.py,sha256=mc_dRF_sllvIlaDiCvWBMY8XZGji3dqsMJBJzykdS8M,17190
+np_codeocean/scripts/upload_dynamic_routing_ecephys.py,sha256=ywlemwzY8IFK7h-s759tNsKW2av9CQsZBRerUibtUd4,9934
+np_codeocean/scripts/upload_ethan_analysis_files.py,sha256=MaJRVk0CfzEMkwMmmXRmnRCqYpo6mGNWtROfZLavgGw,1019
+np_codeocean/scripts/upload_split_recordings_example.py,sha256=1_aqoBxAkB_VpRKYqyPsEQBDGvgyAHXAkIJA0ZT2Vb0,1490
+np_codeocean/utils.py,sha256=Pni1c8iKIe94lPLOTBha8MLlSYPqUluWXXVCNyOsGbw,19971
+np_codeocean-0.3.0.dist-info/RECORD,,