np_codeocean 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the content of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those published versions.
np_codeocean/utils.py CHANGED
@@ -1,94 +1,452 @@
- from __future__ import annotations
-
- import configparser
- import json
- import os
- import pathlib
- from typing import Literal
-
- import np_config
-
- CONFIG = np_config.fetch('/projects/np_codeocean')
- """Config for this project"""
-
- AWS_CONFIG: dict[Literal['aws_access_key_id', 'aws_secret_access_key'], str] = np_config.fetch('/projects/np_codeocean/aws')['config']
- """Config for connecting to AWS/S3 via awscli/boto3"""
-
- AWS_CREDENTIALS: dict[Literal['domain', 'token'], str] = np_config.fetch('/projects/np_codeocean/aws')['credentials']
- """Config for connecting to AWS/S3 via awscli/boto3"""
-
- CODEOCEAN_CONFIG: dict[Literal['region'], str] = np_config.fetch('/projects/np_codeocean/codeocean')['credentials']
- """Config for connecting to CodeOcean via http API"""
-
-
- def get_home() -> pathlib.Path:
-     if os.name == 'nt':
-         return pathlib.Path(os.environ['USERPROFILE'])
-     return pathlib.Path(os.environ['HOME'])
-
- def get_aws_files() -> dict[Literal['config', 'credentials'], pathlib.Path]:
-     return {
-         'config': get_home() / '.aws' / 'config',
-         'credentials': get_home() / '.aws' / 'credentials',
-     }
-
- def get_codeocean_files() -> dict[Literal['credentials'], pathlib.Path]:
-     return {
-         'credentials': get_home() / '.codeocean' / 'credentials.json',
-     }
-
- def verify_ini_config(path: pathlib.Path, contents: dict, profile: str = 'default') -> None:
-     config = configparser.ConfigParser()
-     if path.exists():
-         config.read(path)
-     if not all(k in config[profile] for k in contents):
-         raise ValueError(f'Profile {profile} in {path} exists but is missing some keys required for codeocean or s3 access.')
-
- def write_or_verify_ini_config(path: pathlib.Path, contents: dict, profile: str = 'default') -> None:
-     config = configparser.ConfigParser()
-     if path.exists():
-         config.read(path)
-     try:
-         verify_ini_config(path, contents, profile)
-     except ValueError:
-         pass
-     else:
-         return
-     config[profile] = contents
-     path.parent.mkdir(parents=True, exist_ok=True)
-     path.touch(exist_ok=True)
-     with path.open('w') as f:
-         config.write(f)
-     verify_ini_config(path, contents, profile)
-
- def verify_json_config(path: pathlib.Path, contents: dict) -> None:
-     config = json.loads(path.read_text())
-     if not all(k in config for k in contents):
-         raise ValueError(f'{path} exists but is missing some keys required for codeocean or s3 access.')
-
- def write_or_verify_json_config(path: pathlib.Path, contents: dict) -> None:
-     if path.exists():
-         try:
-             verify_json_config(path, contents)
-         except ValueError:
-             contents = np_config.merge(json.loads(path.read_text()), contents)
-         else:
-             return
-     path.parent.mkdir(parents=True, exist_ok=True)
-     path.touch(exist_ok=True)
-     path.write_text(json.dumps(contents, indent=4))
-
- def ensure_credentials() -> None:
-     for file, contents in (
-         (get_aws_files()['config'], AWS_CONFIG),
-         (get_aws_files()['credentials'], AWS_CREDENTIALS),
-     ):
-         write_or_verify_ini_config(file, contents, profile='default')
-
-     for file, contents in (
-         (get_codeocean_files()['credentials'], CODEOCEAN_CONFIG),
-     ):
-         write_or_verify_json_config(file, contents)
-
- if __name__ == '__main__':
-     ensure_credentials()
+ from __future__ import annotations
+
+ import contextlib
+ import csv
+ import datetime
+ import functools
+ import itertools
+ import json
+ import logging
+ import os
+ import pathlib
+ from typing import Any, Generator, Iterable, Literal
+ import typing_extensions
+
+ import aind_data_transfer_models.core
+ import aind_slurm_rest.models
+ import np_config
+ import np_tools
+ import npc_ephys
+ import npc_sync
+ import npc_session
+ import numpy as np
+ import polars as pl
+ import requests
+
+ logger = logging.getLogger(__name__)
+
+ AINDPlatform = Literal['ecephys', 'behavior']
+
+ AIND_DATA_TRANSFER_SERVICE = "http://aind-data-transfer-service"
+ DEV_SERVICE = "http://aind-data-transfer-service-dev"
+ HPC_UPLOAD_JOB_EMAIL = "ben.hardcastle@alleninstitute.org"
+ ACQ_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
+
+ AIND_METADATA_NAMES: tuple[str, ...] = ('session', 'data_description', 'procedures', 'processing', 'rig', 'subject')
+
+ DEFAULT_EPHYS_SLURM_SETTINGS = aind_slurm_rest.models.V0036JobProperties(
+     environment=dict(), # JonY: set this to an empty dictionary
+     time_limit = 15 * 60,
+     minimum_cpus_per_node=12, # 6 probes * (lfp + ap)
+ )
+ """Increased timelimit and cpus for running ephys compression on the hpc"""
+
+ class SyncFileNotFoundError(FileNotFoundError):
+     pass
+
+ @functools.cache
+ def get_project_config() -> dict[str, Any]:
+     """Config for this project"""
+     return np_config.fetch('/projects/np_codeocean')
+
+ def set_npc_lims_credentials() -> None:
+     creds = np_config.fetch('/projects/np_codeocean/npc_lims')
+     for k, v in creds.items():
+         os.environ.setdefault(k, v)
+
+ def get_home() -> pathlib.Path:
+     if os.name == 'nt':
+         return pathlib.Path(os.environ['USERPROFILE'])
+     return pathlib.Path(os.environ['HOME'])
+
+ def is_behavior_video_file(path: pathlib.Path) -> bool:
+     if path.is_dir() or path.suffix not in ('.mp4', '.avi', '.json'):
+         return False
+     with contextlib.suppress(ValueError):
+         _ = npc_session.extract_mvr_camera_name(path.as_posix())
+         return True
+     return False
+
+ def is_surface_channel_recording(path_name: str) -> bool:
+     """
+     >>> import np_session
+     >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
+     >>> is_surface_channel_recording(session.npexp_path.as_posix())
+     True
+     """
+     return 'surface_channels' in path_name.lower()
+
+ def cleanup_ephys_symlinks(toplevel_dir: pathlib.Path) -> None:
+     """After creating symlinks to the ephys data, run this to make any necessary
+     modifications prior to upload.
+
+     Provided dir path should be a directory containing all ephys data in
+     subfolders (e.g. directory containing "Record Node 10x" folders)
+
+     Only deletes symlinks or writes new files in place of symlinks - does not
+     modify original data.
+
+     Rules:
+     - if any continuous.dat files are unreadable: remove them and their containing folders
+     - if any probes were recorded on multiple record nodes: just keep the first
+     - if continuous.dat files are missing (ie. excluded because probes weren't
+       inserted, or we removed symlinks in previous steps): update metadata files
+     """
+     remove_unreadable_ephys_data(toplevel_dir)
+     remove_duplicate_ephys_data(toplevel_dir)
+     cleanup_ephys_metadata(toplevel_dir)
+
+ def remove_unreadable_ephys_data(toplevel_dir: pathlib.Path) -> None:
+
+     for continuous_dir in ephys_continuous_dir_generator(toplevel_dir):
+         events_dir = continuous_dir.parent.parent / 'events' / continuous_dir.name / 'TTL'
+         filenames = ('continuous.dat', 'timestamps.npy', 'sample_numbers.npy')
+         dirs = (continuous_dir, ) + ((events_dir,) if events_dir.exists() else ())
+         mark_for_removal = False
+         for d in dirs:
+             if not d.exists():
+                 continue
+             for filename in filenames:
+                 if filename == 'continuous.dat' and d.name == 'TTL':
+                     continue # no continuous.dat expected in TTL events
+                 file = d / filename
+                 if not (file.is_symlink() or file.exists()):
+                     logger.warning(f'Critical file not found {file}, insufficient data for processing')
+                     mark_for_removal = True
+                     break
+                 try:
+                     data = np.memmap(decode_symlink_path(file), dtype="int16" if 'timestamps' not in file.name else "float64", mode="r")
+                 except Exception as exc:
+                     logger.warning(f'Failed to read {file}: {exc!r}')
+                     mark_for_removal = True
+                     break
+                 if data.size == 0:
+                     logger.warning(f'Empty file {file}')
+                     mark_for_removal = True
+                     break
+                 logger.debug(f'Found readable, non-empty data in {file}')
+             if mark_for_removal:
+                 break
+         if mark_for_removal:
+             logger.warning(f'Removing {continuous_dir} and its contents')
+             remove_folder_of_symlinks(continuous_dir)
+             logger.warning(f'Removing {events_dir.parent} and its contents')
+             remove_folder_of_symlinks(events_dir.parent)
+
+ def remove_duplicate_ephys_data(toplevel_dir: pathlib.Path) -> None:
+     previous_recording_name = ''
+     for continuous_dir in ephys_continuous_dir_generator(toplevel_dir):
+         recording_name = continuous_dir.parent.parent.name
+         if recording_name != previous_recording_name:
+             # reset probes list for each new recording
+             probes = []
+         try:
+             probe = npc_session.ProbeRecord(continuous_dir.name)
+         except ValueError:
+             continue
+         suffix = continuous_dir.name.split('-')[-1]
+         assert suffix in ('AP', 'LFP')
+         recording_name = f"{probe}-{suffix}"
+         if recording_name in probes:
+             logger.info(f'Duplicate {recording_name = } found in {continuous_dir.parent.parent} - removing')
+             remove_folder_of_symlinks(continuous_dir)
+         else:
+             probes.append(recording_name)
+
+ def remove_folder_of_symlinks(folder: pathlib.Path) -> None:
+     """Recursive deletion of all files in dir tree, with a check that each is a
+     symlink."""
+     for path in folder.rglob('*'):
+         if path.is_dir():
+             remove_folder_of_symlinks(path)
+         else:
+             assert path.is_symlink(), f'Expected {path} to be a symlink'
+             path.unlink(missing_ok=True)
+     with contextlib.suppress(FileNotFoundError):
+         folder.rmdir()
+
+ def ephys_recording_dir_generator(toplevel_dir: pathlib.Path) -> Generator[pathlib.Path, None, None]:
+     for recording_dir in toplevel_dir.rglob('recording[0-9]*'):
+         if recording_dir.is_dir():
+             yield recording_dir
+
+ def ephys_continuous_dir_generator(toplevel_dir: pathlib.Path) -> Generator[pathlib.Path, None, None]:
+     for recording_dir in ephys_recording_dir_generator(toplevel_dir):
+         parent = recording_dir / 'continuous'
+         if not parent.exists():
+             continue
+         for continuous_dir in parent.iterdir():
+             if continuous_dir.is_dir():
+                 yield continuous_dir
+
+ def ephys_structure_oebin_generator(toplevel_dir: pathlib.Path) -> Generator[pathlib.Path, None, None]:
+     for recording_dir in ephys_recording_dir_generator(toplevel_dir):
+         oebin_path = recording_dir / 'structure.oebin'
+         if not (oebin_path.is_symlink() or oebin_path.exists()):
+             # symlinks that are created for the hpc use posix paths, and aren't
+             # readable on windows, so .exists() returns False: use .is_symlink() instead
+             logger.warning(f'No structure.oebin found in {recording_dir}')
+             continue
+         yield oebin_path
+
+ def cleanup_ephys_metadata(toplevel_dir: pathlib.Path) -> None:
+     logger.debug('Checking structure.oebin for missing folders...')
+     for oebin_path in ephys_structure_oebin_generator(toplevel_dir):
+         oebin_obj = np_tools.read_oebin(decode_symlink_path(oebin_path))
+         logger.debug(f'Checking {oebin_path} against actual folders...')
+         any_removed = False
+         for subdir_name in ('events', 'continuous'):
+             subdir = oebin_path.parent / subdir_name
+             # iterate over copy of list so as to not disrupt iteration when elements are removed
+             for device in [device for device in oebin_obj[subdir_name]]:
+                 if not (subdir / device['folder_name']).exists():
+                     logger.info(f'{device["folder_name"]} not found in {subdir}, removing from structure.oebin')
+                     oebin_obj[subdir_name].remove(device)
+                     any_removed = True
+         if any_removed:
+             oebin_path.unlink()
+             oebin_path.write_text(json.dumps(oebin_obj, indent=4))
+             logger.debug('Overwrote symlink to structure.oebin with corrected structure.oebin')
+
+ def write_corrected_ephys_timestamps(
+     ephys_dir: pathlib.Path,
+     behavior_dir: pathlib.Path,
+ ) -> None:
+     for path in itertools.chain(behavior_dir.glob('*.h5'), behavior_dir.glob('*.sync')):
+         with contextlib.suppress(Exception):
+             sync_dataset = npc_sync.SyncDataset(path)
+             _ = sync_dataset.line_labels
+             logger.info(f'Found valid sync file {path.as_posix()}')
+             break
+     else:
+         raise SyncFileNotFoundError(f'No valid sync file found in {behavior_dir.as_posix()}')
+
+     timing_on_pxi = (
+         timing
+         for timing in npc_ephys.get_ephys_timing_on_pxi(
+             ephys_dir.glob("**/experiment*/recording*"),
+         )
+     )
+     timing_on_sync = (
+         npc_ephys.get_ephys_timing_on_sync(
+             sync=sync_dataset,
+             devices=timing_on_pxi,
+         )
+     )
+     npc_ephys.overwrite_timestamps(timing_on_sync)
+     logger.info(f'Corrected timestamps in {ephys_dir}')
+
+ def decode_symlink_path(oebin_path: pathlib.Path) -> pathlib.Path:
+     if not oebin_path.is_symlink():
+         return oebin_path
+     return np_config.normalize_path(oebin_path.readlink())
+
+ def is_csv_in_hpc_upload_queue(csv_path: pathlib.Path, upload_service_url: str = AIND_DATA_TRANSFER_SERVICE) -> bool:
+     """Check if an upload job has been submitted to the hpc upload queue.
+
+     - currently assumes one job per csv
+     - does not check status (job may be FINISHED rather than RUNNING)
+
+     >>> is_csv_in_hpc_upload_queue("//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_664851_20231114/upload.csv")
+     False
+     """
+     # get subject-id, acq-datetime from csv
+     df = pl.read_csv(csv_path, eol_char='\r')
+     for col in df.get_columns():
+         if col.name.startswith('subject') and col.name.endswith('id'):
+             subject = npc_session.SubjectRecord(col[0])
+             continue
+         if col.name.startswith('acq') and 'datetime' in col.name.lower():
+             dt = npc_session.DatetimeRecord(col[0])
+             continue
+         if col.name == 'platform':
+             platform = col[0]
+             continue
+     return is_session_in_hpc_queue(subject=subject, acq_datetime=dt.dt, platform=platform, upload_service_url=upload_service_url)
+
+ def is_session_in_hpc_queue(subject: int | str, acq_datetime: str | datetime.datetime, platform: str | None = None, upload_service_url: str = AIND_DATA_TRANSFER_SERVICE) -> bool:
+     """
+     >>> is_session_in_hpc_queue(366122, datetime.datetime(2023, 11, 14, 0, 0, 0))
+     False
+     >>> is_session_in_hpc_queue(702136, datetime.datetime(2024, 3, 4, 13, 21, 35))
+     True
+     """
+     if not isinstance(acq_datetime, datetime.datetime):
+         acq_datetime = datetime.datetime.strptime(acq_datetime, ACQ_DATETIME_FORMAT)
+     partial_session_id = f"{subject}_{acq_datetime.strftime(ACQ_DATETIME_FORMAT).replace(' ', '_').replace(':', '-')}"
+     if platform:
+         partial_session_id = f"{platform}_{partial_session_id}"
+
+     jobs_response = requests.get(f"{upload_service_url}/jobs")
+     jobs_response.raise_for_status()
+     return partial_session_id in jobs_response.content.decode()
+
+ def is_job_in_hpc_upload_queue(job: aind_data_transfer_models.core.BasicUploadJobConfigs, upload_service_url: str = AIND_DATA_TRANSFER_SERVICE) -> bool:
+     return is_session_in_hpc_queue(job.subject_id, job.acq_datetime, job.platform, upload_service_url)
+
+ def write_upload_csv(
+     content: dict[str, Any],
+     output_path: pathlib.Path,
+ ) -> pathlib.Path:
+     logger.info(f'Creating upload job file {output_path}')
+     with open(output_path, 'w') as f:
+         w = csv.writer(f, lineterminator='')
+         w.writerow(content.keys())
+         w.writerow('\n')
+         w.writerow(content.values())
+     return output_path
+
+ def get_job_models_from_csv(
+     path: pathlib.Path,
+     ephys_slurm_settings: aind_slurm_rest.models.V0036JobProperties = DEFAULT_EPHYS_SLURM_SETTINGS,
+     user_email: str = HPC_UPLOAD_JOB_EMAIL,
+     **extra_BasicUploadJobConfigs_params: Any,
+ ) -> tuple[aind_data_transfer_models.core.BasicUploadJobConfigs, ...]:
+     jobs = pl.read_csv(path, eol_char='\r').with_columns(
+         pl.col('subject-id').cast(str),
+     ).to_dicts()
+     jobs = jobs
+     models = []
+     for job in jobs.copy():
+         modalities = []
+         for modality_column in (k for k in job.keys() if k.startswith('modality') and ".source" not in k):
+             modality_name = job[modality_column]
+             modalities.append(
+                 aind_data_transfer_models.core.ModalityConfigs(
+                     modality=modality_name,
+                     source=job[f"{modality_column}.source"],
+                     slurm_settings = ephys_slurm_settings if modality_name == 'ecephys' else None,
+                 ),
+             )
+         for k in (k for k in job.copy().keys() if k.startswith('modality')):
+             del job[k]
+         for k, v in job.items():
+             if isinstance(v, str) and '\n' in v:
+                 job[k] = v.replace('\n', '')
+         models.append(
+             aind_data_transfer_models.core.BasicUploadJobConfigs(
+                 **{k.replace('-', '_'): v for k,v in job.items()},
+                 modalities=modalities,
+                 user_email=user_email,
+                 **extra_BasicUploadJobConfigs_params,
+             )
+         )
+     return tuple(models)
+
+ def put_jobs_for_hpc_upload(
+     upload_jobs: aind_data_transfer_models.core.BasicUploadJobConfigs | Iterable[aind_data_transfer_models.core.BasicUploadJobConfigs],
+     upload_service_url: str = AIND_DATA_TRANSFER_SERVICE,
+     user_email: str = HPC_UPLOAD_JOB_EMAIL,
+     email_notification_types: Iterable[str | aind_data_transfer_models.core.EmailNotificationType] = ('fail',),
+     dry_run: bool = False,
+     save_path: pathlib.Path | None = None,
+     **extra_model_kwargs: Any,
+ ) -> None:
+     """Submit one or more jobs to the aind-data-transfer-service, for
+     upload to S3 on the hpc.
+
+     - accepts one or more aind_data_schema BasicUploadJobConfigs models
+     - assembles a SubmitJobRequest model
+     - excludes jobs for sessions that are already in the upload queue
+     - accepts additional parameters for SubmitHpcJobRequest as kwargs
+     - submits json via http request
+     - optionally saves the json file as a record
+     """
+     if not isinstance(upload_jobs, Iterable):
+         upload_jobs = (upload_jobs, )
+     submit_request = aind_data_transfer_models.core.SubmitJobRequest(
+         upload_jobs=[job for job in upload_jobs if not is_job_in_hpc_upload_queue(job)],
+         user_email=user_email,
+         email_notification_types=email_notification_types,
+         **extra_model_kwargs,
+     )
+     post_request_content = json.loads(
+         submit_request.model_dump_json(round_trip=True, exclude_none=True)
+     ) #! round_trip required for s3 bucket suffix to work correctly
+     if save_path:
+         save_path.write_text(submit_request.model_dump_json(round_trip=True, indent=4), errors='ignore')
+     if dry_run:
+         logger.warning(f'Dry run: not submitting {len(upload_jobs)} upload job(s) to {upload_service_url}')
+         return
+     post_json_response: requests.Response = requests.post(
+         url=f"{upload_service_url}/api/v1/submit_jobs",
+         json=post_request_content,
+     )
+     logger.info(f"Submitted {len(upload_jobs)} upload job(s) to {upload_service_url}")
+     post_json_response.raise_for_status()
+
+ @typing_extensions.deprecated("Uses old, pre-v1 endpoints: use put_jobs_for_hpc_upload in combination with get_job_models_from_csv")
+ def put_csv_for_hpc_upload(
+     csv_path: pathlib.Path,
+     upload_service_url: str = AIND_DATA_TRANSFER_SERVICE,
+     hpc_upload_job_email: str = HPC_UPLOAD_JOB_EMAIL,
+     dry_run: bool = False,
+ ) -> None:
+     """Submit a single job upload csv to the aind-data-transfer-service, for
+     upload to S3 on the hpc.
+
+     - gets validated version of csv
+     - checks session is not already being uploaded
+     - submits csv via http request
+     """
+     def _raise_for_status(response: requests.Response) -> None:
+         """pydantic validation errors are returned as strings that can be eval'd
+         to get the real error class + message."""
+         if response.status_code != 200:
+             try:
+                 response.json()['data']['errors']
+             except (KeyError, IndexError, requests.exceptions.JSONDecodeError, SyntaxError) as exc1:
+                 try:
+                     response.raise_for_status()
+                 except requests.exceptions.HTTPError as exc2:
+                     raise exc2 from exc1
+
+     with open(csv_path, 'rb') as f:
+         validate_csv_response = requests.post(
+             url=f"{upload_service_url}/api/validate_csv",
+             files=dict(file=f),
+         )
+     _raise_for_status(validate_csv_response)
+     logger.debug(f"Validated response: {validate_csv_response.json()}")
+     if is_csv_in_hpc_upload_queue(csv_path, upload_service_url):
+         logger.warning(f"Job already submitted for {csv_path}")
+         return
+     if dry_run:
+         logger.info(f'Dry run: not submitting {csv_path} to hpc upload queue at {upload_service_url}.')
+         return
+     post_csv_response = requests.post(
+         url=f"{upload_service_url}/api/submit_hpc_jobs",
+         json=dict(
+             jobs=[
+                 dict(
+                     hpc_settings=json.dumps({"time_limit": 60 * 15, "mail_user": hpc_upload_job_email}),
+                     upload_job_settings=validate_csv_response.json()["data"]["jobs"][0],
+                     script="",
+                 )
+             ]
+         ),
+     )
+     logger.info(f"Submitted {csv_path} to hpc upload queue at {upload_service_url}")
+     _raise_for_status(post_csv_response)
+
+
+ def ensure_posix(path: str | pathlib.Path) -> str:
+     posix = pathlib.Path(path).as_posix()
+     if posix.startswith('//'):
+         posix = posix[1:]
+     return posix
+
+
+ def convert_symlinks_to_posix(toplevel_dir: str | pathlib.Path) -> None:
+     """Convert all symlinks in `root_dir` (recursively) to POSIX paths. This is a
+     necessary last step before submitting uploads to run on the HPC.
+     """
+     for path in pathlib.Path(toplevel_dir).rglob('*'):
+         if path.is_symlink():
+             posix_target = path.readlink().as_posix().removeprefix('//?/UNC')
+             path.unlink()
+             np_tools.symlink(src=ensure_posix(posix_target), dest=path)
+
+ if __name__ == '__main__':
+     import doctest
+     doctest.testmod(optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.IGNORE_EXCEPTION_DETAIL)
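For orientation, here is a minimal sketch of how the new v1 upload path introduced in this version might be driven, following the deprecation note on put_csv_for_hpc_upload. The CSV path below is a hypothetical placeholder, and dry_run=True keeps the sketch from submitting anything:

# Sketch only - not part of the package; the CSV path is a made-up example.
import pathlib

from np_codeocean import utils

csv_path = pathlib.Path(
    "//allen/programs/mindscope/workgroups/np-exp/codeocean/example_session/upload.csv"  # hypothetical
)

# Parse the job CSV into BasicUploadJobConfigs models; ecephys modalities
# receive DEFAULT_EPHYS_SLURM_SETTINGS (longer time limit, more CPUs).
jobs = utils.get_job_models_from_csv(csv_path)

# Assemble a SubmitJobRequest and POST it to aind-data-transfer-service;
# jobs already present in the upload queue are skipped. dry_run only logs.
utils.put_jobs_for_hpc_upload(jobs, dry_run=True)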
np_codeocean-0.2.0.dist-info/METADATA → np_codeocean-0.3.0.dist-info/METADATA CHANGED
@@ -1,8 +1,8 @@
  Metadata-Version: 2.1
  Name: np_codeocean
- Version: 0.2.0
+ Version: 0.3.0
  Summary: Tools for uploading and interacting with Mindscope Neuropixels experiments on Code Ocean
- Author-Email: Ben Hardcastle <ben.hardcastle@alleninstitute.org>
+ Author-Email: Ben Hardcastle <ben.hardcastle@alleninstitute.org>, Chris Mochizuki <chrism@alleninstitute.org>, Arjun Sridhar <arjun.sridhar@alleninstitute.org>
  License: MIT
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.9
@@ -13,16 +13,25 @@ Classifier: Operating System :: Microsoft :: Windows
  Classifier: Operating System :: POSIX :: Linux
  Project-URL: Source, https://github.com/AllenInstitute/np_codeocean
  Project-URL: Issues, https://github.com/AllenInstitute/np_codeocean/issues
- Requires-Python: >=3.9
- Requires-Dist: np_session>=0.6.4
- Requires-Dist: np-tools>=0.1.21
- Requires-Dist: np-config>=0.4.24
+ Requires-Python: >=3.10
+ Requires-Dist: np_session>=0.6.44
+ Requires-Dist: np-tools>=0.1.23
+ Requires-Dist: np-config>=0.4.33
  Requires-Dist: requests>=2.31.0
  Requires-Dist: npc-session>=0.1.34
  Requires-Dist: polars>=0.20.16
+ Requires-Dist: npc-lims>=0.1.168
+ Requires-Dist: npc-ephys>=0.1.28
+ Provides-Extra: dev
  Requires-Dist: bump>=1.3.2; extra == "dev"
  Requires-Dist: pdm>=2.4.9; extra == "dev"
- Provides-Extra: dev
+ Provides-Extra: dynamic-routing-metadata
+ Requires-Dist: np-aind-metadata>=0.1.17; extra == "dynamic-routing-metadata"
+ Requires-Dist: npc-lims>=0.1.154; extra == "dynamic-routing-metadata"
+ Requires-Dist: npc-sessions>=0.0.253; extra == "dynamic-routing-metadata"
+ Requires-Dist: aind-data-transfer-models>=0.13.1; extra == "dynamic-routing-metadata"
+ Requires-Dist: aind-codeocean-pipeline-monitor[full]>=0.5.0; extra == "dynamic-routing-metadata"
+ Requires-Dist: aind-metadata-mapper==0.18.2; extra == "dynamic-routing-metadata"
  Description-Content-Type: text/markdown

  # np_codeocean
np_codeocean-0.3.0.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+ np_codeocean-0.3.0.dist-info/METADATA,sha256=LdDu8xF8-r2bQMp5xf5WbFDa78CRYWB11wByNNPkwfQ,3177
+ np_codeocean-0.3.0.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
+ np_codeocean-0.3.0.dist-info/entry_points.txt,sha256=p32aRkIjrFa4KtUbq2E6ZMYBVNRUw3U8ZIarvwNkK1E,250
+ np_codeocean/__init__.py,sha256=ED7YOu-3AIQIEML9jPT9bQ690lfhYZNiOB4QhnJ8r8U,76
+ np_codeocean/np_session_utils.py,sha256=yRz62ybX7k192Qr_viRPOL3saIPh5EH60KakAva5Wck,16659
+ np_codeocean/scripts/fix_ephys_data_on_s3.py,sha256=c5jHZmFLjMCRGb3YSmefCZRO_telZ7dB-mXGIG5ncYk,560
+ np_codeocean/scripts/upload_dynamic_routing_behavior.py,sha256=mc_dRF_sllvIlaDiCvWBMY8XZGji3dqsMJBJzykdS8M,17190
+ np_codeocean/scripts/upload_dynamic_routing_ecephys.py,sha256=ywlemwzY8IFK7h-s759tNsKW2av9CQsZBRerUibtUd4,9934
+ np_codeocean/scripts/upload_ethan_analysis_files.py,sha256=MaJRVk0CfzEMkwMmmXRmnRCqYpo6mGNWtROfZLavgGw,1019
+ np_codeocean/scripts/upload_split_recordings_example.py,sha256=1_aqoBxAkB_VpRKYqyPsEQBDGvgyAHXAkIJA0ZT2Vb0,1490
+ np_codeocean/utils.py,sha256=Pni1c8iKIe94lPLOTBha8MLlSYPqUluWXXVCNyOsGbw,19971
+ np_codeocean-0.3.0.dist-info/RECORD,,
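Each RECORD row has the form path,sha256=<urlsafe-base64 digest without padding>,size-in-bytes. A minimal sketch of checking the utils.py entry above against a locally downloaded wheel (the local wheel filename is an assumption):

# Sketch only: verify one RECORD entry from a local copy of the wheel.
import base64
import hashlib
import zipfile

wheel_path = "np_codeocean-0.3.0-py3-none-any.whl"  # assumed local filename
with zipfile.ZipFile(wheel_path) as zf:
    data = zf.read("np_codeocean/utils.py")

digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
# Both checks should hold if the wheel matches the RECORD shown above.
print(digest == "Pni1c8iKIe94lPLOTBha8MLlSYPqUluWXXVCNyOsGbw")
print(len(data) == 19971)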
np_codeocean-0.2.0.dist-info/WHEEL → np_codeocean-0.3.0.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: pdm-backend (2.1.8)
+ Generator: pdm-backend (2.4.3)
  Root-Is-Purelib: true
  Tag: py3-none-any
np_codeocean-0.3.0.dist-info/entry_points.txt ADDED
@@ -0,0 +1,7 @@
+ [console_scripts]
+ upload_dr_behavior = np_codeocean.scripts.upload_dynamic_routing_behavior:main
+ upload_dr_ecephys = np_codeocean.scripts.upload_dynamic_routing_ecephys:main
+ upload_sessions = np_codeocean.scripts.upload_sessions:main
+
+ [gui_scripts]
+
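Each console_scripts entry maps a command name to a module:function target, so the same entry points can also be resolved programmatically. A minimal sketch, assuming the 0.3.0 wheel is installed on Python 3.10+ (which the updated Requires-Python implies):

# Sketch only: resolve the upload_dr_behavior console script at runtime.
from importlib.metadata import entry_points

(ep,) = entry_points(group="console_scripts", name="upload_dr_behavior")
main = ep.load()  # np_codeocean.scripts.upload_dynamic_routing_behavior:main
main()  # roughly equivalent to running upload_dr_behavior from a shell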