np_codeocean 0.2.1__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: np_codeocean
-Version: 0.2.1
+Version: 0.3.1
 Summary: Tools for uploading and interacting with Mindscope Neuropixels experiments on Code Ocean
 Author-Email: Ben Hardcastle <ben.hardcastle@alleninstitute.org>, Chris Mochizuki <chrism@alleninstitute.org>, Arjun Sridhar <arjun.sridhar@alleninstitute.org>
 License: MIT
@@ -20,15 +20,18 @@ Requires-Dist: np-config>=0.4.33
 Requires-Dist: requests>=2.31.0
 Requires-Dist: npc-session>=0.1.34
 Requires-Dist: polars>=0.20.16
-Requires-Dist: aind-data-transfer-models>=0.5.1
 Requires-Dist: npc-lims>=0.1.168
+Requires-Dist: npc-ephys>=0.1.28
+Provides-Extra: dev
 Requires-Dist: bump>=1.3.2; extra == "dev"
 Requires-Dist: pdm>=2.4.9; extra == "dev"
-Requires-Dist: np-aind-metadata==0.1.16; extra == "dynamic-routing-metadata"
-Requires-Dist: npc-lims>=0.1.154; extra == "dynamic-routing-metadata"
-Requires-Dist: npc-sessions>=0.0.226; extra == "dynamic-routing-metadata"
-Provides-Extra: dev
 Provides-Extra: dynamic-routing-metadata
+Requires-Dist: np-aind-metadata>=0.1.17; extra == "dynamic-routing-metadata"
+Requires-Dist: npc-lims>=0.1.154; extra == "dynamic-routing-metadata"
+Requires-Dist: npc-sessions>=0.0.253; extra == "dynamic-routing-metadata"
+Requires-Dist: aind-data-transfer-models>=0.13.1; extra == "dynamic-routing-metadata"
+Requires-Dist: aind-codeocean-pipeline-monitor[full]>=0.5.0; extra == "dynamic-routing-metadata"
+Requires-Dist: aind-metadata-mapper==0.18.2; extra == "dynamic-routing-metadata"
 Description-Content-Type: text/markdown
 
 # np_codeocean
@@ -40,7 +40,7 @@ composite = [
 
 [project]
 name = "np_codeocean"
-version = "0.2.1"
+version = "0.3.1"
 description = "Tools for uploading and interacting with Mindscope Neuropixels experiments on Code Ocean"
 authors = [
     { name = "Ben Hardcastle", email = "ben.hardcastle@alleninstitute.org" },
@@ -54,8 +54,8 @@ dependencies = [
     "requests>=2.31.0",
     "npc-session>=0.1.34",
     "polars>=0.20.16",
-    "aind-data-transfer-models>=0.5.1",
     "npc-lims>=0.1.168",
+    "npc-ephys>=0.1.28",
 ]
 requires-python = ">=3.10"
 readme = "README.md"
@@ -87,9 +87,12 @@ dev = [
     "pdm>=2.4.9",
 ]
 dynamic-routing-metadata = [
-    "np-aind-metadata==0.1.16",
+    "np-aind-metadata>=0.1.17",
     "npc-lims>=0.1.154",
-    "npc-sessions>=0.0.226",
+    "npc-sessions>=0.0.253",
+    "aind-data-transfer-models>=0.13.1",
+    "aind-codeocean-pipeline-monitor[full]>=0.5.0",
+    "aind-metadata-mapper==0.18.2",
 ]
 
 [build-system]
@@ -7,6 +7,7 @@ import pathlib
 from collections.abc import Iterable
 import shutil
 
+import aind_data_transfer_models.core
 import np_config
 import np_logging
 import np_session
@@ -82,7 +83,7 @@ def create_aind_metadata_symlinks(upload: CodeOceanUpload) -> bool:
     has_metadata_files = False
     for src in upload.session.npexp_path.glob('*'):
         if src.stem in utils.AIND_METADATA_NAMES:
-            np_tools.symlink(utils.ensure_posix(src), upload.aind_metadata / src.name)
+            np_tools.symlink(src, upload.aind_metadata / src.name)
             has_metadata_files = True
     if has_metadata_files:
         logger.debug(f'Finished creating symlinks to aind metadata files in {upload.session.npexp_path}')
@@ -116,7 +117,7 @@ def create_ephys_symlinks(session: np_session.Session, dest: pathlib.Path,
         root_path, specific_recording_dir_names=recording_dirs
     ):
         if not abs_path.is_dir():
-            np_tools.symlink(utils.ensure_posix(abs_path), dest / rel_path)
+            np_tools.symlink(abs_path, dest / rel_path)
     logger.debug(f'Finished creating symlinks to raw ephys data files in {root_path}')
     utils.cleanup_ephys_symlinks(dest)
 
@@ -132,7 +133,7 @@ def create_behavior_symlinks(session: np_session.Session, dest: pathlib.Path | N
     logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
     for src in session.npexp_path.glob('*'):
         if not src.is_dir() and not utils.is_behavior_video_file(src):
-            np_tools.symlink(utils.ensure_posix(src), dest / src.relative_to(session.npexp_path))
+            np_tools.symlink(src, dest / src.relative_to(session.npexp_path))
     logger.debug(f'Finished creating symlinks to top-level files in {session.npexp_path}')
 
     for name in subfolder_names:
@@ -141,7 +142,7 @@ def create_behavior_symlinks(session: np_session.Session, dest: pathlib.Path | N
             continue
         for src in subfolder.rglob('*'):
             if not src.is_dir():
-                np_tools.symlink(utils.ensure_posix(src), dest / src.relative_to(session.npexp_path))
+                np_tools.symlink(src, dest / src.relative_to(session.npexp_path))
         logger.debug(f'Finished creating symlinks to {name!r} files')
 
 
@@ -155,7 +156,7 @@ def create_behavior_videos_symlinks(session: np_session.Session, dest: pathlib.P
     logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
     for src in session.npexp_path.glob('*'):
         if utils.is_behavior_video_file(src):
-            np_tools.symlink(utils.ensure_posix(src), dest / src.relative_to(session.npexp_path))
+            np_tools.symlink(src, dest / src.relative_to(session.npexp_path))
     logger.debug(f'Finished creating symlinks to behavior video files in {session.npexp_path}')
 
 
@@ -299,6 +300,8 @@ def upload_session(
     test: bool = False,
     hpc_upload_job_email: str = utils.HPC_UPLOAD_JOB_EMAIL,
     regenerate_symlinks: bool = True,
+    adjust_ephys_timestamps: bool = True,
+    codeocean_configs: aind_data_transfer_models.core.CodeOceanPipelineMonitorConfigs | None = None,
 ) -> None:
     codeocean_root = np_session.NPEXP_PATH / ('codeocean-dev' if test else 'codeocean')
     logger.debug(f'{codeocean_root = }')
@@ -310,7 +313,7 @@
     )
     if regenerate_symlinks and upload.root.exists():
         logger.debug(f'Removing existing {upload.root = }')
-        shutil.rmtree(upload.root.as_posix())
+        shutil.rmtree(upload.root.as_posix(), ignore_errors=True)
     if upload.aind_metadata:
         create_aind_metadata_symlinks(upload)
     if upload.ephys:
@@ -319,11 +322,28 @@
         create_behavior_symlinks(upload.session, upload.behavior)
     if upload.behavior_videos:
         create_behavior_videos_symlinks(upload.session, upload.behavior_videos)
+    if adjust_ephys_timestamps and upload.ephys:
+        if not upload.behavior:  # includes surface channel recordings
+            logger.warning(f"Cannot adjust ephys timestamps for {upload.session} - no behavior folder supplied for upload")
+        else:
+            try:
+                utils.write_corrected_ephys_timestamps(ephys_dir=upload.ephys, behavior_dir=upload.behavior)
+            except utils.SyncFileNotFoundError:
+                raise FileNotFoundError(
+                    (
+                        f"Cannot adjust timestamps - no sync file found in {upload.behavior}. "
+                        "If the session doesn't have one, run with "
+                        "`adjust_ephys_timestamps=False` or `--no-sync` flag in CLI"
+                    )
+                ) from None
+    for path in (upload.ephys, upload.behavior, upload.behavior_videos, upload.aind_metadata):
+        if path is not None and path.exists():
+            utils.convert_symlinks_to_posix(path)
     csv_content: dict = get_upload_csv_for_session(upload)
     utils.write_upload_csv(csv_content, upload.job)
     np_logging.web('np_codeocean').info(f'Submitting {upload.session} to hpc upload queue')
     utils.put_jobs_for_hpc_upload(
-        utils.get_job_models_from_csv(upload.job),
+        utils.get_job_models_from_csv(upload.job, codeocean_configs=codeocean_configs),
         upload_service_url=utils.DEV_SERVICE if test else utils.AIND_DATA_TRANSFER_SERVICE,
         user_email=hpc_upload_job_email,
         dry_run=dry_run,
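The two new `upload_session` keyword arguments are opt-in for existing callers. A minimal sketch of a direct call, using the package-level access seen elsewhere in this diff; the session folder name and flag values are illustrative, not taken from the package:

    import np_codeocean

    np_codeocean.upload_session(
        "DRpilot_366122_20240401",      # placeholder session folder name
        adjust_ephys_timestamps=False,  # skip sync-based timestamp correction
        codeocean_configs=None,         # keep the transfer service's default pipeline-monitor behavior
    )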
@@ -43,7 +43,8 @@ def reset_log_file() -> None:
     log.unlink(missing_ok=True)
 
 def get_log_file() -> pathlib.Path:
-    folder = pathlib.Path(".").resolve() / "logs"
+    folder = pathlib.Path("//allen/programs/mindscope/workgroups/np-exp") / "codeocean-logs"
+    folder.mkdir(exist_ok=True)
     return folder / f"{pathlib.Path(__file__).stem}_{datetime.datetime.now().strftime('%Y-%m-%d')}.log"
 
 logging.basicConfig(
@@ -1,4 +1,5 @@
 import argparse
+import contextlib
 import datetime
 import logging
 import pathlib
@@ -10,14 +11,18 @@ import npc_session
 import npc_sessions
 from aind_data_schema.core.rig import Rig
 from np_aind_metadata.integrations import dynamic_routing_task
-
+import aind_codeocean_pipeline_monitor.models
+import aind_data_transfer_models.core
+import codeocean.capsule
+import codeocean.data_asset
+import codeocean.computation
 import np_codeocean
 
 # Disable divide by zero or NaN warnings
 warnings.filterwarnings("ignore", category=RuntimeWarning)
 
 logging.basicConfig(
-    filename=f"logs/{pathlib.Path(__file__).stem}_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log",
+    filename=f"//allen/programs/mindscope/workgroups/np-exp/codeocean-logs/{pathlib.Path(__file__).stem}_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log",
     level=logging.DEBUG,
     format="%(asctime)s | %(name)s | %(levelname)s | %(message)s",
     datefmt="%Y-%d-%m %H:%M:%S",
@@ -123,6 +128,7 @@ def write_metadata_and_upload(
     hpc_upload_job_email: str = np_codeocean.HPC_UPLOAD_JOB_EMAIL,
     regenerate_metadata: bool = False,
     regenerate_symlinks: bool = True,
+    adjust_ephys_timestamps: bool = False,
 ) -> None:
     """Writes and updates aind-data-schema to the session directory
     associated with the `session`. The aind-data-schema session model is
@@ -134,16 +140,35 @@
     """
     # session = np_session.Session(session) #! this doesn't work for surface_channels
     session = np_codeocean.get_np_session(session_path_or_folder_name)
-    add_metadata(
-        session_directory=session.npexp_path,
-        session_datetime=(
-            session.start
-            if not np_codeocean.is_surface_channel_recording(session.npexp_path.name)
-            else np_codeocean.get_surface_channel_start_time(session)
+    with contextlib.suppress(Exception):
+        add_metadata(
+            session_directory=session.npexp_path,
+            session_datetime=(
+                session.start
+                if not np_codeocean.is_surface_channel_recording(session.npexp_path.name)
+                else np_codeocean.get_surface_channel_start_time(session)
+            ),
+            rig_storage_directory=pathlib.Path(np_codeocean.get_project_config()["rig_metadata_dir"]),
+            ignore_errors=True,
+            skip_existing=not regenerate_metadata,
+        )
+    pipelines = [
+        aind_codeocean_pipeline_monitor.models.PipelineMonitorSettings(
+            run_params=codeocean.computation.RunParams(
+                capsule_id="287db808-74ce-4e44-b14b-fde1471eba45",
+            ),
+            data_assets=[
+                codeocean.data_asset.DataAsset(
+                    mount="ecephys",
+                    id="",  # ID of new raw data asset will be inserted here by airflow
+                ),
+            ],
+            computation_polling_interval=15 * 60,
+            computation_timeout=48 * 3600,
         ),
-        rig_storage_directory=pathlib.Path(np_codeocean.get_project_config()["rig_metadata_dir"]),
-        ignore_errors=True,
-        skip_existing=not regenerate_metadata,
+    ]
+    codeocean_configs = aind_data_transfer_models.core.CodeOceanPipelineMonitorConfigs(
+        pipeline_monitor_capsule_settings=pipelines,
     )
     return np_codeocean.upload_session(
         session_path_or_folder_name,
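For readability, and assuming both `PipelineMonitorSettings` fields are in seconds (the diff does not state units), the hard-coded monitor polls every 15 minutes and gives up after 48 hours:

    # Assumption: both fields are in seconds.
    computation_polling_interval = 15 * 60   # 900 s = 15 minutes
    computation_timeout = 48 * 3600          # 172800 s = 48 hours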
@@ -153,6 +178,8 @@
         test=test,
         hpc_upload_job_email=hpc_upload_job_email,
         regenerate_symlinks=regenerate_symlinks,
+        adjust_ephys_timestamps=adjust_ephys_timestamps,
+        codeocean_configs=codeocean_configs,
     )
 
 def parse_args() -> argparse.Namespace:
@@ -165,11 +192,15 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument('--dry-run', action='store_true', help="Create upload job but do not submit to hpc upload queue.")
     parser.add_argument('--preserve-symlinks', dest='regenerate_symlinks', action='store_false', help="Existing symlink folders will not be deleted and regenerated - may result in additional data being uploaded")
     parser.add_argument('--regenerate-metadata', action='store_true', help="Regenerate metadata files (session.json and rig.json) even if they already exist")
+    parser.add_argument('--no-sync', dest="adjust_ephys_timestamps", action='store_false', help="(disabled) Skip adjustment of ephys timestamps.npy using sync data for sessions where no sync data is available")
     return parser.parse_args()
 
 def main() -> None:
     args = parse_args()
-    write_metadata_and_upload(**vars(args))
+    kwargs = vars(args)
+    kwargs |= {'adjust_ephys_timestamps': False}  # unnecessary while we have machinery in place for adjusting in npc_sessions (adds 5 GB of timestamps files for each upload)
+    np_codeocean.utils.set_npc_lims_credentials()
+    write_metadata_and_upload(**kwargs)
 
 
 if __name__ == '__main__':
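Note that `main` overrides whatever `--no-sync` set: the in-place dict merge always wins, so timestamp adjustment is forced off for CLI invocations. A self-contained illustration of the `|=` semantics relied on here:

    # dict |= mapping updates in place, overwriting existing keys (Python 3.9+).
    kwargs = {"adjust_ephys_timestamps": True, "dry_run": False}
    kwargs |= {"adjust_ephys_timestamps": False}
    assert kwargs == {"adjust_ephys_timestamps": False, "dry_run": False}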
@@ -4,6 +4,7 @@ import contextlib
 import csv
 import datetime
 import functools
+import itertools
 import json
 import logging
 import os
@@ -15,6 +16,8 @@ import aind_data_transfer_models.core
 import aind_slurm_rest.models
 import np_config
 import np_tools
+import npc_ephys
+import npc_sync
 import npc_session
 import numpy as np
 import polars as pl
@@ -38,6 +41,9 @@ DEFAULT_EPHYS_SLURM_SETTINGS = aind_slurm_rest.models.V0036JobProperties(
 )
 """Increased timelimit and cpus for running ephys compression on the hpc"""
 
+class SyncFileNotFoundError(FileNotFoundError):
+    pass
+
 @functools.cache
 def get_project_config() -> dict[str, Any]:
     """Config for this project"""
@@ -202,6 +208,34 @@ def cleanup_ephys_metadata(toplevel_dir: pathlib.Path) -> None:
         oebin_path.write_text(json.dumps(oebin_obj, indent=4))
         logger.debug('Overwrote symlink to structure.oebin with corrected structure.oebin')
 
+def write_corrected_ephys_timestamps(
+    ephys_dir: pathlib.Path,
+    behavior_dir: pathlib.Path,
+) -> None:
+    for path in itertools.chain(behavior_dir.glob('*.h5'), behavior_dir.glob('*.sync')):
+        with contextlib.suppress(Exception):
+            sync_dataset = npc_sync.SyncDataset(path)
+            _ = sync_dataset.line_labels
+            logger.info(f'Found valid sync file {path.as_posix()}')
+            break
+    else:
+        raise SyncFileNotFoundError(f'No valid sync file found in {behavior_dir.as_posix()}')
+
+    timing_on_pxi = (
+        timing
+        for timing in npc_ephys.get_ephys_timing_on_pxi(
+            ephys_dir.glob("**/experiment*/recording*"),
+        )
+    )
+    timing_on_sync = (
+        npc_ephys.get_ephys_timing_on_sync(
+            sync=sync_dataset,
+            devices=timing_on_pxi,
+        )
+    )
+    npc_ephys.overwrite_timestamps(timing_on_sync)
+    logger.info(f'Corrected timestamps in {ephys_dir}')
+
 def decode_symlink_path(oebin_path: pathlib.Path) -> pathlib.Path:
     if not oebin_path.is_symlink():
         return oebin_path
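The sync-file search above leans on Python's `for ... else`: the `else` clause runs only when the loop finishes without `break`, i.e. when no candidate file passed validation. A minimal illustration of that control flow:

    # for/else: the else branch executes only if the loop never breaks.
    for candidate in ("bad1", "bad2"):
        if candidate == "good":
            break
    else:
        raise FileNotFoundError("no valid candidate found")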
@@ -266,6 +300,7 @@ def get_job_models_from_csv(
     path: pathlib.Path,
     ephys_slurm_settings: aind_slurm_rest.models.V0036JobProperties = DEFAULT_EPHYS_SLURM_SETTINGS,
     user_email: str = HPC_UPLOAD_JOB_EMAIL,
+    **extra_BasicUploadJobConfigs_params: Any,
 ) -> tuple[aind_data_transfer_models.core.BasicUploadJobConfigs, ...]:
     jobs = pl.read_csv(path, eol_char='\r').with_columns(
         pl.col('subject-id').cast(str),
@@ -293,6 +328,7 @@
             **{k.replace('-', '_'): v for k,v in job.items()},
             modalities=modalities,
             user_email=user_email,
+            **extra_BasicUploadJobConfigs_params,
         )
     )
     return tuple(models)
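The `**extra_BasicUploadJobConfigs_params` catch-all is how `codeocean_configs` from `upload_session` reaches each job model. A self-contained sketch of the passthrough pattern, with a stand-in for the model class:

    from typing import Any

    def make_model(name: str, **extra_params: Any) -> dict[str, Any]:
        # Stand-in for BasicUploadJobConfigs(...): extra kwargs are merged in verbatim.
        return {"name": name, **extra_params}

    model = make_model("ecephys-job", codeocean_configs={"pipeline_monitor_capsule_settings": []})
    assert model["codeocean_configs"] == {"pipeline_monitor_capsule_settings": []}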
@@ -401,6 +437,16 @@ def ensure_posix(path: str | pathlib.Path) -> str:
     return posix
 
 
+def convert_symlinks_to_posix(toplevel_dir: str | pathlib.Path) -> None:
+    """Convert all symlinks in `root_dir` (recursively) to POSIX paths. This is a
+    necessary last step before submitting uploads to run on the HPC.
+    """
+    for path in pathlib.Path(toplevel_dir).rglob('*'):
+        if path.is_symlink():
+            posix_target = path.readlink().as_posix().removeprefix('//?/UNC')
+            path.unlink()
+            np_tools.symlink(src=ensure_posix(posix_target), dest=path)
+
 if __name__ == '__main__':
     import doctest
     doctest.testmod(optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.IGNORE_EXCEPTION_DETAIL)
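The `removeprefix('//?/UNC')` call strips the extended-length UNC prefix that Windows attaches to resolved symlink targets, so links created on a Windows rig resolve as plain POSIX paths on the HPC. A before/after sketch with an illustrative target:

    # Illustrative: how a Windows extended-length UNC target is rewritten.
    raw_target = "//?/UNC/allen/programs/mindscope/workgroups/np-exp/session/file.h5"
    posix_target = raw_target.removeprefix("//?/UNC")
    assert posix_target == "/allen/programs/mindscope/workgroups/np-exp/session/file.h5"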