np_codeocean-0.1.6-py3-none-any.whl → np_codeocean-0.1.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
np_codeocean/upload.py CHANGED
@@ -4,9 +4,7 @@ import argparse
 import contextlib
 import csv
 import json
-from multiprocessing import context
 import pathlib
-import sys
 import datetime
 from pathlib import Path
 from typing import NamedTuple
@@ -20,9 +18,8 @@ import np_tools
 import doctest
 import numpy as np
 import polars as pl
-
 import requests
-
+from pydantic import ValidationError # may be returned from aind-data-transfer-service
 
 logger = np_logging.get_logger(__name__)
 
@@ -46,13 +43,16 @@ class CodeOceanUpload(NamedTuple):
     """Directory of symlinks to behavior video files and video info jsons in
     top-level of session folder on np-exp."""
 
-    ephys: Path
+    ephys: Path | None
     """Directory of symlinks to raw ephys data files on np-exp, with only one
     `recording` per `Record Node` folder."""
 
     job: Path
     """File containing job parameters for `aind-data-transfer`"""
 
+    force_cloud_sync: bool = False
+    """If True, re-upload and re-make raw asset even if data exists on S3."""
+
 def as_posix(path: pathlib.Path) -> str:
     return path.as_posix()[1:]
 
@@ -83,6 +83,37 @@ def create_ephys_symlinks(session: np_session.Session, dest: Path,
         if not abs_path.is_dir():
             np_tools.symlink(as_posix(abs_path), dest / rel_path)
     logger.debug(f'Finished creating symlinks to raw ephys data files in {root_path}')
+    correct_structure(dest)
+
+def correct_structure(dest: Path) -> None:
+    """
+    In case some probes are missing, remove device entries from structure.oebin
+    files for devices with folders that have not been preserved.
+    """
+    logger.debug('Checking structure.oebin for missing folders...')
+    recording_dirs = dest.rglob('recording[0-9]')
+    for recording_dir in recording_dirs:
+        if not recording_dir.is_dir():
+            continue
+        oebin_path = recording_dir / 'structure.oebin'
+        if not (oebin_path.is_symlink() or oebin_path.exists()):
+            logger.warning(f'No structure.oebin found in {recording_dir}')
+            continue
+        logger.debug(f'Examining oebin: {oebin_path} for correction')
+        oebin_obj = np_tools.read_oebin(np_config.normalize_path(oebin_path.readlink()))
+        any_removed = False
+        for subdir_name in ('events', 'continuous'):
+            subdir = oebin_path.parent / subdir_name
+            # iterate over copy of list so as to not disrupt iteration when elements are removed
+            for device in [device for device in oebin_obj[subdir_name]]:
+                if not (subdir / device['folder_name']).exists():
+                    logger.info(f'{device["folder_name"]} not found in {subdir}, removing from structure.oebin')
+                    oebin_obj[subdir_name].remove(device)
+                    any_removed = True
+        if any_removed:
+            oebin_path.unlink()
+            oebin_path.write_text(json.dumps(oebin_obj, indent=4))
+            logger.debug('Overwrote symlink to structure.oebin with corrected strcuture.oebin')
 
 def is_behavior_video_file(path: Path) -> bool:
     if path.is_dir() or path.suffix not in ('.mp4', '.avi', '.json'):
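
The new `correct_structure` helper above rewrites each symlinked `structure.oebin` so it only lists devices whose `events`/`continuous` folders were actually preserved. Below is a minimal, self-contained sketch of the same pruning rule, assuming a hypothetical `prune_oebin` helper and made-up probe folder names; the released code instead reads the file with `np_tools.read_oebin` and overwrites the symlink in place.

```python
import json
from pathlib import Path


def prune_oebin(oebin: dict, recording_dir: Path) -> dict:
    """Return a copy of `oebin` with 'events'/'continuous' entries removed for
    devices whose data folders are missing under `recording_dir`."""
    pruned = json.loads(json.dumps(oebin))  # cheap deep copy via JSON round-trip
    for subdir_name in ('events', 'continuous'):
        subdir = recording_dir / subdir_name
        pruned[subdir_name] = [
            device for device in pruned.get(subdir_name, [])
            if (subdir / device['folder_name']).exists()
        ]
    return pruned


if __name__ == '__main__':
    import tempfile

    example = {
        'continuous': [
            {'folder_name': 'Neuropix-PXI-100.ProbeA'},  # made-up device entries
            {'folder_name': 'Neuropix-PXI-100.ProbeB'},
        ],
        'events': [],
    }
    # With an empty temporary directory, both probe folders are "missing",
    # so both continuous entries are dropped.
    with tempfile.TemporaryDirectory() as tmp:
        print(json.dumps(prune_oebin(example, Path(tmp)), indent=4))
```
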
@@ -92,13 +123,10 @@ def is_behavior_video_file(path: Path) -> bool:
         return True
     return False
 
-def create_behavior_symlinks(session: np_session.Session, dest: Path | None) -> None:
+def create_behavior_symlinks(session: np_session.Session, dest: Path) -> None:
     """Create symlinks in `dest` pointing to files in top-level of session
     folder on np-exp, plus all files in `exp` subfolder, if present.
     """
-    if dest is None:
-        logger.debug(f"No behavior folder supplied for {session}")
-        return
     subfolder_names = ('exp', 'qc')
     logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
     for src in session.npexp_path.glob('*'):
@@ -115,13 +143,10 @@ def create_behavior_symlinks(session: np_session.Session, dest: Path | None) ->
             np_tools.symlink(as_posix(src), dest / src.relative_to(session.npexp_path))
         logger.debug(f'Finished creating symlinks to {name!r} files')
 
-def create_behavior_videos_symlinks(session: np_session.Session, dest: Path | None) -> None:
+def create_behavior_videos_symlinks(session: np_session.Session, dest: Path) -> None:
     """Create symlinks in `dest` pointing to MVR video files and info jsons in top-level of session
     folder on np-exp.
     """
-    if dest is None:
-        logger.debug(f"No behavior_videos folder supplied for {session}")
-        return
     logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
     for src in session.npexp_path.glob('*'):
         if is_behavior_video_file(src):
@@ -130,7 +155,7 @@ def create_behavior_videos_symlinks(session: np_session.Session, dest: Path | No
 
 def is_surface_channel_recording(path_name: str) -> bool:
     """
-    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_660023_20230808_surface_channels")
+    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
     >>> is_surface_channel_recording(session.npexp_path.as_posix())
     True
     """
@@ -138,9 +163,9 @@ def is_surface_channel_recording(path_name: str) -> bool:
 
 def get_surface_channel_start_time(session: np_session.Session) -> datetime.datetime:
     """
-    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_660023_20230808_surface_channels")
+    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
     >>> get_surface_channel_start_time(session)
-    datetime.datetime(2023, 8, 8, 15, 11, 14, 240000)
+    datetime.datetime(2023, 11, 29, 14, 56, 25, 219000)
     """
     sync_messages_paths = tuple(session.npexp_path.glob('*/*/*/sync_messages.txt'))
     if not sync_messages_paths:
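
For context on the updated doctest above: the start time is read from `sync_messages.txt` as an epoch timestamp in milliseconds, which the function divides by 1e3 before passing to `datetime.datetime.fromtimestamp` (see the first context line of the next hunk). A tiny sketch with an invented value:

```python
import datetime

# sync_messages.txt stores the start time as milliseconds since the epoch
# (the value below is invented, not taken from a real session)
timestamp_value = 1_701_297_385_219
start = datetime.datetime.fromtimestamp(timestamp_value / 1e3)
print(start)  # local-time datetime with millisecond precision
```
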
@@ -154,43 +179,42 @@ def get_surface_channel_start_time(session: np_session.Session) -> datetime.date
     timestamp = datetime.datetime.fromtimestamp(timestamp_value / 1e3)
     return timestamp
 
-def get_ephys_upload_csv_for_session(upload: CodeOceanUpload) -> dict[str, str | int]:
+def get_upload_csv_for_session(upload: CodeOceanUpload) -> dict[str, str | int | bool]:
     """
-    >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_660023_20230808_surface_channels"
+    >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels"
     >>> is_surface_channel_recording(path)
     True
     >>> upload = create_codeocean_upload(path)
-    >>> ephys_upload_csv = get_ephys_upload_csv_for_session(upload)
+    >>> ephys_upload_csv = get_upload_csv_for_session(upload)
     >>> ephys_upload_csv['modality0.source']
-    '//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_660023_20230808_surface_channels/ephys'
+    '//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_690706_20231129_surface_channels/ephys'
     >>> ephys_upload_csv.keys()
-    dict_keys(['modality0.source', 'modality0', 's3-bucket', 'subject-id', 'platform', 'acq-datetime'])
+    dict_keys(['platform', 'subject-id', 'force_cloud_sync', 'modality0', 'modality0.source', 'acq-datetime'])
     """
-
-    ephys_upload = {
-        'modality0.source': np_config.normalize_path(upload.ephys).as_posix(),
-        'modality0': 'ecephys',
-        's3-bucket': CONFIG['s3-bucket'],
-        'subject-id': str(upload.session.mouse),
+    params = {
         'platform': 'ecephys',
+        'subject-id': str(upload.session.mouse),
+        'force_cloud_sync': upload.force_cloud_sync,
     }
-
-    if upload.behavior is not None:
-        ephys_upload['modality1.source'] = np_config.normalize_path(upload.behavior).as_posix()
-        ephys_upload['modality1'] = 'behavior'
-
-    if upload.behavior_videos is not None:
-        ephys_upload['modality2.source'] = np_config.normalize_path(upload.behavior_videos).as_posix()
-        ephys_upload['modality2'] = 'behavior-videos'
-
+    idx = 0
+    for modality_name, attr_name in {
+        'ecephys': 'ephys',
+        'behavior': 'behavior',
+        'behavior-videos': 'behavior_videos',
+    }.items():
+        if getattr(upload, attr_name) is not None:
+            params[f'modality{idx}'] = modality_name
+            params[f'modality{idx}.source'] = np_config.normalize_path(getattr(upload, attr_name)).as_posix()
+            idx += 1
+
     if is_surface_channel_recording(upload.session.npexp_path.as_posix()):
         date = datetime.datetime(upload.session.date.year, upload.session.date.month, upload.session.date.day)
         session_date_time = date.combine(upload.session.date, get_surface_channel_start_time(upload.session).time())
-        ephys_upload['acq-datetime'] = f'{session_date_time.strftime("%Y-%m-%d %H:%M:%S")}'
+        params['acq-datetime'] = f'{session_date_time.strftime("%Y-%m-%d %H:%M:%S")}'
     else:
-        ephys_upload['acq-datetime'] = f'{upload.session.start.strftime("%Y-%m-%d %H:%M:%S")}'
+        params['acq-datetime'] = f'{upload.session.start.strftime("%Y-%m-%d %H:%M:%S")}'
 
-    return ephys_upload
+    return params
 
 
 def is_in_hpc_upload_queue(csv_path: pathlib.Path) -> bool:
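
The rewritten `get_upload_csv_for_session` no longer hard-codes `modality1`/`modality2`: it numbers whichever modality sources are present, in a fixed order. A minimal sketch of just that loop, using a stand-in namespace and invented paths instead of a real `CodeOceanUpload`, and omitting the `np_config.normalize_path` call:

```python
from types import SimpleNamespace

# Stand-in for a CodeOceanUpload with one modality missing (paths invented):
upload = SimpleNamespace(
    ephys='/allen/example/codeocean/session/ephys',
    behavior='/allen/example/codeocean/session/behavior',
    behavior_videos=None,  # absent modalities are simply skipped
)

params: dict[str, str] = {}
idx = 0
for modality_name, attr_name in {
    'ecephys': 'ephys',
    'behavior': 'behavior',
    'behavior-videos': 'behavior_videos',
}.items():
    if getattr(upload, attr_name) is not None:
        params[f'modality{idx}'] = modality_name
        params[f'modality{idx}.source'] = getattr(upload, attr_name)
        idx += 1

print(params)
# {'modality0': 'ecephys', 'modality0.source': '/allen/example/codeocean/session/ephys',
#  'modality1': 'behavior', 'modality1.source': '/allen/example/codeocean/session/behavior'}
```
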
@@ -230,7 +254,8 @@ def put_csv_for_hpc_upload(csv_path: pathlib.Path) -> None:
         to get the real error class + message."""
         if response.status_code != 200:
             try:
-                raise eval(response.json()['data']['errors'][0])
+                x = response.json()['data']['errors']
+                import pdb; pdb.set_trace()
             except (KeyError, IndexError, requests.exceptions.JSONDecodeError, SyntaxError) as exc1:
                 try:
                     response.raise_for_status()
@@ -261,10 +286,13 @@ def put_csv_for_hpc_upload(csv_path: pathlib.Path) -> None:
         ),
     )
     _raise_for_status(post_csv_response)
-
+
+def is_ephys_session(session: np_session.Session) -> bool:
+    return bool(next(session.npexp_path.rglob('settings.xml'), None))
+
 def create_upload_job(upload: CodeOceanUpload) -> None:
     logger.info(f'Creating upload job file {upload.job} for session {upload.session}...')
-    job: dict = get_ephys_upload_csv_for_session(upload)
+    job: dict = get_upload_csv_for_session(upload)
     with open(upload.job, 'w') as f:
         w = csv.writer(f, lineterminator='')
         w.writerow(job.keys())
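
`create_upload_job` writes the job parameters as a two-row CSV: a header row of parameter names (written above) and a single row of values (the first context line of the next hunk). A hedged sketch of that layout, with invented field values and without reproducing the package's exact `csv.writer(f, lineterminator='')` configuration:

```python
import csv
import io

# Invented job parameters, shaped like the dict returned by get_upload_csv_for_session
job = {
    'platform': 'ecephys',
    'subject-id': '690706',
    'force_cloud_sync': False,
    'modality0': 'ecephys',
    'modality0.source': '//allen/example/codeocean/session/ephys',
    'acq-datetime': '2023-11-29 12:00:00',
}

buf = io.StringIO()
writer = csv.writer(buf)
writer.writerow(job.keys())    # header row: parameter names
writer.writerow(job.values())  # single data row: parameter values
print(buf.getvalue())
```
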
@@ -273,28 +301,33 @@ def create_upload_job(upload: CodeOceanUpload) -> None:
         w.writerow(job.values())
 
 def create_codeocean_upload(session: str | int | np_session.Session,
-                            recording_dirs: Iterable[str] | None = None) -> CodeOceanUpload:
-    """
-    >>> upload = create_codeocean_upload("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_660023_20230808_surface_channels")
-    >>> upload.behavior is None
-    True
-    >>> upload.ephys.exists()
-    True
-    """
+                            recording_dirs: Iterable[str] | None = None,
+                            force_cloud_sync: bool = False,
+                            ) -> CodeOceanUpload:
     """Create directories of symlinks to np-exp files with correct structure
     for upload to CodeOcean.
 
     - only one `recording` per `Record Node` folder (largest if multiple found)
     - job file for feeding into `aind-data-transfer`
-    """
 
-    session = np_session.Session(session)
+    >>> upload = create_codeocean_upload("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
+    >>> upload.behavior is None
+    True
+    >>> upload.ephys.exists()
+    True
+    """
 
-    if is_surface_channel_recording(session.npexp_path.as_posix()):
+    if is_surface_channel_recording(str(session)):
+        session = np_session.Session(session)
+        if not is_surface_channel_recording(session.npexp_path.name):
+            # manually assign surface channel path
+            session = np_session.Session(session.npexp_path.parent / f'{session.folder}_surface_channels')
+        assert session.npexp_path.exists(), f"Surface channel path {session.npexp_path} does not exist in same folder as main session recording"
         root = np_session.NPEXP_PATH / 'codeocean' / f'{session.folder}_surface_channels'
         behavior = None
         behavior_videos = None
     else:
+        session = np_session.Session(session)
         root = np_session.NPEXP_PATH / 'codeocean' / session.folder
         behavior = np_config.normalize_path(root / 'behavior')
         behavior_videos = behavior.with_name('behavior-videos')
@@ -305,19 +338,24 @@ def create_codeocean_upload(session: str | int | np_session.Session,
         session = session,
         behavior = behavior,
         behavior_videos = behavior_videos,
-        ephys = np_config.normalize_path(root / 'ephys'),
+        ephys = np_config.normalize_path(root / 'ephys') if is_ephys_session(session) else None,
         job = np_config.normalize_path(root / 'upload.csv'),
+        force_cloud_sync=force_cloud_sync,
     )
-
-    create_ephys_symlinks(upload.session, upload.ephys, recording_dirs=recording_dirs)
-    create_behavior_symlinks(upload.session, upload.behavior)
-    create_behavior_videos_symlinks(upload.session, upload.behavior_videos)
+    if upload.ephys:
+        create_ephys_symlinks(upload.session, upload.ephys, recording_dirs=recording_dirs)
+    if upload.behavior:
+        create_behavior_symlinks(upload.session, upload.behavior)
+    if upload.behavior_videos:
+        create_behavior_videos_symlinks(upload.session, upload.behavior_videos)
     create_upload_job(upload)
     return upload
 
 def upload_session(session: str | int | pathlib.Path | np_session.Session,
-                   recording_dirs: Iterable[str] | None = None) -> None:
-    upload = create_codeocean_upload(str(session), recording_dirs=recording_dirs)
+                   recording_dirs: Iterable[str] | None = None,
+                   force: bool = False,
+                   ) -> None:
+    upload = create_codeocean_upload(str(session), recording_dirs=recording_dirs, force_cloud_sync=force)
     np_logging.web('np_codeocean').info(f'Submitting {upload.session} to hpc upload queue')
     put_csv_for_hpc_upload(upload.job)
     logger.debug(f'Submitted {upload.session} to hpc upload queue')
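
The new `force` parameter on `upload_session` (surfaced as a `--force` flag in `parse_args`, shown in the next hunk) is forwarded to the job as `force_cloud_sync`, re-uploading and re-making the raw asset even if the data already exists on S3. A hedged usage sketch with an invented session ID:

```python
# Hypothetical call site; the session folder name below is invented.
from np_codeocean.upload import upload_session

# Re-upload and re-make the raw asset even if the data already exists on S3.
upload_session('DRpilot_690706_20231129', force=True)
```
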
@@ -336,6 +374,7 @@ def main() -> None:
 def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(description="Upload a session to CodeOcean")
     parser.add_argument('session', help="session ID (lims or np-exp foldername) or path to session folder")
+    parser.add_argument('--force', action='store_true', help="enable `force_cloud_sync` option, re-uploading and re-making raw asset even if data exists on S3")
     parser.add_argument('recording_dirs', nargs='*', type=list, help="[optional] specific recording directories to upload - for use with split recordings only.")
     return parser.parse_args()
 
@@ -343,6 +382,5 @@ if __name__ == '__main__':
     import doctest
 
     doctest.testmod(
-        optionflags=(doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE)
-    )
-    main()
+        optionflags=(doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE),
+    )

np_codeocean-{0.1.6 → 0.1.8}.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: np_codeocean
-Version: 0.1.6
+Version: 0.1.8
 Summary: Tools for uploading and interacting with Mindscope Neuropixels experiments on Code Ocean
 Author-Email: Ben Hardcastle <ben.hardcastle@alleninstitute.org>
 License: MIT
@@ -19,7 +19,7 @@ Requires-Dist: np-tools>=0.1.21
 Requires-Dist: np-config>=0.4.24
 Requires-Dist: requests>=2.31.0
 Requires-Dist: npc-session>=0.1.34
-Requires-Dist: polars>=0.19.19
+Requires-Dist: polars>=0.20.16
 Requires-Dist: bump>=1.3.2; extra == "dev"
 Requires-Dist: pdm>=2.4.9; extra == "dev"
 Provides-Extra: dev
@@ -1,9 +1,9 @@
1
- np_codeocean-0.1.6.dist-info/METADATA,sha256=sjvrCBR2236bHRQuxEHoI92Vq9eW0RrPaaadQwbur2o,2488
2
- np_codeocean-0.1.6.dist-info/WHEEL,sha256=N2J68yzZqJh3mI_Wg92rwhw0rtJDFpZj9bwQIMJgaVg,90
3
- np_codeocean-0.1.6.dist-info/entry_points.txt,sha256=T3Is83nShuWFYg7bTLxVhRWi15OVxO99WYcUg3-xURM,113
1
+ np_codeocean-0.1.8.dist-info/METADATA,sha256=hvXhhe5Fv5FEzz17K1RK6wtpJcyKhJKL_qS9JT_OeZM,2488
2
+ np_codeocean-0.1.8.dist-info/WHEEL,sha256=N2J68yzZqJh3mI_Wg92rwhw0rtJDFpZj9bwQIMJgaVg,90
3
+ np_codeocean-0.1.8.dist-info/entry_points.txt,sha256=T3Is83nShuWFYg7bTLxVhRWi15OVxO99WYcUg3-xURM,113
4
4
  np_codeocean/__init__.py,sha256=BYXXoFDa1J_Lv-YG52Ch6k5L4DMCEPXtfHsrMmMeST4,66
5
5
  np_codeocean/scripts/upload_sessions.py,sha256=1_aqoBxAkB_VpRKYqyPsEQBDGvgyAHXAkIJA0ZT2Vb0,1490
6
- np_codeocean/upload.py,sha256=nFnPD-eRc6f3eWZMkr6Qe_D7-77nDZookTVzrjwZkfo,14940
6
+ np_codeocean/upload.py,sha256=qSxujYOMjuaz_NMJa_v9ZhuuTj9IiSs2qIxy8zWXeB4,17487
7
7
  np_codeocean/upload_one.py,sha256=-egSjXvA0bBfshbY3D2TZ0M0GfLokFBZ3mSCm_gOGXE,7367
8
8
  np_codeocean/utils.py,sha256=p0pmljaH4j7RjRsc4TPYXPpLhq-2ScvnfyXOYFSFBTM,3375
9
- np_codeocean-0.1.6.dist-info/RECORD,,
9
+ np_codeocean-0.1.8.dist-info/RECORD,,