np_codeocean 0.1.6__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: np_codeocean
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: Tools for uploading and interacting with Mindscope Neuropixels experiments on Code Ocean
5
5
  Author-Email: Ben Hardcastle <ben.hardcastle@alleninstitute.org>
6
6
  License: MIT
@@ -19,7 +19,7 @@ Requires-Dist: np-tools>=0.1.21
19
19
  Requires-Dist: np-config>=0.4.24
20
20
  Requires-Dist: requests>=2.31.0
21
21
  Requires-Dist: npc-session>=0.1.34
22
- Requires-Dist: polars>=0.19.19
22
+ Requires-Dist: polars>=0.20.16
23
23
  Requires-Dist: bump>=1.3.2; extra == "dev"
24
24
  Requires-Dist: pdm>=2.4.9; extra == "dev"
25
25
  Provides-Extra: dev
@@ -40,7 +40,7 @@ composite = [
40
40
 
41
41
  [project]
42
42
  name = "np_codeocean"
43
- version = "0.1.6"
43
+ version = "0.1.7"
44
44
  description = "Tools for uploading and interacting with Mindscope Neuropixels experiments on Code Ocean"
45
45
  authors = [
46
46
  { name = "Ben Hardcastle", email = "ben.hardcastle@alleninstitute.org" },
@@ -51,7 +51,7 @@ dependencies = [
51
51
  "np-config>=0.4.24",
52
52
  "requests>=2.31.0",
53
53
  "npc-session>=0.1.34",
54
- "polars>=0.19.19",
54
+ "polars>=0.20.16",
55
55
  ]
56
56
  requires-python = ">=3.9"
57
57
  readme = "README.md"
@@ -4,9 +4,7 @@ import argparse
4
4
  import contextlib
5
5
  import csv
6
6
  import json
7
- from multiprocessing import context
8
7
  import pathlib
9
- import sys
10
8
  import datetime
11
9
  from pathlib import Path
12
10
  from typing import NamedTuple
@@ -20,9 +18,8 @@ import np_tools
20
18
  import doctest
21
19
  import numpy as np
22
20
  import polars as pl
23
-
24
21
  import requests
25
-
22
+ from pydantic import ValidationError # may be returned from aind-data-transfer-service
26
23
 
27
24
  logger = np_logging.get_logger(__name__)
28
25
 
@@ -46,13 +43,16 @@ class CodeOceanUpload(NamedTuple):
46
43
  """Directory of symlinks to behavior video files and video info jsons in
47
44
  top-level of session folder on np-exp."""
48
45
 
49
- ephys: Path
46
+ ephys: Path | None
50
47
  """Directory of symlinks to raw ephys data files on np-exp, with only one
51
48
  `recording` per `Record Node` folder."""
52
49
 
53
50
  job: Path
54
51
  """File containing job parameters for `aind-data-transfer`"""
55
52
 
53
+ force_cloud_sync: bool = False
54
+ """If True, re-upload and re-make raw asset even if data exists on S3."""
55
+
56
56
  def as_posix(path: pathlib.Path) -> str:
57
57
  return path.as_posix()[1:]
58
58
 
@@ -83,6 +83,29 @@ def create_ephys_symlinks(session: np_session.Session, dest: Path,
83
83
  if not abs_path.is_dir():
84
84
  np_tools.symlink(as_posix(abs_path), dest / rel_path)
85
85
  logger.debug(f'Finished creating symlinks to raw ephys data files in {root_path}')
86
+ correct_structure(dest)
87
+
88
+ def correct_structure(dest: Path) -> None:
89
+ """
90
+ In case some probes are missing, remove device entries from structure.oebin files with folders that don't actually exist.
91
+ """
92
+ logger.debug('Creating modified structure.oebin')
93
+ oebin_paths = dest.rglob('recording[0-9]*/structure.oebin')
94
+ for oebin_path in oebin_paths:
95
+ logger.debug(f'Examining oebin: {oebin_path} for correction')
96
+ oebin_obj = np_tools.read_oebin(np_config.normalize_path(oebin_path.readlink()))
97
+
98
+ for subdir_name in ('events', 'continuous'):
99
+ subdir = oebin_path.parent / subdir_name
100
+ # iterate over copy of list so as to not disrupt iteration when elements are removed
101
+ for device in [device for device in oebin_obj[subdir_name]]:
102
+ if not (subdir / device['folder_name']).exists():
103
+ logger.info(f'{device["folder_name"]} not found in {subdir}, removing from structure.oebin')
104
+ oebin_obj[subdir_name].remove(device)
105
+
106
+ oebin_path.unlink()
107
+ oebin_path.write_text(json.dumps(oebin_obj, indent=4))
108
+ logger.debug('Overwrote symlink to structure.oebin with corrected strcuture.oebin')
86
109
 
87
110
  def is_behavior_video_file(path: Path) -> bool:
88
111
  if path.is_dir() or path.suffix not in ('.mp4', '.avi', '.json'):
@@ -130,7 +153,7 @@ def create_behavior_videos_symlinks(session: np_session.Session, dest: Path | No
130
153
 
131
154
  def is_surface_channel_recording(path_name: str) -> bool:
132
155
  """
133
- >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_660023_20230808_surface_channels")
156
+ >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
134
157
  >>> is_surface_channel_recording(session.npexp_path.as_posix())
135
158
  True
136
159
  """
@@ -138,9 +161,9 @@ def is_surface_channel_recording(path_name: str) -> bool:
138
161
 
139
162
  def get_surface_channel_start_time(session: np_session.Session) -> datetime.datetime:
140
163
  """
141
- >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_660023_20230808_surface_channels")
164
+ >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
142
165
  >>> get_surface_channel_start_time(session)
143
- datetime.datetime(2023, 8, 8, 15, 11, 14, 240000)
166
+ datetime.datetime(2023, 11, 29, 14, 56, 25, 219000)
144
167
  """
145
168
  sync_messages_paths = tuple(session.npexp_path.glob('*/*/*/sync_messages.txt'))
146
169
  if not sync_messages_paths:
@@ -154,43 +177,42 @@ def get_surface_channel_start_time(session: np_session.Session) -> datetime.date
154
177
  timestamp = datetime.datetime.fromtimestamp(timestamp_value / 1e3)
155
178
  return timestamp
156
179
 
157
- def get_ephys_upload_csv_for_session(upload: CodeOceanUpload) -> dict[str, str | int]:
180
+ def get_upload_csv_for_session(upload: CodeOceanUpload) -> dict[str, str | int | bool]:
158
181
  """
159
- >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_660023_20230808_surface_channels"
182
+ >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels"
160
183
  >>> is_surface_channel_recording(path)
161
184
  True
162
185
  >>> upload = create_codeocean_upload(path)
163
- >>> ephys_upload_csv = get_ephys_upload_csv_for_session(upload)
186
+ >>> ephys_upload_csv = get_upload_csv_for_session(upload)
164
187
  >>> ephys_upload_csv['modality0.source']
165
- '//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_660023_20230808_surface_channels/ephys'
188
+ '//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_690706_20231129_surface_channels/ephys'
166
189
  >>> ephys_upload_csv.keys()
167
- dict_keys(['modality0.source', 'modality0', 's3-bucket', 'subject-id', 'platform', 'acq-datetime'])
190
+ dict_keys(['platform', 'subject-id', 'force_cloud_sync', 'modality0', 'modality0.source', 'acq-datetime'])
168
191
  """
169
-
170
- ephys_upload = {
171
- 'modality0.source': np_config.normalize_path(upload.ephys).as_posix(),
172
- 'modality0': 'ecephys',
173
- 's3-bucket': CONFIG['s3-bucket'],
174
- 'subject-id': str(upload.session.mouse),
192
+ params = {
175
193
  'platform': 'ecephys',
194
+ 'subject-id': str(upload.session.mouse),
195
+ 'force_cloud_sync': upload.force_cloud_sync,
176
196
  }
177
-
178
- if upload.behavior is not None:
179
- ephys_upload['modality1.source'] = np_config.normalize_path(upload.behavior).as_posix()
180
- ephys_upload['modality1'] = 'behavior'
181
-
182
- if upload.behavior_videos is not None:
183
- ephys_upload['modality2.source'] = np_config.normalize_path(upload.behavior_videos).as_posix()
184
- ephys_upload['modality2'] = 'behavior-videos'
185
-
197
+ idx = 0
198
+ for modality_name, attr_name in {
199
+ 'ecephys': 'ephys',
200
+ 'behavior': 'behavior',
201
+ 'behavior-videos': 'behavior_videos',
202
+ }.items():
203
+ if getattr(upload, attr_name) is not None:
204
+ params[f'modality{idx}'] = modality_name
205
+ params[f'modality{idx}.source'] = np_config.normalize_path(getattr(upload, attr_name)).as_posix()
206
+ idx += 1
207
+
186
208
  if is_surface_channel_recording(upload.session.npexp_path.as_posix()):
187
209
  date = datetime.datetime(upload.session.date.year, upload.session.date.month, upload.session.date.day)
188
210
  session_date_time = date.combine(upload.session.date, get_surface_channel_start_time(upload.session).time())
189
- ephys_upload['acq-datetime'] = f'{session_date_time.strftime("%Y-%m-%d %H:%M:%S")}'
211
+ params['acq-datetime'] = f'{session_date_time.strftime("%Y-%m-%d %H:%M:%S")}'
190
212
  else:
191
- ephys_upload['acq-datetime'] = f'{upload.session.start.strftime("%Y-%m-%d %H:%M:%S")}'
213
+ params['acq-datetime'] = f'{upload.session.start.strftime("%Y-%m-%d %H:%M:%S")}'
192
214
 
193
- return ephys_upload
215
+ return params
194
216
 
195
217
 
196
218
  def is_in_hpc_upload_queue(csv_path: pathlib.Path) -> bool:
@@ -230,7 +252,8 @@ def put_csv_for_hpc_upload(csv_path: pathlib.Path) -> None:
230
252
  to get the real error class + message."""
231
253
  if response.status_code != 200:
232
254
  try:
233
- raise eval(response.json()['data']['errors'][0])
255
+ x = response.json()['data']['errors']
256
+ import pdb; pdb.set_trace()
234
257
  except (KeyError, IndexError, requests.exceptions.JSONDecodeError, SyntaxError) as exc1:
235
258
  try:
236
259
  response.raise_for_status()
@@ -261,10 +284,13 @@ def put_csv_for_hpc_upload(csv_path: pathlib.Path) -> None:
261
284
  ),
262
285
  )
263
286
  _raise_for_status(post_csv_response)
264
-
287
+
288
+ def is_ephys_session(session: np_session.Session) -> bool:
289
+ return bool(next(session.npexp_path.rglob('settings.xml'), None))
290
+
265
291
  def create_upload_job(upload: CodeOceanUpload) -> None:
266
292
  logger.info(f'Creating upload job file {upload.job} for session {upload.session}...')
267
- job: dict = get_ephys_upload_csv_for_session(upload)
293
+ job: dict = get_upload_csv_for_session(upload)
268
294
  with open(upload.job, 'w') as f:
269
295
  w = csv.writer(f, lineterminator='')
270
296
  w.writerow(job.keys())
@@ -273,28 +299,33 @@ def create_upload_job(upload: CodeOceanUpload) -> None:
273
299
  w.writerow(job.values())
274
300
 
275
301
  def create_codeocean_upload(session: str | int | np_session.Session,
276
- recording_dirs: Iterable[str] | None = None) -> CodeOceanUpload:
277
- """
278
- >>> upload = create_codeocean_upload("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_660023_20230808_surface_channels")
279
- >>> upload.behavior is None
280
- True
281
- >>> upload.ephys.exists()
282
- True
283
- """
302
+ recording_dirs: Iterable[str] | None = None,
303
+ force_cloud_sync: bool = False,
304
+ ) -> CodeOceanUpload:
284
305
  """Create directories of symlinks to np-exp files with correct structure
285
306
  for upload to CodeOcean.
286
307
 
287
308
  - only one `recording` per `Record Node` folder (largest if multiple found)
288
309
  - job file for feeding into `aind-data-transfer`
289
- """
290
310
 
291
- session = np_session.Session(session)
311
+ >>> upload = create_codeocean_upload("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
312
+ >>> upload.behavior is None
313
+ True
314
+ >>> upload.ephys.exists()
315
+ True
316
+ """
292
317
 
293
- if is_surface_channel_recording(session.npexp_path.as_posix()):
318
+ if is_surface_channel_recording(str(session)):
319
+ session = np_session.Session(session)
320
+ if not is_surface_channel_recording(session.npexp_path.name):
321
+ # manually assign surface channel path
322
+ session = np_session.Session(session.npexp_path.parent / f'{session.folder}_surface_channels')
323
+ assert session.npexp_path.exists(), f"Surface channel path {session.npexp_path} does not exist in same folder as main session recording"
294
324
  root = np_session.NPEXP_PATH / 'codeocean' / f'{session.folder}_surface_channels'
295
325
  behavior = None
296
326
  behavior_videos = None
297
327
  else:
328
+ session = np_session.Session(session)
298
329
  root = np_session.NPEXP_PATH / 'codeocean' / session.folder
299
330
  behavior = np_config.normalize_path(root / 'behavior')
300
331
  behavior_videos = behavior.with_name('behavior-videos')
@@ -305,8 +336,9 @@ def create_codeocean_upload(session: str | int | np_session.Session,
305
336
  session = session,
306
337
  behavior = behavior,
307
338
  behavior_videos = behavior_videos,
308
- ephys = np_config.normalize_path(root / 'ephys'),
339
+ ephys = np_config.normalize_path(root / 'ephys') if is_ephys_session(session) else None,
309
340
  job = np_config.normalize_path(root / 'upload.csv'),
341
+ force_cloud_sync=force_cloud_sync,
310
342
  )
311
343
 
312
344
  create_ephys_symlinks(upload.session, upload.ephys, recording_dirs=recording_dirs)
@@ -316,8 +348,10 @@ def create_codeocean_upload(session: str | int | np_session.Session,
316
348
  return upload
317
349
 
318
350
  def upload_session(session: str | int | pathlib.Path | np_session.Session,
319
- recording_dirs: Iterable[str] | None = None) -> None:
320
- upload = create_codeocean_upload(str(session), recording_dirs=recording_dirs)
351
+ recording_dirs: Iterable[str] | None = None,
352
+ force: bool = False,
353
+ ) -> None:
354
+ upload = create_codeocean_upload(str(session), recording_dirs=recording_dirs, force_cloud_sync=force)
321
355
  np_logging.web('np_codeocean').info(f'Submitting {upload.session} to hpc upload queue')
322
356
  put_csv_for_hpc_upload(upload.job)
323
357
  logger.debug(f'Submitted {upload.session} to hpc upload queue')
@@ -336,6 +370,7 @@ def main() -> None:
336
370
  def parse_args() -> argparse.Namespace:
337
371
  parser = argparse.ArgumentParser(description="Upload a session to CodeOcean")
338
372
  parser.add_argument('session', help="session ID (lims or np-exp foldername) or path to session folder")
373
+ parser.add_argument('--force', action='store_true', help="enable `force_cloud_sync` option, re-uploading and re-making raw asset even if data exists on S3")
339
374
  parser.add_argument('recording_dirs', nargs='*', type=list, help="[optional] specific recording directories to upload - for use with split recordings only.")
340
375
  return parser.parse_args()
341
376
 
@@ -343,6 +378,5 @@ if __name__ == '__main__':
343
378
  import doctest
344
379
 
345
380
  doctest.testmod(
346
- optionflags=(doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE)
347
- )
348
- main()
381
+ optionflags=(doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE),
382
+ )
File without changes