np_codeocean 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
np_codeocean/upload.py CHANGED
@@ -4,9 +4,7 @@ import argparse
4
4
  import contextlib
5
5
  import csv
6
6
  import json
7
- from multiprocessing import context
8
7
  import pathlib
9
- import sys
10
8
  import datetime
11
9
  from pathlib import Path
12
10
  from typing import NamedTuple
@@ -20,9 +18,8 @@ import np_tools
20
18
  import doctest
21
19
  import numpy as np
22
20
  import polars as pl
23
-
24
21
  import requests
25
-
22
+ from pydantic import ValidationError # may be returned from aind-data-transfer-service
26
23
 
27
24
  logger = np_logging.get_logger(__name__)
28
25
 
@@ -46,13 +43,16 @@ class CodeOceanUpload(NamedTuple):
46
43
  """Directory of symlinks to behavior video files and video info jsons in
47
44
  top-level of session folder on np-exp."""
48
45
 
49
- ephys: Path
46
+ ephys: Path | None
50
47
  """Directory of symlinks to raw ephys data files on np-exp, with only one
51
48
  `recording` per `Record Node` folder."""
52
49
 
53
50
  job: Path
54
51
  """File containing job parameters for `aind-data-transfer`"""
55
52
 
53
+ force_cloud_sync: bool = False
54
+ """If True, re-upload and re-make raw asset even if data exists on S3."""
55
+
56
56
  def as_posix(path: pathlib.Path) -> str:
57
57
  return path.as_posix()[1:]
58
58
 
@@ -130,7 +130,7 @@ def create_behavior_videos_symlinks(session: np_session.Session, dest: Path | No
130
130
 
131
131
  def is_surface_channel_recording(path_name: str) -> bool:
132
132
  """
133
- >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_660023_20230808_surface_channels")
133
+ >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
134
134
  >>> is_surface_channel_recording(session.npexp_path.as_posix())
135
135
  True
136
136
  """
@@ -138,9 +138,9 @@ def is_surface_channel_recording(path_name: str) -> bool:
138
138
 
139
139
  def get_surface_channel_start_time(session: np_session.Session) -> datetime.datetime:
140
140
  """
141
- >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_660023_20230808_surface_channels")
141
+ >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
142
142
  >>> get_surface_channel_start_time(session)
143
- datetime.datetime(2023, 8, 8, 15, 11, 14, 240000)
143
+ datetime.datetime(2023, 11, 29, 14, 56, 25, 219000)
144
144
  """
145
145
  sync_messages_paths = tuple(session.npexp_path.glob('*/*/*/sync_messages.txt'))
146
146
  if not sync_messages_paths:
@@ -154,43 +154,42 @@ def get_surface_channel_start_time(session: np_session.Session) -> datetime.date
154
154
  timestamp = datetime.datetime.fromtimestamp(timestamp_value / 1e3)
155
155
  return timestamp
156
156
 
157
- def get_ephys_upload_csv_for_session(upload: CodeOceanUpload) -> dict[str, str | int]:
157
+ def get_upload_csv_for_session(upload: CodeOceanUpload) -> dict[str, str | int | bool]:
158
158
  """
159
- >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_660023_20230808_surface_channels"
159
+ >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels"
160
160
  >>> is_surface_channel_recording(path)
161
161
  True
162
162
  >>> upload = create_codeocean_upload(path)
163
- >>> ephys_upload_csv = get_ephys_upload_csv_for_session(upload)
163
+ >>> ephys_upload_csv = get_upload_csv_for_session(upload)
164
164
  >>> ephys_upload_csv['modality0.source']
165
- '//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_660023_20230808_surface_channels/ephys'
165
+ '//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_690706_20231129_surface_channels/ephys'
166
166
  >>> ephys_upload_csv.keys()
167
- dict_keys(['modality0.source', 'modality0', 's3-bucket', 'subject-id', 'platform', 'acq-datetime'])
167
+ dict_keys(['platform', 'subject-id', 'force_cloud_sync', 'modality0', 'modality0.source', 'acq-datetime'])
168
168
  """
169
-
170
- ephys_upload = {
171
- 'modality0.source': np_config.normalize_path(upload.ephys).as_posix(),
172
- 'modality0': 'ecephys',
173
- 's3-bucket': CONFIG['s3-bucket'],
174
- 'subject-id': str(upload.session.mouse),
169
+ params = {
175
170
  'platform': 'ecephys',
171
+ 'subject-id': str(upload.session.mouse),
172
+ 'force_cloud_sync': upload.force_cloud_sync,
176
173
  }
177
-
178
- if upload.behavior is not None:
179
- ephys_upload['modality1.source'] = np_config.normalize_path(upload.behavior).as_posix()
180
- ephys_upload['modality1'] = 'behavior'
181
-
182
- if upload.behavior_videos is not None:
183
- ephys_upload['modality2.source'] = np_config.normalize_path(upload.behavior_videos).as_posix()
184
- ephys_upload['modality2'] = 'behavior-videos'
185
-
174
+ idx = 0
175
+ for modality_name, attr_name in {
176
+ 'ecephys': 'ephys',
177
+ 'behavior': 'behavior',
178
+ 'behavior-videos': 'behavior_videos',
179
+ }.items():
180
+ if getattr(upload, attr_name) is not None:
181
+ params[f'modality{idx}'] = modality_name
182
+ params[f'modality{idx}.source'] = np_config.normalize_path(getattr(upload, attr_name)).as_posix()
183
+ idx += 1
184
+
186
185
  if is_surface_channel_recording(upload.session.npexp_path.as_posix()):
187
186
  date = datetime.datetime(upload.session.date.year, upload.session.date.month, upload.session.date.day)
188
187
  session_date_time = date.combine(upload.session.date, get_surface_channel_start_time(upload.session).time())
189
- ephys_upload['acq-datetime'] = f'{session_date_time.strftime("%Y-%m-%d %H:%M:%S")}'
188
+ params['acq-datetime'] = f'{session_date_time.strftime("%Y-%m-%d %H:%M:%S")}'
190
189
  else:
191
- ephys_upload['acq-datetime'] = f'{upload.session.start.strftime("%Y-%m-%d %H:%M:%S")}'
190
+ params['acq-datetime'] = f'{upload.session.start.strftime("%Y-%m-%d %H:%M:%S")}'
192
191
 
193
- return ephys_upload
192
+ return params
194
193
 
195
194
 
196
195
  def is_in_hpc_upload_queue(csv_path: pathlib.Path) -> bool:
@@ -230,7 +229,8 @@ def put_csv_for_hpc_upload(csv_path: pathlib.Path) -> None:
230
229
  to get the real error class + message."""
231
230
  if response.status_code != 200:
232
231
  try:
233
- raise eval(response.json()['data']['errors'][0])
232
+ x = response.json()['data']['errors']
233
+ import pdb; pdb.set_trace()
234
234
  except (KeyError, IndexError, requests.exceptions.JSONDecodeError, SyntaxError) as exc1:
235
235
  try:
236
236
  response.raise_for_status()
@@ -261,10 +261,13 @@ def put_csv_for_hpc_upload(csv_path: pathlib.Path) -> None:
261
261
  ),
262
262
  )
263
263
  _raise_for_status(post_csv_response)
264
-
264
+
265
+ def is_ephys_session(session: np_session.Session) -> bool:
266
+ return bool(next(session.npexp_path.rglob('settings.xml'), None))
267
+
265
268
  def create_upload_job(upload: CodeOceanUpload) -> None:
266
269
  logger.info(f'Creating upload job file {upload.job} for session {upload.session}...')
267
- job: dict = get_ephys_upload_csv_for_session(upload)
270
+ job: dict = get_upload_csv_for_session(upload)
268
271
  with open(upload.job, 'w') as f:
269
272
  w = csv.writer(f, lineterminator='')
270
273
  w.writerow(job.keys())
@@ -273,28 +276,33 @@ def create_upload_job(upload: CodeOceanUpload) -> None:
273
276
  w.writerow(job.values())
274
277
 
275
278
  def create_codeocean_upload(session: str | int | np_session.Session,
276
- recording_dirs: Iterable[str] | None = None) -> CodeOceanUpload:
277
- """
278
- >>> upload = create_codeocean_upload("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_660023_20230808_surface_channels")
279
- >>> upload.behavior is None
280
- True
281
- >>> upload.ephys.exists()
282
- True
283
- """
279
+ recording_dirs: Iterable[str] | None = None,
280
+ force_cloud_sync: bool = False,
281
+ ) -> CodeOceanUpload:
284
282
  """Create directories of symlinks to np-exp files with correct structure
285
283
  for upload to CodeOcean.
286
284
 
287
285
  - only one `recording` per `Record Node` folder (largest if multiple found)
288
286
  - job file for feeding into `aind-data-transfer`
289
- """
290
287
 
291
- session = np_session.Session(session)
288
+ >>> upload = create_codeocean_upload("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
289
+ >>> upload.behavior is None
290
+ True
291
+ >>> upload.ephys.exists()
292
+ True
293
+ """
292
294
 
293
- if is_surface_channel_recording(session.npexp_path.as_posix()):
295
+ if is_surface_channel_recording(str(session)):
296
+ session = np_session.Session(session)
297
+ if not is_surface_channel_recording(session.npexp_path.name):
298
+ # manually assign surface channel path
299
+ session = np_session.Session(session.npexp_path.parent / f'{session.folder}_surface_channels')
300
+ assert session.npexp_path.exists(), f"Surface channel path {session.npexp_path} does not exist in same folder as main session recording"
294
301
  root = np_session.NPEXP_PATH / 'codeocean' / f'{session.folder}_surface_channels'
295
302
  behavior = None
296
303
  behavior_videos = None
297
304
  else:
305
+ session = np_session.Session(session)
298
306
  root = np_session.NPEXP_PATH / 'codeocean' / session.folder
299
307
  behavior = np_config.normalize_path(root / 'behavior')
300
308
  behavior_videos = behavior.with_name('behavior-videos')
@@ -305,8 +313,9 @@ def create_codeocean_upload(session: str | int | np_session.Session,
305
313
  session = session,
306
314
  behavior = behavior,
307
315
  behavior_videos = behavior_videos,
308
- ephys = np_config.normalize_path(root / 'ephys'),
316
+ ephys = np_config.normalize_path(root / 'ephys') if is_ephys_session(session) else None,
309
317
  job = np_config.normalize_path(root / 'upload.csv'),
318
+ force_cloud_sync=force_cloud_sync,
310
319
  )
311
320
 
312
321
  create_ephys_symlinks(upload.session, upload.ephys, recording_dirs=recording_dirs)
@@ -316,8 +325,10 @@ def create_codeocean_upload(session: str | int | np_session.Session,
316
325
  return upload
317
326
 
318
327
  def upload_session(session: str | int | pathlib.Path | np_session.Session,
319
- recording_dirs: Iterable[str] | None = None) -> None:
320
- upload = create_codeocean_upload(str(session), recording_dirs=recording_dirs)
328
+ recording_dirs: Iterable[str] | None = None,
329
+ force: bool = False,
330
+ ) -> None:
331
+ upload = create_codeocean_upload(str(session), recording_dirs=recording_dirs, force_cloud_sync=force)
321
332
  np_logging.web('np_codeocean').info(f'Submitting {upload.session} to hpc upload queue')
322
333
  put_csv_for_hpc_upload(upload.job)
323
334
  logger.debug(f'Submitted {upload.session} to hpc upload queue')
@@ -336,6 +347,7 @@ def main() -> None:
336
347
  def parse_args() -> argparse.Namespace:
337
348
  parser = argparse.ArgumentParser(description="Upload a session to CodeOcean")
338
349
  parser.add_argument('session', help="session ID (lims or np-exp foldername) or path to session folder")
350
+ parser.add_argument('--force', action='store_true', help="enable `force_cloud_sync` option, re-uploading and re-making raw asset even if data exists on S3")
339
351
  parser.add_argument('recording_dirs', nargs='*', type=list, help="[optional] specific recording directories to upload - for use with split recordings only.")
340
352
  return parser.parse_args()
341
353
 
@@ -343,6 +355,5 @@ if __name__ == '__main__':
343
355
  import doctest
344
356
 
345
357
  doctest.testmod(
346
- optionflags=(doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE)
347
- )
348
- main()
358
+ optionflags=(doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE),
359
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: np_codeocean
3
- Version: 0.1.6
3
+ Version: 0.2.0
4
4
  Summary: Tools for uploading and interacting with Mindscope Neuropixels experiments on Code Ocean
5
5
  Author-Email: Ben Hardcastle <ben.hardcastle@alleninstitute.org>
6
6
  License: MIT
@@ -19,7 +19,7 @@ Requires-Dist: np-tools>=0.1.21
19
19
  Requires-Dist: np-config>=0.4.24
20
20
  Requires-Dist: requests>=2.31.0
21
21
  Requires-Dist: npc-session>=0.1.34
22
- Requires-Dist: polars>=0.19.19
22
+ Requires-Dist: polars>=0.20.16
23
23
  Requires-Dist: bump>=1.3.2; extra == "dev"
24
24
  Requires-Dist: pdm>=2.4.9; extra == "dev"
25
25
  Provides-Extra: dev
@@ -1,9 +1,9 @@
1
- np_codeocean-0.1.6.dist-info/METADATA,sha256=sjvrCBR2236bHRQuxEHoI92Vq9eW0RrPaaadQwbur2o,2488
2
- np_codeocean-0.1.6.dist-info/WHEEL,sha256=N2J68yzZqJh3mI_Wg92rwhw0rtJDFpZj9bwQIMJgaVg,90
3
- np_codeocean-0.1.6.dist-info/entry_points.txt,sha256=T3Is83nShuWFYg7bTLxVhRWi15OVxO99WYcUg3-xURM,113
1
+ np_codeocean-0.2.0.dist-info/METADATA,sha256=UlrIexZ_aS71qK8-QiUkiGoe3h75zmJBqnplTAVaDbk,2488
2
+ np_codeocean-0.2.0.dist-info/WHEEL,sha256=N2J68yzZqJh3mI_Wg92rwhw0rtJDFpZj9bwQIMJgaVg,90
3
+ np_codeocean-0.2.0.dist-info/entry_points.txt,sha256=T3Is83nShuWFYg7bTLxVhRWi15OVxO99WYcUg3-xURM,113
4
4
  np_codeocean/__init__.py,sha256=BYXXoFDa1J_Lv-YG52Ch6k5L4DMCEPXtfHsrMmMeST4,66
5
5
  np_codeocean/scripts/upload_sessions.py,sha256=1_aqoBxAkB_VpRKYqyPsEQBDGvgyAHXAkIJA0ZT2Vb0,1490
6
- np_codeocean/upload.py,sha256=nFnPD-eRc6f3eWZMkr6Qe_D7-77nDZookTVzrjwZkfo,14940
6
+ np_codeocean/upload.py,sha256=suP41bMthg0bgkm6FFktv5v95P9bwQyDdXdSKrjQ-YU,15972
7
7
  np_codeocean/upload_one.py,sha256=-egSjXvA0bBfshbY3D2TZ0M0GfLokFBZ3mSCm_gOGXE,7367
8
8
  np_codeocean/utils.py,sha256=p0pmljaH4j7RjRsc4TPYXPpLhq-2ScvnfyXOYFSFBTM,3375
9
- np_codeocean-0.1.6.dist-info/RECORD,,
9
+ np_codeocean-0.2.0.dist-info/RECORD,,