np_codeocean 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
np_codeocean/__init__.py CHANGED
@@ -1,2 +1,2 @@
1
1
  from np_codeocean.utils import *
2
- from np_codeocean.upload import *
2
+ from np_codeocean.np_session_utils import *
@@ -0,0 +1,367 @@
1
+ from __future__ import annotations
2
+
3
+ import dataclasses
4
+ import datetime
5
+ import doctest
6
+ import pathlib
7
+ from collections.abc import Iterable
8
+ import shutil
9
+
10
+ import aind_data_transfer_models.core
11
+ import np_config
12
+ import np_logging
13
+ import np_session
14
+ import np_tools
15
+
16
+ import np_codeocean.utils as utils
17
+
18
+ logger = np_logging.get_logger(__name__)
19
+
20
@dataclasses.dataclass
class CodeOceanUpload:
    """Objects required for uploading a Mindscope Neuropixels session to CodeOcean.
    Paths are symlinks to files on np-exp.
    """
    session: np_session.Session
    """Session object that the paths belong to."""

    platform: utils.AINDPlatform
    """The 'platform' in the Neural Dynamics data schema language (effectively the rig
    type, which determines the processing pipeline the data follows).

    Our rules are:
    - if it ran in a behavior box: `behavior`
    - anything else: `ecephys`

    This means there will be behavior-only sessions that ran on NP-rigs
    without ephys data (habs, opto experiments etc.), that will be uploaded as
    `ecephys` platform data.
    """

    behavior: pathlib.Path | None
    """Directory of symlinks to files in top-level of session folder on np-exp,
    plus all files in `exp` and `qc` subfolders, if present. Excludes behavior video files
    and video info jsons."""

    behavior_videos: pathlib.Path | None
    """Directory of symlinks to behavior video files and video info jsons in
    top-level of session folder on np-exp."""

    ephys: pathlib.Path | None
    """Directory of symlinks to raw ephys data files on np-exp, with only one
    `recording` per `Record Node` folder."""

    aind_metadata: pathlib.Path | None
    """Directory of symlinks to aind metadata json files in top-level of session folder
    on np-exp."""

    job: pathlib.Path
    """File containing job parameters for `aind-data-transfer`"""

    force_cloud_sync: bool = False
    """If True, re-upload and re-make raw asset even if data exists on S3."""

    @property
    def project_name(self) -> str:
        """Project name for the transfer service: 'OpenScope' for pipeline
        sessions, 'Dynamic Routing' for everything else."""
        if isinstance(self.session, np_session.PipelineSession):
            return "OpenScope"
        return "Dynamic Routing"

    @property
    def root(self) -> pathlib.Path:
        """Common parent directory of the upload subdirectories.

        Taken from the first non-None of behavior / behavior_videos / ephys /
        aind_metadata; raises ValueError if all are None.
        """
        for attr in (self.behavior, self.behavior_videos, self.ephys, self.aind_metadata):
            if attr is not None:
                return attr.parent
        raise ValueError(f"No upload directories assigned to {self!r}")
76
+
77
def create_aind_metadata_symlinks(upload: CodeOceanUpload) -> bool:
    """
    Create symlinks in `upload.aind_metadata` pointing to aind metadata json files from
    the root directory on np-exp. Returns True if any metadata files are found in np-exp
    (i.e. symlinks were made), False otherwise.

    NOTE(review): assumes `upload.aind_metadata` is not None — callers gate on
    `upload.aind_metadata` before calling; confirm if called from elsewhere.
    """
    has_metadata_files = False
    for src in upload.session.npexp_path.glob('*'):
        # match by stem so both `subject.json` etc. are picked up regardless of suffix
        if src.stem in utils.AIND_METADATA_NAMES:
            np_tools.symlink(src, upload.aind_metadata / src.name)
            has_metadata_files = True
    if has_metadata_files:
        logger.debug(f'Finished creating symlinks to aind metadata files in {upload.session.npexp_path}')
    else:
        logger.debug(f'No metadata files found in {upload.session.npexp_path}; No symlinks for metadata were made')
    return has_metadata_files
93
+
94
+
95
def create_ephys_symlinks(session: np_session.Session, dest: pathlib.Path,
                          recording_dirs: Iterable[str] | None = None) -> None:
    """Create symlinks in `dest` pointing to raw ephys data files on np-exp, with only one
    `recording` per `Record Node` folder (the largest, if multiple found).

    Relative paths are preserved, so `dest` will essentially be a merge of
    _probeABC / _probeDEF folders.

    Top-level items other than `Record Node *` folders are excluded.
    """
    root_path = session.npexp_path
    if isinstance(session, np_session.PipelineSession) and session.lims_path is not None:
        # if ephys has been uploaded to lims, use lims path, as large raw data may have
        # been deleted from np-exp
        # NOTE(review): the comment above says lims should be used when raw data may be
        # *missing* from np-exp, yet this switches to lims when np-exp *does* yield
        # filtered ephys paths — confirm whether `not any(...)` was intended.
        if any(
            np_tools.get_filtered_ephys_paths_relative_to_record_node_parents(
                session.npexp_path, specific_recording_dir_names=recording_dirs
            )
        ):
            root_path = session.lims_path
    logger.info(f'Creating symlinks to raw ephys data files in {root_path}...')
    for abs_path, rel_path in np_tools.get_filtered_ephys_paths_relative_to_record_node_parents(
        root_path, specific_recording_dir_names=recording_dirs
    ):
        # only symlink files; parent directories are created implicitly by np_tools.symlink
        if not abs_path.is_dir():
            np_tools.symlink(abs_path, dest / rel_path)
    logger.debug(f'Finished creating symlinks to raw ephys data files in {root_path}')
    utils.cleanup_ephys_symlinks(dest)
123
+
124
+
125
def create_behavior_symlinks(session: np_session.Session, dest: pathlib.Path | None) -> None:
    """Create symlinks in `dest` pointing to files in top-level of session
    folder on np-exp, plus all files in `exp` and `qc` subfolders, if present.

    Behavior video files (and their info jsons) are excluded — they go in the
    `behavior-videos` modality instead. No-op if `dest` is None.
    """
    if dest is None:
        logger.debug(f"No behavior folder supplied for {session}")
        return
    subfolder_names = ('exp', 'qc')
    logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
    for src in session.npexp_path.glob('*'):
        if not src.is_dir() and not utils.is_behavior_video_file(src):
            np_tools.symlink(src, dest / src.relative_to(session.npexp_path))
    logger.debug(f'Finished creating symlinks to top-level files in {session.npexp_path}')

    for name in subfolder_names:
        subfolder = session.npexp_path / name
        if not subfolder.exists():
            continue
        for src in subfolder.rglob('*'):
            if not src.is_dir():
                np_tools.symlink(src, dest / src.relative_to(session.npexp_path))
        logger.debug(f'Finished creating symlinks to {name!r} files')
147
+
148
+
149
def create_behavior_videos_symlinks(session: np_session.Session, dest: pathlib.Path | None) -> None:
    """Create symlinks in `dest` pointing to MVR video files and info jsons in top-level of session
    folder on np-exp.

    No-op if `dest` is None (e.g. surface-channel sessions have no videos).
    """
    if dest is None:
        logger.debug(f"No behavior_videos folder supplied for {session}")
        return
    logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
    npexp = session.npexp_path
    video_sources = (p for p in npexp.glob('*') if utils.is_behavior_video_file(p))
    for src in video_sources:
        np_tools.symlink(src, dest / src.relative_to(npexp))
    logger.debug(f'Finished creating symlinks to behavior video files in {session.npexp_path}')
161
+
162
+
163
def get_surface_channel_start_time(session: np_session.Session) -> datetime.datetime:
    """Return the acquisition start time of a surface-channel recording, read
    from the first Open Ephys `sync_messages.txt` found under the session folder.

    The first line of that file contains a millisecond epoch timestamp after a
    colon; the result is a naive local datetime (``datetime.fromtimestamp``).

    Raises ValueError if no `sync_messages.txt` is found.

    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
    >>> get_surface_channel_start_time(session)
    datetime.datetime(2023, 11, 29, 14, 56, 25, 219000)
    """
    sync_messages_paths = tuple(session.npexp_path.glob('*/*/*/sync_messages.txt'))
    if not sync_messages_paths:
        raise ValueError(f'No sync messages txt found for surface channel session {session}')
    sync_messages_path = sync_messages_paths[0]

    # only the first line is needed — avoid reading the whole file with readlines()
    with open(sync_messages_path, 'r') as f:
        software_time_line = f.readline()

    # take everything after the first ':' — more robust than the previous
    # `index(':') + 2`, which assumed exactly one space after the colon
    timestamp_value = float(software_time_line.partition(':')[2].strip())
    timestamp = datetime.datetime.fromtimestamp(timestamp_value / 1e3)
    return timestamp
180
+
181
+
182
def get_upload_csv_for_session(upload: CodeOceanUpload) -> dict[str, str | int | bool]:
    """Build the row of `aind-data-transfer` job parameters for an upload.

    Includes one `modality{i}` / `modality{i}.source` pair per assigned upload
    directory (ephys, behavior, behavior-videos), an optional `metadata_dir`,
    and the acquisition datetime (taken from the ephys software start time for
    surface-channel recordings, otherwise from the session start).

    >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels"
    >>> utils.is_surface_channel_recording(path)
    True
    >>> upload = create_codeocean_upload(path)
    >>> ephys_upload_csv = get_upload_csv_for_session(upload)
    >>> ephys_upload_csv['modality0.source']
    '//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_690706_20231129_surface_channels/ephys'
    >>> ephys_upload_csv.keys()
    dict_keys(['project_name', 'platform', 'subject-id', 'force_cloud_sync', 'modality0', 'modality0.source', 'acq-datetime'])
    """
    params = {
        'project_name': upload.project_name,
        'platform': upload.platform,
        'subject-id': str(upload.session.mouse),
        'force_cloud_sync': upload.force_cloud_sync,
    }
    idx = 0
    for modality_name, attr_name in {
        'ecephys': 'ephys',
        'behavior': 'behavior',
        'behavior-videos': 'behavior_videos',
    }.items():
        source = getattr(upload, attr_name)
        if source is not None:
            params[f'modality{idx}'] = modality_name
            params[f'modality{idx}.source'] = np_config.normalize_path(source).as_posix()
            idx += 1

    if upload.aind_metadata:
        params['metadata_dir'] = upload.aind_metadata.as_posix()

    if utils.is_surface_channel_recording(upload.session.npexp_path.as_posix()):
        # session.start isn't meaningful for surface-channel recordings: use the
        # session's date + the ephys software start time instead.
        # (Previously a throwaway datetime was constructed just to call the
        # classmethod `combine` through an instance — call it directly.)
        session_date_time = datetime.datetime.combine(
            upload.session.date, get_surface_channel_start_time(upload.session).time()
        )
        params['acq-datetime'] = f'{session_date_time.strftime(utils.ACQ_DATETIME_FORMAT)}'
    else:
        params['acq-datetime'] = f'{upload.session.start.strftime(utils.ACQ_DATETIME_FORMAT)}'

    return params  # type: ignore
222
+
223
+
224
def is_ephys_session(session: np_session.Session) -> bool:
    """Whether any Open Ephys `settings*.xml` file exists anywhere under the
    session's np-exp folder (i.e. the session has raw ephys data)."""
    for _ in session.npexp_path.rglob('settings*.xml'):
        return True
    return False
226
+
227
def get_np_session(session_path_or_folder_name: str) -> np_session.Session:
    """Accommodates surface channel folders, and updates the returned instance's
    npexp_path accordingly.

    Raises FileNotFoundError if the reconstructed `*_surface_channels` folder is
    missing from np-exp or doesn't carry the expected name.
    """
    is_surface_channel_recording = utils.is_surface_channel_recording(session_path_or_folder_name)
    session = np_session.Session(session_path_or_folder_name)
    if is_surface_channel_recording and not utils.is_surface_channel_recording(session.npexp_path.name):
        # manually assign surface channel path which was lost when creating
        # session object
        session = np_session.Session(session.npexp_path.parent / f'{session.folder}_surface_channels')
        if 'surface_channels' not in session.npexp_path.name or not session.npexp_path.exists():
            raise FileNotFoundError(f"Surface channel path {session.npexp_path} does not exist, or does not exist in expected folder (ie np-exp)")
    return session
239
+
240
def create_codeocean_upload(
    session_path_or_folder_name: str,
    recording_dirs: Iterable[str] | None = None,
    force_cloud_sync: bool = False,
    codeocean_root: pathlib.Path = np_session.NPEXP_PATH / 'codeocean',
) -> CodeOceanUpload:
    """Create directories of symlinks to np-exp files with correct structure
    for upload to CodeOcean.

    - only one `recording` per `Record Node` folder (largest if multiple found)
    - job file for feeding into `aind-data-transfer`

    NOTE(review): `recording_dirs` is accepted but never used in this function —
    confirm whether it should be forwarded (e.g. for the ephys dir decision) or
    dropped from the signature.

    >>> upload = create_codeocean_upload("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
    >>> upload.behavior is None
    True
    >>> upload.ephys.exists()
    True
    """
    platform: utils.AINDPlatform = 'ecephys' # all session-type uploads with a folder of data are ecephys platform; behavior platform is for behavior-box sessions

    session = get_np_session(str(session_path_or_folder_name))
    if utils.is_surface_channel_recording(str(session_path_or_folder_name)):
        # surface-channel recordings carry no behavior data or videos
        root = codeocean_root / f'{session.folder}_surface_channels'
        behavior = None
        behavior_videos = None
    else:
        root = codeocean_root / session.folder
        behavior = np_config.normalize_path(root / 'behavior')
        behavior_videos = behavior.with_name('behavior-videos')

    # NOTE(review): no directory is created at this point — paths are created
    # lazily by the symlink helpers; the log message below is misleading.
    logger.debug(f'Created directory {root} for CodeOcean upload')

    logger.info('Attempting to create sub directory for AIND metadata jsons..')
    metadata_path = get_aind_metadata_path(root)

    return CodeOceanUpload(
        session = session,
        behavior = behavior,
        behavior_videos = behavior_videos,
        ephys = np_config.normalize_path(root / 'ephys') if is_ephys_session(session) else None,
        aind_metadata = metadata_path if has_metadata(session) else None,
        job = np_config.normalize_path(root / 'upload.csv'),
        force_cloud_sync=force_cloud_sync,
        platform=platform,
    )
285
+
286
def has_metadata(session: np_session.Session) -> bool:
    """Whether at least one AIND metadata json is present in the top level of
    the session's np-exp folder."""
    for name in utils.AIND_METADATA_NAMES:
        if (session.npexp_path / name).exists():
            return True
    return False
291
+
292
def get_aind_metadata_path(upload_root: pathlib.Path) -> pathlib.Path:
    """Normalized path of the `aind_metadata` subdirectory under `upload_root`."""
    metadata_dir = upload_root / 'aind_metadata'
    return np_config.normalize_path(metadata_dir)
294
+
295
def upload_session(
    session_path_or_folder_name: str,
    recording_dirs: Iterable[str] | None = None,
    force: bool = False,
    dry_run: bool = False,
    test: bool = False,
    hpc_upload_job_email: str = utils.HPC_UPLOAD_JOB_EMAIL,
    regenerate_symlinks: bool = True,
    adjust_ephys_timestamps: bool = True,
    codeocean_configs: aind_data_transfer_models.core.CodeOceanPipelineMonitorConfigs | None = None,
) -> None:
    """Create symlink dirs + job file for a session and submit it to the hpc
    upload queue.

    Args:
        session_path_or_folder_name: session folder name or path on np-exp.
        recording_dirs: specific `recording*` dir names to include (split recordings).
        force: re-upload/re-make raw asset even if data exists on S3.
        dry_run: build everything but don't actually submit the jobs.
        test: use the dev codeocean root and dev transfer service.
        hpc_upload_job_email: notification email for the hpc job.
        regenerate_symlinks: remove any existing upload dir and rebuild symlinks.
        adjust_ephys_timestamps: write sync-corrected ephys timestamps (needs a
            sync file in the behavior folder).
        codeocean_configs: optional pipeline-monitor configs passed through to
            the job models.

    Raises:
        FileNotFoundError: if timestamp adjustment is requested but no sync
            file is found in the behavior folder.
    """
    # `recording_dirs` is consumed more than once below (symlink creation and
    # the split-recording check); materialize it so a one-shot generator isn't
    # silently exhausted after its first use.
    if recording_dirs is not None and not isinstance(recording_dirs, str):
        recording_dirs = tuple(recording_dirs)
    codeocean_root = np_session.NPEXP_PATH / ('codeocean-dev' if test else 'codeocean')
    logger.debug(f'{codeocean_root = }')
    upload = create_codeocean_upload(
        str(session_path_or_folder_name),
        codeocean_root=codeocean_root,
        recording_dirs=recording_dirs,
        force_cloud_sync=force
    )
    if regenerate_symlinks and upload.root.exists():
        logger.debug(f'Removing existing {upload.root = }')
        shutil.rmtree(upload.root.as_posix(), ignore_errors=True)
    if upload.aind_metadata:
        create_aind_metadata_symlinks(upload)
    if upload.ephys:
        create_ephys_symlinks(upload.session, upload.ephys, recording_dirs=recording_dirs)
    if upload.behavior:
        create_behavior_symlinks(upload.session, upload.behavior)
    if upload.behavior_videos:
        create_behavior_videos_symlinks(upload.session, upload.behavior_videos)
    if adjust_ephys_timestamps and upload.ephys:
        if not upload.behavior: # includes surface channel recordings
            logger.warning(f"Cannot adjust ephys timestamps for {upload.session} - no behavior folder supplied for upload")
        else:
            try:
                utils.write_corrected_ephys_timestamps(ephys_dir=upload.ephys, behavior_dir=upload.behavior)
            except utils.SyncFileNotFoundError:
                raise FileNotFoundError(
                    (
                        f"Cannot adjust timestamps - no sync file found in {upload.behavior}. "
                        "If the session doesn't have one, run with "
                        "`adjust_ephys_timestamps=False` or `--no-sync` flag in CLI"
                    )
                ) from None
    for path in (upload.ephys, upload.behavior, upload.behavior_videos, upload.aind_metadata):
        if path is not None and path.exists():
            utils.convert_symlinks_to_posix(path)
    csv_content: dict = get_upload_csv_for_session(upload)
    utils.write_upload_csv(csv_content, upload.job)
    np_logging.web('np_codeocean').info(f'Submitting {upload.session} to hpc upload queue')
    utils.put_jobs_for_hpc_upload(
        utils.get_job_models_from_csv(upload.job, codeocean_configs=codeocean_configs),
        upload_service_url=utils.DEV_SERVICE if test else utils.AIND_DATA_TRANSFER_SERVICE,
        user_email=hpc_upload_job_email,
        dry_run=dry_run,
        save_path=upload.job.with_suffix('.json'),
    )
    if not dry_run:
        logger.info(f'Finished submitting {upload.session} - check progress at {utils.DEV_SERVICE if test else utils.AIND_DATA_TRANSFER_SERVICE}')

    # BUG FIX: the original condition required `isinstance(recording_dirs, str)`,
    # so the warning could only fire for a bare string (whose `tuple()` counts
    # characters) and never for an actual sequence of multiple recording dirs.
    if (is_split_recording :=
        recording_dirs is not None
        and not isinstance(recording_dirs, str)
        and len(tuple(recording_dirs)) > 1
    ):
        logger.warning(f"Split recording {upload.session} will need to be sorted manually with `CONCAT=True`")
361
+
362
if __name__ == '__main__':
    # run the module doctests; `doctest` is already imported at module level,
    # so the redundant local import has been removed
    doctest.testmod(
        optionflags=(doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE),
    )
@@ -0,0 +1,20 @@
1
import datetime
import logging
import pathlib

import npc_lims

import np_codeocean

# Ensure the log destination exists: logging.basicConfig(filename=...) raises
# FileNotFoundError if the parent directory is missing.
pathlib.Path('logs').mkdir(exist_ok=True)

logging.basicConfig(
    # NOTE(review): timestamp uses '%Y-%d-%m' (year-day-month) — looks like a
    # transposed '%Y-%m-%d'; left as-is since it only affects the log filename.
    filename=f"logs/{pathlib.Path(__file__).stem}_{datetime.datetime.now().strftime('%Y-%d-%m_%H-%M-%S')}.log",
    level=logging.DEBUG,
    format="%(asctime)s | %(name)s | %(levelname)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)

# one-off cleanup of ephys symlinks for a single known session
session_id = 'ecephys_702136_2024-03-06_11-25-22'
root = npc_lims.get_raw_data_root(session_id)
ephys = root / 'ecephys_clipped'

np_codeocean.cleanup_ephys_symlinks(ephys)