np_codeocean 0.3.5__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,385 +1,462 @@
1
- from __future__ import annotations
2
-
3
- import dataclasses
4
- import datetime
5
- import doctest
6
- import pathlib
7
- from collections.abc import Iterable
8
- import shutil
9
- import time
10
- from typing import Any
11
-
12
- from aind_codeocean_pipeline_monitor.models import PipelineMonitorSettings
13
- import np_config
14
- import np_logging
15
- import np_session
16
- import np_tools
17
-
18
- import np_codeocean.utils as utils
19
- import typing_extensions
20
- from aind_data_schema_models.modalities import Modality
21
-
22
- logger = np_logging.get_logger(__name__)
23
-
24
- @dataclasses.dataclass
25
- class CodeOceanUpload:
26
- """Objects required for uploading a Mindscope Neuropixels session to CodeOcean.
27
- Paths are symlinks to files on np-exp.
28
- """
29
- session: np_session.Session
30
- """Session object that the paths belong to."""
31
-
32
- platform: utils.AINDPlatform
33
- """The 'platform' in the Neural Dynamics data schema language (effectively the rig
34
- type, which determines the processing pipeline the data follows).
35
-
36
- Our rules are:
37
- - if it ran in a behavior box: `behavior`
38
- - anything else: `ecephys`
39
-
40
- This means there will be behavior-only sessions that ran on NP-rigs
41
- without ephys data (habs, opto experiments etc.), that will be uploaded as
42
- `ecephys` platform data.
43
- """
44
-
45
- behavior: pathlib.Path | None
46
- """Directory of symlinks to files in top-level of session folder on np-exp,
47
- plus all files in `exp` and `qc` subfolders, if present. Excludes behavior video files
48
- and video info jsons."""
49
-
50
- behavior_videos: pathlib.Path | None
51
- """Directory of symlinks to behavior video files and video info jsons in
52
- top-level of session folder on np-exp."""
53
-
54
- ephys: pathlib.Path | None
55
- """Directory of symlinks to raw ephys data files on np-exp, with only one
56
- `recording` per `Record Node` folder."""
57
-
58
- aind_metadata: pathlib.Path | None
59
- """Directory of symlinks to aind metadata json files in top-level of session folder
60
- on np-exp."""
61
-
62
- job: pathlib.Path
63
- """File containing job parameters for `aind-data-transfer`"""
64
-
65
- force_cloud_sync: bool = False
66
- """If True, re-upload and re-make raw asset even if data exists on S3."""
67
-
68
- @property
69
- def project_name(self) -> str:
70
- if isinstance(self.session, np_session.PipelineSession):
71
- return "OpenScope"
72
- return "Dynamic Routing"
73
-
74
- @property
75
- def root(self) -> pathlib.Path:
76
- for attr in (self.behavior, self.behavior_videos, self.ephys, self.aind_metadata):
77
- if attr is not None:
78
- return attr.parent
79
- raise ValueError(f"No upload directories assigned to {self!r}")
80
-
81
- def create_aind_metadata_symlinks(upload: CodeOceanUpload) -> bool:
82
- """
83
- Create symlinks in `dest` pointing to aind metadata json files from the root directory
84
- on np-exp. Returns True if any metadata files are found in np-exp and the `aind_metadata`
85
- folder is created.
86
- """
87
- has_metadata_files = False
88
- for src in upload.session.npexp_path.glob('*'):
89
- if src.stem in utils.AIND_METADATA_NAMES:
90
- np_tools.symlink(src, upload.aind_metadata / src.name)
91
- has_metadata_files = True
92
- if has_metadata_files:
93
- logger.debug(f'Finished creating symlinks to aind metadata files in {upload.session.npexp_path}')
94
- else:
95
- logger.debug(f'No metadata files found in {upload.session.npexp_path}; No symlinks for metadata were made')
96
- return has_metadata_files
97
-
98
-
99
- def create_ephys_symlinks(session: np_session.Session, dest: pathlib.Path,
100
- recording_dirs: Iterable[str] | None = None) -> None:
101
- """Create symlinks in `dest` pointing to raw ephys data files on np-exp, with only one
102
- `recording` per `Record Node` folder (the largest, if multiple found).
103
-
104
- Relative paths are preserved, so `dest` will essentially be a merge of
105
- _probeABC / _probeDEF folders.
106
-
107
- Top-level items other than `Record Node *` folders are excluded.
108
- """
109
- root_path = session.npexp_path
110
- if isinstance(session, np_session.PipelineSession) and session.lims_path is not None:
111
- # if ephys has been uploaded to lims, use lims path, as large raw data may have
112
- # been deleted from np-exp
113
- if any(
114
- np_tools.get_filtered_ephys_paths_relative_to_record_node_parents(
115
- session.npexp_path, specific_recording_dir_names=recording_dirs
116
- )
117
- ):
118
- root_path = session.lims_path
119
- logger.info(f'Creating symlinks to raw ephys data files in {root_path}...')
120
- for abs_path, rel_path in np_tools.get_filtered_ephys_paths_relative_to_record_node_parents(
121
- root_path, specific_recording_dir_names=recording_dirs
122
- ):
123
- if not abs_path.is_dir():
124
- np_tools.symlink(abs_path, dest / rel_path)
125
- logger.debug(f'Finished creating symlinks to raw ephys data files in {root_path}')
126
- utils.cleanup_ephys_symlinks(dest)
127
-
128
-
129
- def create_behavior_symlinks(session: np_session.Session, dest: pathlib.Path | None) -> None:
130
- """Create symlinks in `dest` pointing to files in top-level of session
131
- folder on np-exp, plus all files in `exp` subfolder, if present.
132
- """
133
- if dest is None:
134
- logger.debug(f"No behavior folder supplied for {session}")
135
- return
136
- subfolder_names = ('exp', 'qc')
137
- logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
138
- for src in session.npexp_path.glob('*'):
139
- if not src.is_dir() and not utils.is_behavior_video_file(src):
140
- np_tools.symlink(src, dest / src.relative_to(session.npexp_path))
141
- logger.debug(f'Finished creating symlinks to top-level files in {session.npexp_path}')
142
-
143
- for name in subfolder_names:
144
- subfolder = session.npexp_path / name
145
- if not subfolder.exists():
146
- continue
147
- for src in subfolder.rglob('*'):
148
- if not src.is_dir():
149
- np_tools.symlink(src, dest / src.relative_to(session.npexp_path))
150
- logger.debug(f'Finished creating symlinks to {name!r} files')
151
-
152
-
153
- def create_behavior_videos_symlinks(session: np_session.Session, dest: pathlib.Path | None) -> None:
154
- """Create symlinks in `dest` pointing to MVR video files and info jsons in top-level of session
155
- folder on np-exp.
156
- """
157
- if dest is None:
158
- logger.debug(f"No behavior_videos folder supplied for {session}")
159
- return
160
- logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
161
- for src in session.npexp_path.glob('*'):
162
- if utils.is_behavior_video_file(src):
163
- np_tools.symlink(src, dest / src.relative_to(session.npexp_path))
164
- logger.debug(f'Finished creating symlinks to behavior video files in {session.npexp_path}')
165
-
166
-
167
- def get_surface_channel_start_time(session: np_session.Session) -> datetime.datetime:
168
- """
169
- >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
170
- >>> get_surface_channel_start_time(session)
171
- datetime.datetime(2023, 11, 29, 14, 56, 25, 219000)
172
- """
173
- sync_messages_paths = tuple(session.npexp_path.glob('*/*/*/sync_messages.txt'))
174
- if not sync_messages_paths:
175
- raise ValueError(f'No sync messages txt found for surface channel session {session}')
176
- sync_messages_path = sync_messages_paths[0]
177
-
178
- with open(sync_messages_path, 'r') as f:
179
- software_time_line = f.readlines()[0]
180
-
181
- timestamp_value = float(software_time_line[software_time_line.index(':')+2:].strip())
182
- timestamp = datetime.datetime.fromtimestamp(timestamp_value / 1e3)
183
- return timestamp
184
-
185
-
186
- def get_upload_params_from_session(upload: CodeOceanUpload) -> dict[str, Any]:
187
- """
188
- >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels"
189
- >>> utils.is_surface_channel_recording(path)
190
- True
191
- >>> upload = create_codeocean_upload(path)
192
- >>> ephys_upload_params = get_upload_params_from_session(upload)
193
- >>> ephys_upload_params['modalities']['ecephys']
194
- '//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_690706_20231129_surface_channels/ephys'
195
- >>> ephys_upload_params.keys()
196
- dict_keys(['project_name', 'platform', 'subject_id', 'force_cloud_sync', 'modalities', 'acq_datetime'])
197
- """
198
- params = {
199
- 'project_name': upload.project_name,
200
- 'platform': upload.platform,
201
- 'subject_id': str(upload.session.mouse),
202
- 'force_cloud_sync': upload.force_cloud_sync,
203
- }
204
- modalities = dict() # {modality_abbr: input_source}
205
- for modality_abbr, attr_name in {
206
- Modality.ECEPHYS.abbreviation: 'ephys',
207
- Modality.BEHAVIOR.abbreviation: 'behavior',
208
- Modality.BEHAVIOR_VIDEOS.abbreviation: 'behavior_videos',
209
- }.items():
210
- if getattr(upload, attr_name) is not None:
211
- modalities[modality_abbr] = np_config.normalize_path(getattr(upload, attr_name)).as_posix()
212
- params['modalities'] = modalities
213
-
214
- if upload.aind_metadata:
215
- params['metadata_dir'] = upload.aind_metadata.as_posix()
216
-
217
- if utils.is_surface_channel_recording(upload.session.npexp_path.as_posix()):
218
- date = datetime.datetime(upload.session.date.year, upload.session.date.month, upload.session.date.day)
219
- params['acq_datetime'] = date.combine(upload.session.date, get_surface_channel_start_time(upload.session).time())
220
- else:
221
- params['acq_datetime'] = upload.session.start
222
- return params # type: ignore
223
-
224
-
225
- def is_ephys_session(session: np_session.Session) -> bool:
226
- return bool(next(session.npexp_path.rglob('settings*.xml'), None))
227
-
228
- def get_np_session(session_path_or_folder_name: str) -> np_session.Session:
229
- """Accommodates surface channel folders, and updates the returned instance's
230
- npexp_path accordingly"""
231
- is_surface_channel_recording = utils.is_surface_channel_recording(session_path_or_folder_name)
232
- session = np_session.Session(session_path_or_folder_name)
233
- if is_surface_channel_recording and not utils.is_surface_channel_recording(session.npexp_path.name):
234
- # manually assign surface channel path which was lost when creating
235
- # session object
236
- session = np_session.Session(session.npexp_path.parent / f'{session.folder}_surface_channels')
237
- if 'surface_channels' not in session.npexp_path.name or not session.npexp_path.exists():
238
- raise FileNotFoundError(f"Surface channel path {session.npexp_path} does not exist, or does not exist in expected folder (ie np-exp)")
239
- return session
240
-
241
- def create_codeocean_upload(
242
- session_path_or_folder_name: str,
243
- recording_dirs: Iterable[str] | None = None,
244
- force_cloud_sync: bool = False,
245
- codeocean_root: pathlib.Path = np_session.NPEXP_PATH / 'codeocean',
246
- ) -> CodeOceanUpload:
247
- """Create directories of symlinks to np-exp files with correct structure
248
- for upload to CodeOcean.
249
-
250
- - only one `recording` per `Record Node` folder (largest if multiple found)
251
- - job file for feeding into `aind-data-transfer`
252
-
253
- >>> upload = create_codeocean_upload("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
254
- >>> upload.behavior is None
255
- True
256
- >>> upload.ephys.exists()
257
- True
258
- """
259
- platform: utils.AINDPlatform = 'ecephys' # all session-type uploads with a folder of data are ecephys platform; behavior platform is for behavior-box sessions
260
-
261
- session = get_np_session(str(session_path_or_folder_name))
262
- if utils.is_surface_channel_recording(str(session_path_or_folder_name)):
263
- root = codeocean_root / f'{session.folder}_surface_channels'
264
- behavior = None
265
- behavior_videos = None
266
- else:
267
- root = codeocean_root / session.folder
268
- behavior = np_config.normalize_path(root / 'behavior')
269
- behavior_videos = behavior.with_name('behavior-videos')
270
-
271
- logger.debug(f'Created directory {root} for CodeOcean upload')
272
-
273
- logger.info('Attempting to create sub directory for AIND metadata jsons..')
274
- metadata_path = get_aind_metadata_path(root)
275
-
276
- return CodeOceanUpload(
277
- session = session,
278
- behavior = behavior,
279
- behavior_videos = behavior_videos,
280
- ephys = np_config.normalize_path(root / 'ephys') if is_ephys_session(session) else None,
281
- aind_metadata = metadata_path if has_metadata(session) else None,
282
- job = np_config.normalize_path(root / 'upload.csv'),
283
- force_cloud_sync=force_cloud_sync,
284
- platform=platform,
285
- )
286
-
287
- def has_metadata(session: np_session.Session) -> bool:
288
- return any(
289
- (session.npexp_path / f"{name}.json").exists()
290
- for name in utils.AIND_METADATA_NAMES
291
- )
292
-
293
- def get_aind_metadata_path(upload_root: pathlib.Path) -> pathlib.Path:
294
- return np_config.normalize_path(upload_root / 'aind_metadata')
295
-
296
- def upload_session(
297
- session_path_or_folder_name: str,
298
- recording_dirs: Iterable[str] | None = None,
299
- force: bool = False,
300
- dry_run: bool = False,
301
- test: bool = False,
302
- hpc_upload_job_email: str = utils.HPC_UPLOAD_JOB_EMAIL,
303
- regenerate_symlinks: bool = True,
304
- adjust_ephys_timestamps: bool = True,
305
- codeocean_pipeline_settings: dict[str, PipelineMonitorSettings] | None = None,
306
- extra_UploadJobConfigsV2_params: dict[str, Any] | None = None,
307
- ) -> None:
308
- codeocean_root = np_session.NPEXP_PATH / ('codeocean-dev' if test else 'codeocean')
309
- logger.debug(f'{codeocean_root = }')
310
- upload = create_codeocean_upload(
311
- str(session_path_or_folder_name),
312
- codeocean_root=codeocean_root,
313
- recording_dirs=recording_dirs,
314
- force_cloud_sync=force
315
- )
316
- if regenerate_symlinks and upload.root.exists():
317
- logger.debug(f'Removing existing {upload.root = }')
318
- shutil.rmtree(upload.root.as_posix(), ignore_errors=True)
319
- if upload.aind_metadata:
320
- create_aind_metadata_symlinks(upload)
321
- if upload.ephys:
322
- create_ephys_symlinks(upload.session, upload.ephys, recording_dirs=recording_dirs)
323
- if upload.behavior:
324
- create_behavior_symlinks(upload.session, upload.behavior)
325
- if upload.behavior_videos:
326
- create_behavior_videos_symlinks(upload.session, upload.behavior_videos)
327
- timestamps_adjusted = False
328
- if adjust_ephys_timestamps and upload.ephys:
329
- if not upload.behavior: # includes surface channel recordings
330
- logger.warning(f"Cannot adjust ephys timestamps for {upload.session} - no behavior folder supplied for upload")
331
- else:
332
- try:
333
- utils.write_corrected_ephys_timestamps(ephys_dir=upload.ephys, behavior_dir=upload.behavior)
334
- except utils.SyncFileNotFoundError:
335
- raise FileNotFoundError(
336
- (
337
- f"Cannot adjust timestamps - no sync file found in {upload.behavior}. "
338
- "If the session doesn't have one, run with "
339
- "`adjust_ephys_timestamps=False` or `--no-sync` flag in CLI"
340
- )
341
- ) from None
342
- else:
343
- timestamps_adjusted = True
344
- for path in (upload.ephys, upload.behavior, upload.behavior_videos, upload.aind_metadata):
345
- if path is not None and path.exists():
346
- utils.convert_symlinks_to_posix(path)
347
- job_params_from_session: dict = get_upload_params_from_session(upload)
348
- np_logging.web('np_codeocean').info(f'Submitting {upload.session} to hpc upload queue')
349
- if extra_UploadJobConfigsV2_params is None:
350
- extra_UploadJobConfigsV2_params = {}
351
- if 'codeocean_pipeline_settings' in extra_UploadJobConfigsV2_params:
352
- raise ValueError(
353
- "Cannot pass `codeocean_pipeline_settings` as a parameter to `extra_UploadJobConfigsV2_params`. "
354
- "Use `codeocean_pipeline_settings` parameter instead."
355
- )
356
- utils.put_jobs_for_hpc_upload(
357
- utils.create_upload_job_configs_v2(
358
- **job_params_from_session,
359
- codeocean_pipeline_settings=codeocean_pipeline_settings,
360
- check_timestamps=timestamps_adjusted,
361
- test=test,
362
- user_email=hpc_upload_job_email,
363
- **extra_UploadJobConfigsV2_params
364
- ),
365
- upload_service_url=utils.DEV_SERVICE if test else utils.AIND_DATA_TRANSFER_SERVICE,
366
- user_email=hpc_upload_job_email,
367
- dry_run=dry_run,
368
- save_path=upload.job.with_suffix('.json'),
369
- )
370
- if not dry_run:
371
- logger.info(f'Finished submitting {upload.session} - check progress at {utils.DEV_SERVICE if test else utils.AIND_DATA_TRANSFER_SERVICE}')
372
-
373
- if (is_split_recording :=
374
- recording_dirs is not None
375
- and len(tuple(recording_dirs)) > 1
376
- and isinstance(recording_dirs, str)
377
- ):
378
- logger.warning(f"Split recording {upload.session} will need to be sorted manually with `CONCAT=True`")
379
-
380
- if __name__ == '__main__':
381
- import doctest
382
-
383
- doctest.testmod(
384
- optionflags=(doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE),
385
- )
1
+ from __future__ import annotations
2
+
3
+ import dataclasses
4
+ import datetime
5
+ import doctest
6
+ import pathlib
7
+ import shutil
8
+ from collections.abc import Iterable
9
+ from typing import Any
10
+
11
+ import np_config
12
+ import np_logging
13
+ import np_session
14
+ import np_tools
15
+ from aind_codeocean_pipeline_monitor.models import PipelineMonitorSettings
16
+ from aind_data_schema_models.modalities import Modality
17
+
18
+ import np_codeocean.utils as utils
19
+
20
+ logger = np_logging.get_logger(__name__)
21
+
22
+
23
@dataclasses.dataclass
class CodeOceanUpload:
    """Everything needed to upload one Mindscope Neuropixels session to CodeOcean.

    The directory attributes are folders of symlinks pointing at files on np-exp.
    """

    session: np_session.Session
    """Session object that the paths belong to."""

    platform: utils.AINDPlatform
    """The 'platform' in the Neural Dynamics data schema language (effectively the rig
    type, which determines the processing pipeline the data follows).

    Our rules are:
    - if it ran in a behavior box: `behavior`
    - anything else: `ecephys`

    This means there will be behavior-only sessions that ran on NP-rigs
    without ephys data (habs, opto experiments etc.), that will be uploaded as
    `ecephys` platform data.
    """

    behavior: pathlib.Path | None
    """Directory of symlinks to files in top-level of session folder on np-exp,
    plus all files in `exp` and `qc` subfolders, if present. Excludes behavior video files
    and video info jsons."""

    behavior_videos: pathlib.Path | None
    """Directory of symlinks to behavior video files and video info jsons in
    top-level of session folder on np-exp."""

    ephys: pathlib.Path | None
    """Directory of symlinks to raw ephys data files on np-exp, with only one
    `recording` per `Record Node` folder."""

    aind_metadata: pathlib.Path | None
    """Directory of symlinks to aind metadata json files in top-level of session folder
    on np-exp."""

    job: pathlib.Path
    """File containing job parameters for `aind-data-transfer`"""

    force_cloud_sync: bool = False
    """If True, re-upload and re-make raw asset even if data exists on S3."""

    @property
    def project_name(self) -> str:
        """Project label: "OpenScope" for pipeline sessions, else "Dynamic Routing"."""
        is_pipeline = isinstance(self.session, np_session.PipelineSession)
        return "OpenScope" if is_pipeline else "Dynamic Routing"

    @property
    def root(self) -> pathlib.Path:
        """Parent folder of the first upload directory that is not None.

        Directories are checked in the order behavior, behavior_videos, ephys,
        aind_metadata.

        Raises:
            ValueError: if all four directories are None.
        """
        candidates = (
            self.behavior,
            self.behavior_videos,
            self.ephys,
            self.aind_metadata,
        )
        first_assigned = next(
            (folder for folder in candidates if folder is not None), None
        )
        if first_assigned is None:
            raise ValueError(f"No upload directories assigned to {self!r}")
        return first_assigned.parent
85
+
86
+
87
def create_aind_metadata_symlinks(upload: CodeOceanUpload) -> bool:
    """Symlink AIND metadata files from the session folder into `upload.aind_metadata`.

    Every top-level entry of `upload.session.npexp_path` whose stem is listed in
    `utils.AIND_METADATA_NAMES` gets a same-named symlink inside
    `upload.aind_metadata`.

    Returns:
        True if at least one matching file was found and symlinked; False if
        nothing matched or `upload.aind_metadata` is None.
    """
    dest = upload.aind_metadata
    if dest is None:
        # No destination folder to link into. Without this guard the first
        # matching file would raise a TypeError on `None / src.name`.
        return False

    has_metadata_files = False
    for src in upload.session.npexp_path.glob("*"):
        # NOTE(review): matching is by stem only, so any file extension
        # qualifies, whereas `has_metadata` looks for `<name>.json` - confirm
        # whether non-json files should be linked.
        if src.stem in utils.AIND_METADATA_NAMES:
            np_tools.symlink(src, dest / src.name)
            has_metadata_files = True
    if has_metadata_files:
        logger.debug(
            f"Finished creating symlinks to aind metadata files in {upload.session.npexp_path}"
        )
    else:
        logger.debug(
            f"No metadata files found in {upload.session.npexp_path}; No symlinks for metadata were made"
        )
    return has_metadata_files
107
+
108
+
109
def create_ephys_symlinks(
    session: np_session.Session,
    dest: pathlib.Path,
    recording_dirs: Iterable[str] | None = None,
) -> None:
    """Populate `dest` with symlinks to the session's raw ephys data files.

    Only one `recording` per `Record Node` folder is linked (the largest, if
    multiple are found). Relative paths are preserved, so `dest` ends up as a
    merge of the _probeABC / _probeDEF folders. Top-level items other than
    `Record Node *` folders are excluded.

    `recording_dirs` is forwarded to the np_tools path finder as
    `specific_recording_dir_names`.
    """
    find_ephys_paths = (
        np_tools.get_filtered_ephys_paths_relative_to_record_node_parents
    )

    source_root = session.npexp_path
    may_use_lims = (
        isinstance(session, np_session.PipelineSession)
        and session.lims_path is not None
    )
    # If ephys has been uploaded to lims, use the lims path, as large raw data
    # may have been deleted from np-exp.
    # NOTE(review): the existence check below scans np-exp rather than lims -
    # confirm this is the intended trigger for switching to the lims copy.
    if may_use_lims and any(
        find_ephys_paths(
            session.npexp_path, specific_recording_dir_names=recording_dirs
        )
    ):
        source_root = session.lims_path

    logger.info(f"Creating symlinks to raw ephys data files in {source_root}...")
    for source, relative in find_ephys_paths(
        source_root, specific_recording_dir_names=recording_dirs
    ):
        if source.is_dir():
            continue  # only files are linked
        np_tools.symlink(source, dest / relative)
    logger.debug(f"Finished creating symlinks to raw ephys data files in {source_root}")
    utils.cleanup_ephys_symlinks(dest)
146
+
147
+
148
def create_behavior_symlinks(
    session: np_session.Session, dest: pathlib.Path | None
) -> None:
    """Populate `dest` with symlinks to the session's non-video files.

    Links every top-level file of the session folder on np-exp that is not a
    behavior video file, plus all files found recursively in the `exp` and `qc`
    subfolders, if present. Paths relative to the session folder are preserved.
    Does nothing (beyond a debug log) when `dest` is None.
    """
    if dest is None:
        logger.debug(f"No behavior folder supplied for {session}")
        return

    logger.info(f"Creating symlinks in {dest} to files in {session.npexp_path}...")
    for entry in session.npexp_path.glob("*"):
        if entry.is_dir() or utils.is_behavior_video_file(entry):
            continue
        np_tools.symlink(entry, dest / entry.relative_to(session.npexp_path))
    logger.debug(
        f"Finished creating symlinks to top-level files in {session.npexp_path}"
    )

    for name in ("exp", "qc"):
        subfolder = session.npexp_path / name
        if subfolder.exists():
            for entry in subfolder.rglob("*"):
                if entry.is_dir():
                    continue
                np_tools.symlink(entry, dest / entry.relative_to(session.npexp_path))
            logger.debug(f"Finished creating symlinks to {name!r} files")
174
+
175
+
176
def create_behavior_videos_symlinks(
    session: np_session.Session, dest: pathlib.Path | None
) -> None:
    """Populate `dest` with symlinks to the session's behavior video files.

    Links every top-level entry of the session folder on np-exp that
    `utils.is_behavior_video_file` accepts (MVR video files and info jsons).
    Does nothing (beyond a debug log) when `dest` is None.
    """
    if dest is None:
        logger.debug(f"No behavior_videos folder supplied for {session}")
        return

    logger.info(f"Creating symlinks in {dest} to files in {session.npexp_path}...")
    video_entries = (
        entry
        for entry in session.npexp_path.glob("*")
        if utils.is_behavior_video_file(entry)
    )
    for entry in video_entries:
        np_tools.symlink(entry, dest / entry.relative_to(session.npexp_path))
    logger.debug(
        f"Finished creating symlinks to behavior video files in {session.npexp_path}"
    )
192
+
193
+
194
def get_surface_channel_start_time(session: np_session.Session) -> datetime.datetime:
    """Return the software start time written in a session's `sync_messages.txt`.

    The first `sync_messages.txt` (in sorted path order, so the choice is
    deterministic) found three folder levels below `session.npexp_path` is used.
    The text after the first colon on its first line is parsed as a number of
    milliseconds and converted with `datetime.datetime.fromtimestamp`, which
    yields a naive datetime in local time.

    Raises:
        ValueError: if no `sync_messages.txt` is found, or its first line has
            no colon or no parseable number after it.

    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
    >>> get_surface_channel_start_time(session)
    datetime.datetime(2023, 11, 29, 14, 56, 25, 219000)
    """
    sync_messages_paths = sorted(session.npexp_path.glob("*/*/*/sync_messages.txt"))
    if not sync_messages_paths:
        raise ValueError(
            f"No sync messages txt found for surface channel session {session}"
        )
    sync_messages_path = sync_messages_paths[0]

    # Only the first line is needed, so avoid reading the whole file.
    with open(sync_messages_path) as f:
        software_time_line = f.readline()

    _, separator, value = software_time_line.partition(":")
    if not separator:
        raise ValueError(
            f"Cannot parse software time from first line of {sync_messages_path}: "
            f"{software_time_line!r}"
        )
    timestamp_value = float(value.strip())
    # The file stores milliseconds; `fromtimestamp` expects seconds.
    return datetime.datetime.fromtimestamp(timestamp_value / 1e3)
215
+
216
+
217
def get_upload_params_from_session(upload: CodeOceanUpload) -> dict[str, Any]:
    """Build the session-derived parameters for an upload job from `upload`.

    The result always holds `project_name`, `platform`, `subject_id`,
    `force_cloud_sync`, `modalities` (modality abbreviation -> posix path of
    each upload directory that is not None) and `acq_datetime`. `metadata_dir`
    is added only when `upload.aind_metadata` is set.

    For surface channel recordings `acq_datetime` combines the session date
    with the time of day from `get_surface_channel_start_time`; otherwise it
    is `upload.session.start`.

    >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels"
    >>> utils.is_surface_channel_recording(path)
    True
    >>> upload = create_codeocean_upload(path)
    >>> ephys_upload_params = get_upload_params_from_session(upload)
    >>> ephys_upload_params['modalities']['ecephys']
    '//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_690706_20231129_surface_channels/ephys'
    >>> ephys_upload_params.keys()
    dict_keys(['project_name', 'platform', 'subject_id', 'force_cloud_sync', 'modalities', 'acq_datetime'])
    """
    params: dict[str, Any] = {
        "project_name": upload.project_name,
        "platform": upload.platform,
        "subject_id": str(upload.session.mouse),
        "force_cloud_sync": upload.force_cloud_sync,
    }

    modality_attr_names = {
        Modality.ECEPHYS.abbreviation: "ephys",
        Modality.BEHAVIOR.abbreviation: "behavior",
        Modality.BEHAVIOR_VIDEOS.abbreviation: "behavior_videos",
    }
    modalities = {}  # {modality_abbr: input_source}
    for modality_abbr, attr_name in modality_attr_names.items():
        source_dir = getattr(upload, attr_name)
        if source_dir is not None:
            modalities[modality_abbr] = np_config.normalize_path(source_dir).as_posix()
    params["modalities"] = modalities

    if upload.aind_metadata:
        params["metadata_dir"] = upload.aind_metadata.as_posix()

    if utils.is_surface_channel_recording(upload.session.npexp_path.as_posix()):
        # `combine` is a classmethod: call it on the class instead of building a
        # throwaway datetime instance just to reach it.
        start_time_of_day = get_surface_channel_start_time(upload.session).time()
        params["acq_datetime"] = datetime.datetime.combine(
            upload.session.date, start_time_of_day
        )
    else:
        params["acq_datetime"] = upload.session.start
    return params
260
+
261
+
262
def is_ephys_session(session: np_session.Session) -> bool:
    """Report whether any `settings*.xml` exists anywhere under `session.npexp_path`."""
    for _ in session.npexp_path.rglob("settings*.xml"):
        # One match is enough; stop walking the tree.
        return True
    return False
264
+
265
+
266
def get_np_session(session_path_or_folder_name: str) -> np_session.Session:
    """Create a session object, keeping a surface channel folder as its npexp_path.

    When the input names a surface channel recording but the created session's
    `npexp_path` does not, the session is re-created from the sibling
    `<folder>_surface_channels` path.

    Raises:
        FileNotFoundError: if that re-created session's `npexp_path` is not a
            `surface_channels` folder or does not exist.
    """
    wants_surface_channels = utils.is_surface_channel_recording(
        session_path_or_folder_name
    )
    session = np_session.Session(session_path_or_folder_name)
    if not wants_surface_channels:
        return session
    if utils.is_surface_channel_recording(session.npexp_path.name):
        return session

    # The surface channel path was lost when creating the session object, so
    # assign it manually.
    surface_channel_path = (
        session.npexp_path.parent / f"{session.folder}_surface_channels"
    )
    session = np_session.Session(surface_channel_path)
    path_is_valid = (
        "surface_channels" in session.npexp_path.name
        and session.npexp_path.exists()
    )
    if not path_is_valid:
        raise FileNotFoundError(
            f"Surface channel path {session.npexp_path} does not exist, or does not exist in expected folder (ie np-exp)"
        )
    return session
289
+
290
+
291
def create_codeocean_upload(
    session_path_or_folder_name: str,
    recording_dirs: Iterable[str] | None = None,
    force_cloud_sync: bool = False,
    codeocean_root: pathlib.Path = np_session.NPEXP_PATH / "codeocean",
) -> CodeOceanUpload:
    """Work out the upload folder layout for a session and return it as a
    `CodeOceanUpload`.

    - the upload root is `codeocean_root / <session folder>`, with a
      `_surface_channels` suffix for surface channel recordings
    - surface channel recordings get no `behavior` / `behavior_videos` folders
    - `ephys` is set only if the session has a `settings*.xml` file
    - `aind_metadata` is set only if the session has AIND metadata json files
    - the job file is `upload.csv` in the upload root

    NOTE(review): `recording_dirs` is accepted but not used in this function.

    >>> upload = create_codeocean_upload("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
    >>> upload.behavior is None
    True
    >>> upload.ephys.exists()
    True
    """
    # All session-type uploads with a folder of data are ecephys platform;
    # behavior platform is for behavior-box sessions.
    platform: utils.AINDPlatform = "ecephys"

    name_or_path = str(session_path_or_folder_name)
    session = get_np_session(name_or_path)
    if utils.is_surface_channel_recording(name_or_path):
        root = codeocean_root / f"{session.folder}_surface_channels"
        behavior = behavior_videos = None
    else:
        root = codeocean_root / session.folder
        behavior = np_config.normalize_path(root / "behavior")
        behavior_videos = behavior.with_name("behavior-videos")

    # NOTE(review): no directory creation is visible in this function at this
    # point; the message text is kept unchanged.
    logger.debug(f"Created directory {root} for CodeOcean upload")

    logger.info("Attempting to create sub directory for AIND metadata jsons..")
    metadata_path = get_aind_metadata_path(root)

    ephys = None
    if is_ephys_session(session):
        ephys = np_config.normalize_path(root / "ephys")
    aind_metadata = metadata_path if has_metadata(session) else None
    job = np_config.normalize_path(root / "upload.csv")

    return CodeOceanUpload(
        session=session,
        platform=platform,
        behavior=behavior,
        behavior_videos=behavior_videos,
        ephys=ephys,
        aind_metadata=aind_metadata,
        job=job,
        force_cloud_sync=force_cloud_sync,
    )
342
+
343
+
344
def has_metadata(session: np_session.Session) -> bool:
    """Report whether the session folder holds a `<name>.json` file for any name
    in `utils.AIND_METADATA_NAMES`."""
    for name in utils.AIND_METADATA_NAMES:
        candidate = session.npexp_path / f"{name}.json"
        if candidate.exists():
            return True
    return False
349
+
350
+
351
def get_aind_metadata_path(upload_root: pathlib.Path) -> pathlib.Path:
    """Return the normalized path of the `aind_metadata` folder inside `upload_root`."""
    metadata_dir = upload_root / "aind_metadata"
    return np_config.normalize_path(metadata_dir)
353
+
354
+
355
def upload_session(
    session_path_or_folder_name: str,
    recording_dirs: Iterable[str] | None = None,
    force: bool = False,
    dry_run: bool = False,
    test: bool = False,
    hpc_upload_job_email: str = utils.HPC_UPLOAD_JOB_EMAIL,
    regenerate_symlinks: bool = True,
    adjust_ephys_timestamps: bool = True,
    codeocean_pipeline_settings: dict[str, PipelineMonitorSettings] | None = None,
    extra_UploadJobConfigsV2_params: dict[str, Any] | None = None,
) -> None:
    """Build the symlink folders for a session and submit it to the HPC upload queue.

    Args:
        session_path_or_folder_name: Session to upload; passed as a string to
            `create_codeocean_upload`.
        recording_dirs: Recording folder names forwarded to
            `create_codeocean_upload` and `create_ephys_symlinks`. If more than
            one is given, a warning about manual sorting is logged.
        force: Forwarded as `force_cloud_sync`.
        dry_run: Forwarded to `utils.put_jobs_for_hpc_upload`; also suppresses
            the final "Finished submitting" log message.
        test: Use the `codeocean-dev` root and `utils.DEV_SERVICE` instead of
            `codeocean` and `utils.AIND_DATA_TRANSFER_SERVICE`.
        hpc_upload_job_email: Forwarded as `user_email` to the job config and
            to the submit call.
        regenerate_symlinks: Remove an existing upload root before the symlinks
            are created.
        adjust_ephys_timestamps: Write corrected ephys timestamps when both
            ephys and behavior folders are part of the upload.
        codeocean_pipeline_settings: Forwarded to
            `utils.create_upload_job_configs_v2`.
        extra_UploadJobConfigsV2_params: Extra keyword arguments for
            `utils.create_upload_job_configs_v2`; must not contain
            `codeocean_pipeline_settings`.

    Raises:
        ValueError: if `extra_UploadJobConfigsV2_params` contains
            `codeocean_pipeline_settings`.
        FileNotFoundError: if timestamp adjustment is attempted but no sync file
            is found in the behavior folder.
    """
    # Validate arguments before touching the filesystem, so that a bad call does
    # not first delete and rebuild the upload folder.
    if extra_UploadJobConfigsV2_params is None:
        extra_UploadJobConfigsV2_params = {}
    if "codeocean_pipeline_settings" in extra_UploadJobConfigsV2_params:
        raise ValueError(
            "Cannot pass `codeocean_pipeline_settings` as a parameter to `extra_UploadJobConfigsV2_params`. "
            "Use `codeocean_pipeline_settings` parameter instead."
        )

    codeocean_root = np_session.NPEXP_PATH / ("codeocean-dev" if test else "codeocean")
    logger.debug(f"{codeocean_root = }")
    upload = create_codeocean_upload(
        str(session_path_or_folder_name),
        codeocean_root=codeocean_root,
        recording_dirs=recording_dirs,
        force_cloud_sync=force,
    )
    if regenerate_symlinks and upload.root.exists():
        logger.debug(f"Removing existing {upload.root = }")
        shutil.rmtree(upload.root.as_posix(), ignore_errors=True)

    if upload.aind_metadata:
        create_aind_metadata_symlinks(upload)
    if upload.ephys:
        create_ephys_symlinks(
            upload.session, upload.ephys, recording_dirs=recording_dirs
        )
    if upload.behavior:
        create_behavior_symlinks(upload.session, upload.behavior)
    if upload.behavior_videos:
        create_behavior_videos_symlinks(upload.session, upload.behavior_videos)

    timestamps_adjusted = False
    if adjust_ephys_timestamps and upload.ephys:
        if not upload.behavior:  # includes surface channel recordings
            logger.warning(
                f"Cannot adjust ephys timestamps for {upload.session} - no behavior folder supplied for upload"
            )
        else:
            try:
                utils.write_corrected_ephys_timestamps(
                    ephys_dir=upload.ephys, behavior_dir=upload.behavior
                )
            except utils.SyncFileNotFoundError:
                raise FileNotFoundError(
                    f"Cannot adjust timestamps - no sync file found in {upload.behavior}. "
                    "If the session doesn't have one, run with "
                    "`adjust_ephys_timestamps=False` or `--no-sync` flag in CLI"
                ) from None
            else:
                timestamps_adjusted = True

    for path in (
        upload.ephys,
        upload.behavior,
        upload.behavior_videos,
        upload.aind_metadata,
    ):
        if path is not None and path.exists():
            utils.convert_symlinks_to_posix(path)

    job_params_from_session: dict = get_upload_params_from_session(upload)
    np_logging.web("np_codeocean").info(
        f"Submitting {upload.session} to hpc upload queue"
    )
    upload_service_url = utils.DEV_SERVICE if test else utils.AIND_DATA_TRANSFER_SERVICE
    utils.put_jobs_for_hpc_upload(
        utils.create_upload_job_configs_v2(
            **job_params_from_session,
            codeocean_pipeline_settings=codeocean_pipeline_settings,
            check_timestamps=timestamps_adjusted,
            test=test,
            user_email=hpc_upload_job_email,
            **extra_UploadJobConfigsV2_params,
        ),
        upload_service_url=upload_service_url,
        user_email=hpc_upload_job_email,
        dry_run=dry_run,
        save_path=upload.job.with_suffix(".json"),
    )
    if not dry_run:
        logger.info(
            f"Finished submitting {upload.session} - check progress at {upload_service_url}"
        )

    # A single string names one recording folder (and iterating it would count
    # characters), so only a non-string collection with several entries is a
    # split recording. The previous check required a string and so never fired
    # for a real multi-folder request.
    is_split_recording = (
        recording_dirs is not None
        and not isinstance(recording_dirs, str)
        and len(tuple(recording_dirs)) > 1
    )
    if is_split_recording:
        logger.warning(
            f"Split recording {upload.session} will need to be sorted manually with `CONCAT=True`"
        )
455
+
456
+
457
if __name__ == "__main__":
    # Run this module's doctests when it is executed as a script.
    import doctest

    _doctest_flags = doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE
    doctest.testmod(optionflags=_doctest_flags)