np_codeocean 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
np_codeocean/__init__.py CHANGED
@@ -1,2 +1,2 @@
1
1
  from np_codeocean.utils import *
2
- from np_codeocean.upload import *
2
+ from np_codeocean.np_session_utils import *
@@ -0,0 +1,347 @@
1
+ from __future__ import annotations
2
+
3
+ import dataclasses
4
+ import datetime
5
+ import doctest
6
+ import pathlib
7
+ from collections.abc import Iterable
8
+ import shutil
9
+
10
+ import np_config
11
+ import np_logging
12
+ import np_session
13
+ import np_tools
14
+
15
+ import np_codeocean.utils as utils
16
+
17
+ logger = np_logging.get_logger(__name__)
18
+
19
@dataclasses.dataclass
class CodeOceanUpload:
    """Everything needed to upload one Mindscope Neuropixels session to CodeOcean.

    The directory fields hold symlinks that point at files on np-exp.
    """

    session: np_session.Session
    """The session that all of the paths below belong to."""

    platform: utils.AINDPlatform
    """The 'platform' in the Neural Dynamics data schema language (effectively the
    rig type, which determines the processing pipeline the data follows).

    Rules used here:
    - ran in a behavior box: `behavior`
    - anything else: `ecephys`

    As a consequence, behavior-only sessions that ran on NP-rigs without ephys
    data (habs, opto experiments etc.) are uploaded as `ecephys` platform data.
    """

    behavior: pathlib.Path | None
    """Directory of symlinks to files in the top level of the session folder on
    np-exp, plus all files in the `exp` and `qc` subfolders, if present.
    Behavior video files and video info jsons are excluded."""

    behavior_videos: pathlib.Path | None
    """Directory of symlinks to behavior video files and video info jsons in the
    top level of the session folder on np-exp."""

    ephys: pathlib.Path | None
    """Directory of symlinks to raw ephys data files on np-exp, with only one
    `recording` per `Record Node` folder."""

    aind_metadata: pathlib.Path | None
    """Directory of symlinks to aind metadata json files in the top level of the
    session folder on np-exp."""

    job: pathlib.Path
    """File containing job parameters for `aind-data-transfer`."""

    force_cloud_sync: bool = False
    """If True, re-upload and re-make raw asset even if data exists on S3."""

    @property
    def project_name(self) -> str:
        """Project label: "OpenScope" for pipeline sessions, else "Dynamic Routing"."""
        is_pipeline_session = isinstance(self.session, np_session.PipelineSession)
        return "OpenScope" if is_pipeline_session else "Dynamic Routing"

    @property
    def root(self) -> pathlib.Path:
        """Parent folder of the first upload directory that is not None.

        Raises:
            ValueError: if none of the upload directories has been assigned.
        """
        upload_dirs = (self.behavior, self.behavior_videos, self.ephys, self.aind_metadata)
        first_assigned = next((d for d in upload_dirs if d is not None), None)
        if first_assigned is None:
            raise ValueError(f"No upload directories assigned to {self!r}")
        return first_assigned.parent
75
+
76
def create_aind_metadata_symlinks(upload: CodeOceanUpload) -> bool:
    """Symlink aind metadata files from the session's np-exp folder.

    Every top-level entry of `upload.session.npexp_path` whose stem is listed in
    `utils.AIND_METADATA_NAMES` gets a symlink with the same file name inside
    `upload.aind_metadata`.

    Args:
        upload: upload description; `upload.aind_metadata` is the destination
            directory for the symlinks and may be None.

    Returns:
        True if at least one metadata file was found and symlinked, False
        otherwise (including when `upload.aind_metadata` is None).
    """
    dest = upload.aind_metadata
    if dest is None:
        # Mirror the other symlink helpers: a missing destination is a no-op,
        # not a TypeError from `None / src.name`.
        logger.debug(f"No aind_metadata folder supplied for {upload.session}")
        return False

    has_metadata_files = False
    for src in upload.session.npexp_path.glob('*'):
        if src.stem in utils.AIND_METADATA_NAMES:
            np_tools.symlink(utils.ensure_posix(src), dest / src.name)
            has_metadata_files = True

    if has_metadata_files:
        logger.debug(f'Finished creating symlinks to aind metadata files in {upload.session.npexp_path}')
    else:
        logger.debug(f'No metadata files found in {upload.session.npexp_path}; No symlinks for metadata were made')
    return has_metadata_files
92
+
93
+
94
def create_ephys_symlinks(session: np_session.Session, dest: pathlib.Path,
                          recording_dirs: Iterable[str] | None = None) -> None:
    """Create symlinks in `dest` pointing to raw ephys data files, with only one
    `recording` per `Record Node` folder (the largest, if multiple found).

    Relative paths are preserved, so `dest` will essentially be a merge of
    _probeABC / _probeDEF folders.

    Top-level items other than `Record Node *` folders are excluded.

    Files are taken from np-exp unless the session is a pipeline session whose
    lims folder contains matching ephys files, in which case the lims copy is
    used (large raw data may have been deleted from np-exp after upload to lims).

    Args:
        session: session whose ephys data should be linked.
        dest: directory in which the symlinks are created.
        recording_dirs: optional recording folder names forwarded to the
            np_tools path filter.
    """
    # The names are handed to two separate searches below; a one-shot iterator
    # would be exhausted by the first, so materialize it once.
    if recording_dirs is not None and not isinstance(recording_dirs, str):
        recording_dirs = tuple(recording_dirs)

    root_path = session.npexp_path
    if isinstance(session, np_session.PipelineSession) and session.lims_path is not None:
        # if ephys has been uploaded to lims, use lims path, as large raw data may have
        # been deleted from np-exp. Probe lims itself: finding files on np-exp says
        # nothing about whether lims holds them.
        if any(
            np_tools.get_filtered_ephys_paths_relative_to_record_node_parents(
                session.lims_path, specific_recording_dir_names=recording_dirs
            )
        ):
            root_path = session.lims_path

    logger.info(f'Creating symlinks to raw ephys data files in {root_path}...')
    for abs_path, rel_path in np_tools.get_filtered_ephys_paths_relative_to_record_node_parents(
        root_path, specific_recording_dir_names=recording_dirs
    ):
        if not abs_path.is_dir():
            np_tools.symlink(utils.ensure_posix(abs_path), dest / rel_path)
    logger.debug(f'Finished creating symlinks to raw ephys data files in {root_path}')
    utils.cleanup_ephys_symlinks(dest)
122
+
123
+
124
def create_behavior_symlinks(session: np_session.Session, dest: pathlib.Path | None) -> None:
    """Populate `dest` with symlinks to the session's non-video files on np-exp.

    Links are made for every top-level file in the session folder that is not a
    behavior video file, and for every file found recursively in the `exp` and
    `qc` subfolders when they exist. Paths relative to the session folder are
    preserved under `dest`. Does nothing (apart from a debug message) when
    `dest` is None.
    """
    if dest is None:
        logger.debug(f"No behavior folder supplied for {session}")
        return

    def link_into_dest(source: pathlib.Path) -> None:
        # Keep the path relative to the session folder when placing the link.
        np_tools.symlink(utils.ensure_posix(source), dest / source.relative_to(session.npexp_path))

    logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
    for top_level_item in session.npexp_path.glob('*'):
        if top_level_item.is_dir() or utils.is_behavior_video_file(top_level_item):
            continue
        link_into_dest(top_level_item)
    logger.debug(f'Finished creating symlinks to top-level files in {session.npexp_path}')

    for subfolder_name in ('exp', 'qc'):
        subfolder = session.npexp_path / subfolder_name
        if subfolder.exists():
            for nested_item in subfolder.rglob('*'):
                if nested_item.is_dir():
                    continue
                link_into_dest(nested_item)
            logger.debug(f'Finished creating symlinks to {subfolder_name!r} files')
146
+
147
+
148
def create_behavior_videos_symlinks(session: np_session.Session, dest: pathlib.Path | None) -> None:
    """Populate `dest` with symlinks to the behavior video files in the top level
    of the session folder on np-exp.

    Which entries count as behavior video files is decided by
    `utils.is_behavior_video_file`. Does nothing (apart from a debug message)
    when `dest` is None.
    """
    if dest is None:
        logger.debug(f"No behavior_videos folder supplied for {session}")
        return

    logger.info(f'Creating symlinks in {dest} to files in {session.npexp_path}...')
    video_items = (
        item for item in session.npexp_path.glob('*')
        if utils.is_behavior_video_file(item)
    )
    for video_item in video_items:
        link_path = dest / video_item.relative_to(session.npexp_path)
        np_tools.symlink(utils.ensure_posix(video_item), link_path)
    logger.debug(f'Finished creating symlinks to behavior video files in {session.npexp_path}')
160
+
161
+
162
def get_surface_channel_start_time(session: np_session.Session) -> datetime.datetime:
    """Return the software start time recorded in a session's `sync_messages.txt`.

    The first `sync_messages.txt` found three folder levels below
    `session.npexp_path` is used. The value after the first ':' on its first
    line is read as milliseconds since the epoch and converted with
    `datetime.datetime.fromtimestamp` (i.e. a naive datetime in local time).

    Raises:
        ValueError: if no `sync_messages.txt` is found, or its first line does
            not contain a parsable timestamp after a ':'.

    >>> session = np_session.Session("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
    >>> get_surface_channel_start_time(session)
    datetime.datetime(2023, 11, 29, 14, 56, 25, 219000)
    """
    sync_messages_path = next(iter(session.npexp_path.glob('*/*/*/sync_messages.txt')), None)
    if sync_messages_path is None:
        raise ValueError(f'No sync messages txt found for surface channel session {session}')

    # Only the first line is needed, so don't read the whole file.
    with open(sync_messages_path, 'r') as f:
        software_time_line = f.readline()

    # Split on the first ':' rather than slicing at a fixed offset, so the value
    # is read correctly whether or not a space follows the colon.
    _, separator, raw_value = software_time_line.partition(':')
    raw_value = raw_value.strip()
    if not separator or not raw_value:
        raise ValueError(f'No software start time found on first line of {sync_messages_path}')

    timestamp_value = float(raw_value)
    return datetime.datetime.fromtimestamp(timestamp_value / 1e3)
179
+
180
+
181
def get_upload_csv_for_session(upload: CodeOceanUpload) -> dict[str, str | int | bool]:
    """Build the job parameters (one csv row, as a dict) for an upload.

    The dict always carries project name, platform, subject id and the
    force-sync flag, followed by a numbered `modality{i}` / `modality{i}.source`
    pair for each of ephys, behavior and behavior-videos that has a folder
    assigned, an optional `metadata_dir`, and finally `acq-datetime`.

    For surface channel recordings the acquisition time combines the session
    date with the time of day read from the recording's sync messages;
    otherwise the session's own start time is used.

    >>> path = "//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels"
    >>> utils.is_surface_channel_recording(path)
    True
    >>> upload = create_codeocean_upload(path)
    >>> ephys_upload_csv = get_upload_csv_for_session(upload)
    >>> ephys_upload_csv['modality0.source']
    '//allen/programs/mindscope/workgroups/np-exp/codeocean/DRpilot_690706_20231129_surface_channels/ephys'
    >>> ephys_upload_csv.keys()
    dict_keys(['project_name', 'platform', 'subject-id', 'force_cloud_sync', 'modality0', 'modality0.source', 'acq-datetime'])
    """
    job_params = {
        'project_name': upload.project_name,
        'platform': upload.platform,
        'subject-id': str(upload.session.mouse),
        'force_cloud_sync': upload.force_cloud_sync,
    }

    # (modality name in the job file, attribute on the upload object)
    modality_attrs = (
        ('ecephys', 'ephys'),
        ('behavior', 'behavior'),
        ('behavior-videos', 'behavior_videos'),
    )
    assigned = [
        (modality_name, getattr(upload, attr_name))
        for modality_name, attr_name in modality_attrs
        if getattr(upload, attr_name) is not None
    ]
    # Modalities are numbered consecutively, skipping any without a folder.
    for idx, (modality_name, folder) in enumerate(assigned):
        job_params[f'modality{idx}'] = modality_name
        job_params[f'modality{idx}.source'] = np_config.normalize_path(folder).as_posix()

    if upload.aind_metadata:
        job_params['metadata_dir'] = upload.aind_metadata.as_posix()

    if utils.is_surface_channel_recording(upload.session.npexp_path.as_posix()):
        start_time_of_day = get_surface_channel_start_time(upload.session).time()
        acquisition_start = datetime.datetime.combine(upload.session.date, start_time_of_day)
    else:
        acquisition_start = upload.session.start
    job_params['acq-datetime'] = acquisition_start.strftime(utils.ACQ_DATETIME_FORMAT)

    return job_params  # type: ignore
221
+
222
+
223
def is_ephys_session(session: np_session.Session) -> bool:
    """Return True if a `settings*.xml` file exists anywhere under the session's
    np-exp folder, False otherwise."""
    for _ in session.npexp_path.rglob('settings*.xml'):
        # One match is enough; stop searching immediately.
        return True
    return False
225
+
226
def get_np_session(session_path_or_folder_name: str) -> np_session.Session:
    """Create a session object, keeping track of surface channel folders.

    When the input refers to a surface channel recording but the created
    session's np-exp path does not, a second session is created from the
    sibling `<session folder>_surface_channels` folder and returned instead.

    Raises:
        FileNotFoundError: if that surface channel session's np-exp path does
            not contain 'surface_channels' in its name or does not exist.
    """
    wants_surface_channels = utils.is_surface_channel_recording(session_path_or_folder_name)
    session = np_session.Session(session_path_or_folder_name)
    if not wants_surface_channels or utils.is_surface_channel_recording(session.npexp_path.name):
        return session

    # The surface channel suffix was lost when creating the session object:
    # rebuild the session from the explicit surface channel folder.
    surface_channel_path = session.npexp_path.parent / f'{session.folder}_surface_channels'
    session = np_session.Session(surface_channel_path)
    if 'surface_channels' in session.npexp_path.name and session.npexp_path.exists():
        return session
    raise FileNotFoundError(f"Surface channel path {session.npexp_path} does not exist, or does not exist in expected folder (ie np-exp)")
238
+
239
def create_codeocean_upload(
    session_path_or_folder_name: str,
    recording_dirs: Iterable[str] | None = None,
    force_cloud_sync: bool = False,
    codeocean_root: pathlib.Path = np_session.NPEXP_PATH / 'codeocean',
) -> CodeOceanUpload:
    """Describe the folder layout for uploading a session to CodeOcean.

    Works out the upload root under `codeocean_root` and the sub-folders /
    job file inside it, and returns them as a `CodeOceanUpload`:

    - surface channel recordings get a `<folder>_surface_channels` root and no
      behavior or behavior-videos folders
    - `ephys` is only assigned if the session has ephys settings files
    - `aind_metadata` is only assigned if the session has metadata files
    - the platform is always 'ecephys'

    This function only computes paths; the symlinks themselves are made by the
    `create_*_symlinks` functions. `recording_dirs` is accepted but not used here.

    >>> upload = create_codeocean_upload("//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_690706_20231129_surface_channels")
    >>> upload.behavior is None
    True
    >>> upload.ephys.exists()
    True
    """
    # all session-type uploads with a folder of data are ecephys platform;
    # behavior platform is for behavior-box sessions
    platform: utils.AINDPlatform = 'ecephys'

    name_or_path = str(session_path_or_folder_name)
    session = get_np_session(name_or_path)
    if utils.is_surface_channel_recording(name_or_path):
        root = codeocean_root / f'{session.folder}_surface_channels'
        behavior = behavior_videos = None
    else:
        root = codeocean_root / session.folder
        behavior = np_config.normalize_path(root / 'behavior')
        behavior_videos = behavior.with_name('behavior-videos')

    logger.debug(f'Created directory {root} for CodeOcean upload')

    logger.info('Attempting to create sub directory for AIND metadata jsons..')
    metadata_path = get_aind_metadata_path(root)

    ephys = np_config.normalize_path(root / 'ephys') if is_ephys_session(session) else None
    aind_metadata = metadata_path if has_metadata(session) else None
    job = np_config.normalize_path(root / 'upload.csv')

    return CodeOceanUpload(
        session=session,
        platform=platform,
        behavior=behavior,
        behavior_videos=behavior_videos,
        ephys=ephys,
        aind_metadata=aind_metadata,
        job=job,
        force_cloud_sync=force_cloud_sync,
    )
284
+
285
def has_metadata(session: np_session.Session) -> bool:
    """Return True if the session's np-exp folder holds any aind metadata file.

    A top-level entry counts when it is named exactly like one of
    `utils.AIND_METADATA_NAMES`, or when its stem (name without the final
    suffix) is one of them. The stem rule is the criterion
    `create_aind_metadata_symlinks` uses to pick files, so the two functions
    agree on whether there is anything to link.
    """
    metadata_names = utils.AIND_METADATA_NAMES
    # Original check: an entry named exactly like one of the metadata names.
    if any((session.npexp_path / name).exists() for name in metadata_names):
        return True
    # Same rule as the symlink step: match on the stem, ignoring the extension.
    return any(src.stem in metadata_names for src in session.npexp_path.glob('*'))
290
+
291
def get_aind_metadata_path(upload_root: pathlib.Path) -> pathlib.Path:
    """Return the normalized path of the `aind_metadata` folder inside `upload_root`."""
    metadata_dir = upload_root / 'aind_metadata'
    return np_config.normalize_path(metadata_dir)
293
+
294
def upload_session(
    session_path_or_folder_name: str,
    recording_dirs: Iterable[str] | None = None,
    force: bool = False,
    dry_run: bool = False,
    test: bool = False,
    hpc_upload_job_email: str = utils.HPC_UPLOAD_JOB_EMAIL,
    regenerate_symlinks: bool = True,
) -> None:
    """Prepare a session's symlink folders and submit it to the hpc upload queue.

    Steps: work out the upload layout, optionally remove an existing upload
    root, create the metadata / ephys / behavior / video symlinks that apply,
    write the upload csv, and submit the job(s) read back from that csv.

    Args:
        session_path_or_folder_name: session folder name or path.
        recording_dirs: optional recording folder names to restrict ephys to;
            more than one means a split recording, which triggers a warning
            that manual sorting is needed.
        force: passed on as `force_cloud_sync` for the upload.
        dry_run: forwarded to the job submission call.
        test: use the `codeocean-dev` folder and the dev upload service.
        hpc_upload_job_email: forwarded as the user email for the job.
        regenerate_symlinks: if True, delete an existing upload root first.
    """
    # Materialize once: the names are used for the ephys symlinks and again for
    # the split-recording check, and a one-shot iterator would be spent by then.
    if recording_dirs is not None and not isinstance(recording_dirs, str):
        recording_dirs = tuple(recording_dirs)

    codeocean_root = np_session.NPEXP_PATH / ('codeocean-dev' if test else 'codeocean')
    logger.debug(f'{codeocean_root = }')
    upload = create_codeocean_upload(
        str(session_path_or_folder_name),
        codeocean_root=codeocean_root,
        recording_dirs=recording_dirs,
        force_cloud_sync=force,
    )
    if regenerate_symlinks and upload.root.exists():
        logger.debug(f'Removing existing {upload.root = }')
        shutil.rmtree(upload.root.as_posix())

    if upload.aind_metadata:
        create_aind_metadata_symlinks(upload)
    if upload.ephys:
        create_ephys_symlinks(upload.session, upload.ephys, recording_dirs=recording_dirs)
    if upload.behavior:
        create_behavior_symlinks(upload.session, upload.behavior)
    if upload.behavior_videos:
        create_behavior_videos_symlinks(upload.session, upload.behavior_videos)

    csv_content: dict = get_upload_csv_for_session(upload)
    utils.write_upload_csv(csv_content, upload.job)

    service_url = utils.DEV_SERVICE if test else utils.AIND_DATA_TRANSFER_SERVICE
    np_logging.web('np_codeocean').info(f'Submitting {upload.session} to hpc upload queue')
    utils.put_jobs_for_hpc_upload(
        utils.get_job_models_from_csv(upload.job),
        upload_service_url=service_url,
        user_email=hpc_upload_job_email,
        dry_run=dry_run,
        save_path=upload.job.with_suffix('.json'),
    )
    if not dry_run:
        logger.info(f'Finished submitting {upload.session} - check progress at {service_url}')

    # A split recording is several recording folders; a single string is one
    # folder name, not a collection of them, so it must not count.
    is_split_recording = (
        recording_dirs is not None
        and not isinstance(recording_dirs, str)
        and len(recording_dirs) > 1
    )
    if is_split_recording:
        logger.warning(f"Split recording {upload.session} will need to be sorted manually with `CONCAT=True`")
341
+
342
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module's
    # docstrings, comparing only exception types and ignoring whitespace
    # differences in expected output.
    import doctest

    doctest_flags = doctest.IGNORE_EXCEPTION_DETAIL | doctest.NORMALIZE_WHITESPACE
    doctest.testmod(optionflags=doctest_flags)
@@ -0,0 +1,20 @@
1
+ import datetime
2
+ import logging
3
+ import pathlib
4
+ import npc_lims
5
+
6
+ import np_codeocean
7
+
8
# logging.basicConfig(filename=...) opens the log file straight away and does
# not create missing folders, so make sure `logs/` exists first.
log_dir = pathlib.Path("logs")
log_dir.mkdir(parents=True, exist_ok=True)

# Year-month-day order, matching `datefmt` below, so log files sort chronologically.
run_timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
logging.basicConfig(
    filename=f"logs/{pathlib.Path(__file__).stem}_{run_timestamp}.log",
    level=logging.DEBUG,
    format="%(asctime)s | %(name)s | %(levelname)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)

# One-off cleanup of the clipped ephys folder for a single, hard-coded session.
session_id = 'ecephys_702136_2024-03-06_11-25-22'
root = npc_lims.get_raw_data_root(session_id)
ephys = root / 'ecephys_clipped'

np_codeocean.cleanup_ephys_symlinks(ephys)