sl-shared-assets 3.1.3__py3-none-any.whl → 4.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sl-shared-assets might be problematic. Click here for more details.
- sl_shared_assets/__init__.py +8 -0
- sl_shared_assets/__init__.pyi +8 -0
- sl_shared_assets/cli.py +23 -9
- sl_shared_assets/cli.pyi +7 -2
- sl_shared_assets/data_classes/__init__.py +13 -1
- sl_shared_assets/data_classes/__init__.pyi +6 -0
- sl_shared_assets/data_classes/configuration_data.py +1 -1
- sl_shared_assets/data_classes/runtime_data.py +29 -18
- sl_shared_assets/data_classes/runtime_data.pyi +10 -7
- sl_shared_assets/data_classes/session_data.py +269 -230
- sl_shared_assets/data_classes/session_data.pyi +111 -34
- sl_shared_assets/data_classes/surgery_data.py +7 -7
- sl_shared_assets/data_classes/surgery_data.pyi +7 -7
- sl_shared_assets/server/server.py +2 -2
- sl_shared_assets/tools/packaging_tools.py +7 -8
- sl_shared_assets/tools/packaging_tools.pyi +2 -0
- sl_shared_assets/tools/project_management_tools.py +182 -91
- sl_shared_assets/tools/project_management_tools.pyi +48 -12
- {sl_shared_assets-3.1.3.dist-info → sl_shared_assets-4.0.1.dist-info}/METADATA +45 -47
- sl_shared_assets-4.0.1.dist-info/RECORD +36 -0
- sl_shared_assets-3.1.3.dist-info/RECORD +0 -36
- {sl_shared_assets-3.1.3.dist-info → sl_shared_assets-4.0.1.dist-info}/WHEEL +0 -0
- {sl_shared_assets-3.1.3.dist-info → sl_shared_assets-4.0.1.dist-info}/entry_points.txt +0 -0
- {sl_shared_assets-3.1.3.dist-info → sl_shared_assets-4.0.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -13,11 +13,12 @@ from ataraxis_base_utilities import LogLevel, console
|
|
|
13
13
|
from ..data_classes import (
|
|
14
14
|
SessionData,
|
|
15
15
|
SessionTypes,
|
|
16
|
-
|
|
16
|
+
TrackerFileNames,
|
|
17
17
|
RunTrainingDescriptor,
|
|
18
18
|
LickTrainingDescriptor,
|
|
19
19
|
WindowCheckingDescriptor,
|
|
20
20
|
MesoscopeExperimentDescriptor,
|
|
21
|
+
get_processing_tracker,
|
|
21
22
|
)
|
|
22
23
|
from .packaging_tools import calculate_directory_checksum
|
|
23
24
|
|
|
@@ -149,28 +150,20 @@ class ProjectManifest:
|
|
|
149
150
|
"""
|
|
150
151
|
return tuple(self._data.select("animal").unique().sort("animal").to_series().to_list())
|
|
151
152
|
|
|
152
|
-
|
|
153
|
-
def sessions(self) -> tuple[str, ...]:
|
|
154
|
-
"""Returns all session IDs stored inside the manifest file.
|
|
155
|
-
|
|
156
|
-
This provides a tuple of all sessions, independent of the participating animal, that were recorded as part
|
|
157
|
-
of the target project.
|
|
158
|
-
"""
|
|
159
|
-
return tuple(self._data.select("session").sort("session").to_series().to_list())
|
|
160
|
-
|
|
161
|
-
def get_sessions_for_animal(
|
|
153
|
+
def _get_filtered_sessions(
|
|
162
154
|
self,
|
|
163
|
-
animal: str | int,
|
|
155
|
+
animal: str | int | None = None,
|
|
164
156
|
exclude_incomplete: bool = True,
|
|
165
157
|
dataset_ready_only: bool = False,
|
|
166
158
|
not_dataset_ready_only: bool = False,
|
|
167
159
|
) -> tuple[str, ...]:
|
|
168
|
-
"""
|
|
160
|
+
"""This worker method is used to get a list of sessions with optional filtering.
|
|
169
161
|
|
|
170
|
-
|
|
162
|
+
User-facing methods call this worker under-the-hood to fetch the filtered tuple of sessions.
|
|
171
163
|
|
|
172
164
|
Args:
|
|
173
|
-
animal:
|
|
165
|
+
animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
|
|
166
|
+
animals.
|
|
174
167
|
exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
|
|
175
168
|
list.
|
|
176
169
|
dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
|
|
@@ -179,22 +172,27 @@ class ProjectManifest:
|
|
|
179
172
|
as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
|
|
180
173
|
enabled, the 'dataset_ready_only' option takes precedence.
|
|
181
174
|
|
|
175
|
+
Returns:
|
|
176
|
+
The tuple of session IDs matching the filter criteria.
|
|
177
|
+
|
|
182
178
|
Raises:
|
|
183
179
|
ValueError: If the specified animal is not found in the manifest file.
|
|
184
180
|
"""
|
|
181
|
+
data = self._data
|
|
185
182
|
|
|
186
|
-
#
|
|
187
|
-
if
|
|
188
|
-
animal
|
|
189
|
-
|
|
190
|
-
|
|
183
|
+
# Filter by animal if specified
|
|
184
|
+
if animal is not None:
|
|
185
|
+
# Ensures that the 'animal' argument has the same type as the data inside the DataFrame.
|
|
186
|
+
if self._animal_string:
|
|
187
|
+
animal = str(animal)
|
|
188
|
+
else:
|
|
189
|
+
animal = int(animal)
|
|
191
190
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
191
|
+
if animal not in self.animals:
|
|
192
|
+
message = f"Animal ID '{animal}' not found in the project manifest. Available animals: {self.animals}."
|
|
193
|
+
console.error(message=message, error=ValueError)
|
|
195
194
|
|
|
196
|
-
|
|
197
|
-
data = self._data.filter(pl.col("animal") == animal)
|
|
195
|
+
data = data.filter(pl.col("animal") == animal)
|
|
198
196
|
|
|
199
197
|
# Optionally filters out incomplete sessions
|
|
200
198
|
if exclude_incomplete:
|
|
@@ -210,6 +208,51 @@ class ProjectManifest:
|
|
|
210
208
|
sessions = data.select("session").sort("session").to_series().to_list()
|
|
211
209
|
return tuple(sessions)
|
|
212
210
|
|
|
211
|
+
@property
|
|
212
|
+
def sessions(self) -> tuple[str, ...]:
|
|
213
|
+
"""Returns all session IDs stored inside the manifest file.
|
|
214
|
+
|
|
215
|
+
This property provides a tuple of all sessions, independent of the participating animal, that were recorded as
|
|
216
|
+
part of the target project. Use the get_sessions() method to get a filtered tuple of session IDs.
|
|
217
|
+
"""
|
|
218
|
+
return self._get_filtered_sessions(animal=None, exclude_incomplete=False)
|
|
219
|
+
|
|
220
|
+
def get_sessions(
|
|
221
|
+
self,
|
|
222
|
+
animal: str | int | None = None,
|
|
223
|
+
exclude_incomplete: bool = True,
|
|
224
|
+
dataset_ready_only: bool = False,
|
|
225
|
+
not_dataset_ready_only: bool = False,
|
|
226
|
+
) -> tuple[str, ...]:
|
|
227
|
+
"""Returns requested session IDs based on selected filtering criteria.
|
|
228
|
+
|
|
229
|
+
This method provides a tuple of sessions based on the specified filters. If no animal is specified, returns
|
|
230
|
+
sessions for all animals in the project.
|
|
231
|
+
|
|
232
|
+
Args:
|
|
233
|
+
animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
|
|
234
|
+
animals.
|
|
235
|
+
exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
|
|
236
|
+
list.
|
|
237
|
+
dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
|
|
238
|
+
the output list. Enabling this option only shows sessions that can be integrated into a dataset.
|
|
239
|
+
not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
|
|
240
|
+
as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
|
|
241
|
+
enabled, the 'dataset_ready_only' option takes precedence.
|
|
242
|
+
|
|
243
|
+
Returns:
|
|
244
|
+
The tuple of session IDs matching the filter criteria.
|
|
245
|
+
|
|
246
|
+
Raises:
|
|
247
|
+
ValueError: If the specified animal is not found in the manifest file.
|
|
248
|
+
"""
|
|
249
|
+
return self._get_filtered_sessions(
|
|
250
|
+
animal=animal,
|
|
251
|
+
exclude_incomplete=exclude_incomplete,
|
|
252
|
+
dataset_ready_only=dataset_ready_only,
|
|
253
|
+
not_dataset_ready_only=not_dataset_ready_only,
|
|
254
|
+
)
|
|
255
|
+
|
|
213
256
|
def get_session_info(self, session: str) -> pl.DataFrame:
|
|
214
257
|
"""Returns a Polars DataFrame that stores detailed information for the specified session.
|
|
215
258
|
|
|
@@ -237,7 +280,7 @@ def generate_project_manifest(
|
|
|
237
280
|
This function evaluates the input project directory and builds the 'manifest' file for the project. The file
|
|
238
281
|
includes the descriptive information about every session stored inside the input project folder and the state of
|
|
239
282
|
the session's data processing (which processing pipelines have been applied to each session). The file will be
|
|
240
|
-
created under the 'output_path' directory and use the following name pattern:
|
|
283
|
+
created under the 'output_path' directory and use the following name pattern: ProjectName_manifest.feather.
|
|
241
284
|
|
|
242
285
|
Notes:
|
|
243
286
|
The manifest file is primarily used to capture and move project state information between machines, typically
|
|
@@ -369,7 +412,9 @@ def generate_project_manifest(
|
|
|
369
412
|
manifest["complete"].append(session_data.raw_data.telomere_path.exists())
|
|
370
413
|
|
|
371
414
|
# Data verification status
|
|
372
|
-
tracker =
|
|
415
|
+
tracker = get_processing_tracker(
|
|
416
|
+
root=session_data.raw_data.raw_data_path, file_name=TrackerFileNames.INTEGRITY
|
|
417
|
+
)
|
|
373
418
|
manifest["integrity"].append(tracker.is_complete)
|
|
374
419
|
|
|
375
420
|
# If the session is incomplete or unverified, marks all processing steps as FALSE, as automatic processing
|
|
@@ -383,15 +428,21 @@ def generate_project_manifest(
|
|
|
383
428
|
continue # Cycles to the next session
|
|
384
429
|
|
|
385
430
|
# Suite2p (single-day) processing status.
|
|
386
|
-
tracker =
|
|
431
|
+
tracker = get_processing_tracker(
|
|
432
|
+
file_name=TrackerFileNames.SUITE2P, root=session_data.processed_data.processed_data_path
|
|
433
|
+
)
|
|
387
434
|
manifest["suite2p"].append(tracker.is_complete)
|
|
388
435
|
|
|
389
436
|
# Behavior data processing status.
|
|
390
|
-
tracker =
|
|
437
|
+
tracker = get_processing_tracker(
|
|
438
|
+
file_name=TrackerFileNames.BEHAVIOR, root=session_data.processed_data.processed_data_path
|
|
439
|
+
)
|
|
391
440
|
manifest["behavior"].append(tracker.is_complete)
|
|
392
441
|
|
|
393
442
|
# DeepLabCut (video) processing status.
|
|
394
|
-
tracker =
|
|
443
|
+
tracker = get_processing_tracker(
|
|
444
|
+
file_name=TrackerFileNames.VIDEO, root=session_data.processed_data.processed_data_path
|
|
445
|
+
)
|
|
395
446
|
manifest["video"].append(tracker.is_complete)
|
|
396
447
|
|
|
397
448
|
# Tracks whether the session's data is currently in the processing or dataset integration mode.
|
|
@@ -435,6 +486,7 @@ def generate_project_manifest(
|
|
|
435
486
|
|
|
436
487
|
def verify_session_checksum(
|
|
437
488
|
session_path: Path,
|
|
489
|
+
manager_id: int,
|
|
438
490
|
create_processed_data_directory: bool = True,
|
|
439
491
|
processed_data_root: None | Path = None,
|
|
440
492
|
update_manifest: bool = False,
|
|
@@ -459,6 +511,8 @@ def verify_session_checksum(
|
|
|
459
511
|
Args:
|
|
460
512
|
session_path: The path to the session directory to be verified. Note, the input session directory must contain
|
|
461
513
|
the 'raw_data' subdirectory.
|
|
514
|
+
manager_id: The xxHash-64 hash-value that specifies the unique identifier of the manager process that
|
|
515
|
+
manages the integrity verification runtime.
|
|
462
516
|
create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
|
|
463
517
|
processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
|
|
464
518
|
the root directory where to store the processed data from all projects, and it will be automatically
|
|
@@ -476,14 +530,12 @@ def verify_session_checksum(
|
|
|
476
530
|
)
|
|
477
531
|
|
|
478
532
|
# Initializes the ProcessingTracker instance for the verification tracker file
|
|
479
|
-
tracker =
|
|
533
|
+
tracker = get_processing_tracker(root=session_data.raw_data.raw_data_path, file_name=TrackerFileNames.INTEGRITY)
|
|
534
|
+
console.echo(f"{tracker.file_path}")
|
|
480
535
|
|
|
481
536
|
# Updates the tracker data to communicate that the verification process has started. This automatically clears
|
|
482
537
|
# the previous 'completed' status.
|
|
483
|
-
tracker.start()
|
|
484
|
-
|
|
485
|
-
# Try starts here to allow for proper error-driven 'start' terminations of the tracker cannot acquire the lock for
|
|
486
|
-
# a long time, or if another runtime is already underway.
|
|
538
|
+
tracker.start(manager_id=manager_id)
|
|
487
539
|
try:
|
|
488
540
|
# Re-calculates the checksum for the raw_data directory
|
|
489
541
|
calculated_checksum = calculate_directory_checksum(
|
|
@@ -502,14 +554,14 @@ def verify_session_checksum(
|
|
|
502
554
|
|
|
503
555
|
else:
|
|
504
556
|
# Sets the tracker to indicate that the verification runtime completed successfully.
|
|
505
|
-
tracker.stop()
|
|
557
|
+
tracker.stop(manager_id=manager_id)
|
|
506
558
|
|
|
507
559
|
finally:
|
|
508
560
|
# If the code reaches this section while the tracker indicates that the processing is still running,
|
|
509
561
|
# this means that the verification runtime encountered an error. Configures the tracker to indicate that this
|
|
510
562
|
# runtime finished with an error to prevent deadlocking the runtime.
|
|
511
563
|
if tracker.is_running:
|
|
512
|
-
tracker.error()
|
|
564
|
+
tracker.error(manager_id=manager_id)
|
|
513
565
|
|
|
514
566
|
# If the runtime is configured to generate the project manifest file, attempts to generate and overwrite the
|
|
515
567
|
# existing manifest file for the target project.
|
|
@@ -543,9 +595,8 @@ def resolve_p53_marker(
|
|
|
543
595
|
from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
|
|
544
596
|
that only one type of runtime (processing or dataset integration) is allowed to work with the session.
|
|
545
597
|
|
|
546
|
-
For the p53.bin marker to be created, the session must
|
|
547
|
-
|
|
548
|
-
dataset integration. This is due to data access hierarchy limitations of the Sun lab compute server.
|
|
598
|
+
For the p53.bin marker to be created, the session must not be undergoing processing. For the p53 marker
|
|
599
|
+
to be removed, the session must not be undergoing dataset integration.
|
|
549
600
|
|
|
550
601
|
Since version 3.1.0, this function also supports (re)generating the processed session's project manifest file,
|
|
551
602
|
which is used to support further Sun lab data processing pipelines.
|
|
@@ -570,34 +621,69 @@ def resolve_p53_marker(
|
|
|
570
621
|
make_processed_data_directory=create_processed_data_directory,
|
|
571
622
|
)
|
|
572
623
|
|
|
573
|
-
# If the p53.bin marker exists and the runtime is configured to remove it,
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
624
|
+
# If the p53.bin marker exists and the runtime is configured to remove it, attempts to remove the marker file.
|
|
625
|
+
if session_data.processed_data.p53_path.exists() and remove:
|
|
626
|
+
# This section deals with a unique nuance related to the Sun lab processing server organization. Specifically,
|
|
627
|
+
# the user accounts are not allowed to modify or create files in the data directories owned by the service
|
|
628
|
+
# accounts. In turn, this prevents user accounts from modifying the processed data directory to indicate when
|
|
629
|
+
# they are running a dataset integration pipeline on the processed data. To work around this problem, the
|
|
630
|
+
# dataset integration pipeline now creates a 'semaphore' marker for each session that is currently being
|
|
631
|
+
# integrated into a dataset. This semaphore marker is stored under the root user working directory, inside the
|
|
632
|
+
# subdirectory called 'semaphore'.
|
|
633
|
+
|
|
634
|
+
# The parent of the shared sun-lab processed data directory is the root 'working' volume. All user directories
|
|
635
|
+
# are stored under this root working directory.
|
|
636
|
+
if processed_data_root is None:
|
|
637
|
+
# If the processed data root is not provided, sets it to the great-grandparent of the session directory.
|
|
638
|
+
# This works assuming that the data is stored under: root/project/animal/session.
|
|
639
|
+
processed_data_root = session_path.parents[2]
|
|
640
|
+
working_root = processed_data_root.parent
|
|
641
|
+
|
|
642
|
+
# Loops over each user directory and checks whether a semaphore marker exists for the processed session.
|
|
643
|
+
for directory in working_root.iterdir():
|
|
644
|
+
if (
|
|
645
|
+
len([marker for marker in directory.joinpath("semaphore").glob(f"*{session_data.session_name}.bin")])
|
|
646
|
+
> 0
|
|
647
|
+
):
|
|
648
|
+
# Aborts with an error if the semaphore marker prevents the p53 marker from being removed.
|
|
649
|
+
message = (
|
|
650
|
+
f"Unable to remove the dataset marker for the session' {session_data.session_name}' acquired "
|
|
651
|
+
f"for the animal '{session_data.animal_id}' under the '{session_data.project_name}' project. "
|
|
652
|
+
f"The session data is currently being integrated into a dataset by the owner the "
|
|
653
|
+
f"'{directory.stem}' user directory. Wait until the ongoing dataset integration is complete and "
|
|
654
|
+
f"repeat the command that produced this error."
|
|
655
|
+
)
|
|
656
|
+
console.error(message=message, error=RuntimeError)
|
|
584
657
|
|
|
658
|
+
# If the session does not have a corresponding semaphore marker in any user directories, removes the p53 marker
|
|
659
|
+
# file.
|
|
660
|
+
session_data.processed_data.p53_path.unlink()
|
|
585
661
|
message = (
|
|
586
|
-
f"Dataset marker for the session {session_data.session_name} acquired for the animal "
|
|
587
|
-
f"{session_data.animal_id}
|
|
662
|
+
f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
|
|
663
|
+
f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Removed."
|
|
588
664
|
)
|
|
589
665
|
console.echo(message=message, level=LogLevel.SUCCESS)
|
|
590
|
-
return # Ends
|
|
666
|
+
return # Ends remove runtime
|
|
591
667
|
|
|
592
|
-
# If the marker does not exist and the function is called in 'remove' mode, aborts the runtime
|
|
593
|
-
elif remove:
|
|
668
|
+
# If the marker does not exist and the function is called in 'remove' mode, aborts the runtime early
|
|
669
|
+
elif not session_data.processed_data.p53_path.exists() and remove:
|
|
594
670
|
message = (
|
|
595
|
-
f"Dataset marker for the session {session_data.session_name} acquired for the animal "
|
|
596
|
-
f"{session_data.animal_id}
|
|
671
|
+
f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
|
|
672
|
+
f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Does not exist. No actions "
|
|
673
|
+
f"taken."
|
|
597
674
|
)
|
|
598
675
|
console.echo(message=message, level=LogLevel.SUCCESS)
|
|
599
676
|
return # Ends remove runtime
|
|
600
677
|
|
|
678
|
+
elif session_data.processed_data.p53_path.exists():
|
|
679
|
+
message = (
|
|
680
|
+
f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
|
|
681
|
+
f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Already exists. No actions "
|
|
682
|
+
f"taken."
|
|
683
|
+
)
|
|
684
|
+
console.echo(message=message, level=LogLevel.SUCCESS)
|
|
685
|
+
return # Ends create runtime
|
|
686
|
+
|
|
601
687
|
# The rest of the runtime deals with determining whether it is safe to create the marker file.
|
|
602
688
|
# Queries the type of the processed session
|
|
603
689
|
session_type = session_data.session_type
|
|
@@ -606,53 +692,58 @@ def resolve_p53_marker(
|
|
|
606
692
|
# p53.bin file. Similarly, any incomplete session is automatically excluded from dataset formation.
|
|
607
693
|
if session_type == SessionTypes.WINDOW_CHECKING or not session_data.raw_data.telomere_path.exists():
|
|
608
694
|
message = (
|
|
609
|
-
f"Unable to generate the dataset marker for the session {session_data.session_name} acquired for the "
|
|
610
|
-
f"animal {session_data.animal_id}
|
|
611
|
-
f"is of Window Checking type. These sessions must be manually evaluated and marked for
|
|
612
|
-
f"by the experimenter. "
|
|
695
|
+
f"Unable to generate the dataset marker for the session '{session_data.session_name}' acquired for the "
|
|
696
|
+
f"animal '{session_data.animal_id}' under the '{session_data.project_name}' project, as the session is "
|
|
697
|
+
f"incomplete or is of Window Checking type. These sessions must be manually evaluated and marked for "
|
|
698
|
+
f"dataset inclusion by the experimenter. "
|
|
613
699
|
)
|
|
614
|
-
console.
|
|
615
|
-
return
|
|
700
|
+
console.error(message=message, error=RuntimeError)
|
|
616
701
|
|
|
617
702
|
# Training sessions collect similar data and share processing pipeline requirements
|
|
703
|
+
error: bool = False
|
|
618
704
|
if session_type == SessionTypes.LICK_TRAINING or session_type == SessionTypes.RUN_TRAINING:
|
|
619
705
|
# Ensures that the session is not being processed with one of the supported pipelines.
|
|
620
|
-
behavior_tracker =
|
|
621
|
-
|
|
706
|
+
behavior_tracker = get_processing_tracker(
|
|
707
|
+
file_name=TrackerFileNames.BEHAVIOR, root=session_data.processed_data.processed_data_path
|
|
708
|
+
)
|
|
709
|
+
video_tracker = get_processing_tracker(
|
|
710
|
+
file_name=TrackerFileNames.VIDEO, root=session_data.processed_data.processed_data_path
|
|
711
|
+
)
|
|
622
712
|
if behavior_tracker.is_running or video_tracker.is_running:
|
|
623
713
|
# Note, training runtimes do not require suite2p processing.
|
|
624
|
-
|
|
625
|
-
f"Unable to generate the dataset marker for the session {session_data.session_name} acquired for the "
|
|
626
|
-
f"animal {session_data.animal_id} and {session_data.project_name} project, as it is currently being "
|
|
627
|
-
f"processed by one of the data processing pipelines. Wait until the session is fully processed by all "
|
|
628
|
-
f"pipelines and repeat the command that encountered this error."
|
|
629
|
-
)
|
|
630
|
-
console.echo(message=message, level=LogLevel.ERROR)
|
|
631
|
-
return
|
|
714
|
+
error = True
|
|
632
715
|
|
|
633
716
|
# Mesoscope experiment sessions require additional processing with suite2p
|
|
634
|
-
|
|
635
|
-
behavior_tracker =
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
717
|
+
elif session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
|
|
718
|
+
behavior_tracker = get_processing_tracker(
|
|
719
|
+
file_name=TrackerFileNames.BEHAVIOR, root=session_data.processed_data.processed_data_path
|
|
720
|
+
)
|
|
721
|
+
suite2p_tracker = get_processing_tracker(
|
|
722
|
+
file_name=TrackerFileNames.SUITE2P, root=session_data.processed_data.processed_data_path
|
|
723
|
+
)
|
|
724
|
+
video_tracker = get_processing_tracker(
|
|
725
|
+
file_name=TrackerFileNames.VIDEO, root=session_data.processed_data.processed_data_path
|
|
726
|
+
)
|
|
727
|
+
console.echo(f"{behavior_tracker.is_running}")
|
|
728
|
+
if behavior_tracker.is_running or video_tracker.is_running or suite2p_tracker.is_running:
|
|
729
|
+
error = True
|
|
730
|
+
|
|
731
|
+
# If the session is currently being processed by one or more pipelines, aborts with an error.
|
|
732
|
+
if error:
|
|
733
|
+
message = (
|
|
734
|
+
f"Unable to generate the dataset marker for the session '{session_data.session_name}' acquired for the "
|
|
735
|
+
f"animal '{session_data.animal_id}' under the '{session_data.project_name}' project, as it is "
|
|
736
|
+
f"currently being processed by one of the data processing pipelines. Wait until the session is fully "
|
|
737
|
+
f"processed by all pipelines and repeat the command that encountered this error."
|
|
738
|
+
)
|
|
739
|
+
console.error(message=message, error=RuntimeError)
|
|
649
740
|
|
|
650
741
|
# If the runtime reached this point, the session is eligible for dataset integration. Creates the p53.bin marker
|
|
651
742
|
# file, preventing the session from being processed again as long as the marker exists.
|
|
652
743
|
session_data.processed_data.p53_path.touch()
|
|
653
744
|
message = (
|
|
654
|
-
f"Dataset marker for the session {session_data.session_name} acquired for the animal "
|
|
655
|
-
f"{session_data.animal_id}
|
|
745
|
+
f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
|
|
746
|
+
f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Created."
|
|
656
747
|
)
|
|
657
748
|
console.echo(message=message, level=LogLevel.SUCCESS)
|
|
658
749
|
|
|
@@ -5,11 +5,12 @@ import polars as pl
|
|
|
5
5
|
from ..data_classes import (
|
|
6
6
|
SessionData as SessionData,
|
|
7
7
|
SessionTypes as SessionTypes,
|
|
8
|
-
|
|
8
|
+
TrackerFileNames as TrackerFileNames,
|
|
9
9
|
RunTrainingDescriptor as RunTrainingDescriptor,
|
|
10
10
|
LickTrainingDescriptor as LickTrainingDescriptor,
|
|
11
11
|
WindowCheckingDescriptor as WindowCheckingDescriptor,
|
|
12
12
|
MesoscopeExperimentDescriptor as MesoscopeExperimentDescriptor,
|
|
13
|
+
get_processing_tracker as get_processing_tracker,
|
|
13
14
|
)
|
|
14
15
|
from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
|
|
15
16
|
|
|
@@ -61,26 +62,56 @@ class ProjectManifest:
|
|
|
61
62
|
|
|
62
63
|
This provides a tuple of all animal IDs participating in the target project.
|
|
63
64
|
"""
|
|
65
|
+
def _get_filtered_sessions(
|
|
66
|
+
self,
|
|
67
|
+
animal: str | int | None = None,
|
|
68
|
+
exclude_incomplete: bool = True,
|
|
69
|
+
dataset_ready_only: bool = False,
|
|
70
|
+
not_dataset_ready_only: bool = False,
|
|
71
|
+
) -> tuple[str, ...]:
|
|
72
|
+
"""This worker method is used to get a list of sessions with optional filtering.
|
|
73
|
+
|
|
74
|
+
User-facing methods call this worker under-the-hood to fetch the filtered tuple of sessions.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
|
|
78
|
+
animals.
|
|
79
|
+
exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
|
|
80
|
+
list.
|
|
81
|
+
dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
|
|
82
|
+
the output list. Enabling this option only shows sessions that can be integrated into a dataset.
|
|
83
|
+
not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
|
|
84
|
+
as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
|
|
85
|
+
enabled, the 'dataset_ready_only' option takes precedence.
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
The tuple of session IDs matching the filter criteria.
|
|
89
|
+
|
|
90
|
+
Raises:
|
|
91
|
+
ValueError: If the specified animal is not found in the manifest file.
|
|
92
|
+
"""
|
|
64
93
|
@property
|
|
65
94
|
def sessions(self) -> tuple[str, ...]:
|
|
66
95
|
"""Returns all session IDs stored inside the manifest file.
|
|
67
96
|
|
|
68
|
-
This provides a tuple of all sessions, independent of the participating animal, that were recorded as
|
|
69
|
-
of the target project.
|
|
97
|
+
This property provides a tuple of all sessions, independent of the participating animal, that were recorded as
|
|
98
|
+
part of the target project. Use the get_sessions() method to get a filtered tuple of session IDs.
|
|
70
99
|
"""
|
|
71
|
-
def
|
|
100
|
+
def get_sessions(
|
|
72
101
|
self,
|
|
73
|
-
animal: str | int,
|
|
102
|
+
animal: str | int | None = None,
|
|
74
103
|
exclude_incomplete: bool = True,
|
|
75
104
|
dataset_ready_only: bool = False,
|
|
76
105
|
not_dataset_ready_only: bool = False,
|
|
77
106
|
) -> tuple[str, ...]:
|
|
78
|
-
"""Returns
|
|
107
|
+
"""Returns requested session IDs based on selected filtering criteria.
|
|
79
108
|
|
|
80
|
-
This provides a tuple of
|
|
109
|
+
This method provides a tuple of sessions based on the specified filters. If no animal is specified, returns
|
|
110
|
+
sessions for all animals in the project.
|
|
81
111
|
|
|
82
112
|
Args:
|
|
83
|
-
animal:
|
|
113
|
+
animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
|
|
114
|
+
animals.
|
|
84
115
|
exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
|
|
85
116
|
list.
|
|
86
117
|
dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
|
|
@@ -89,6 +120,9 @@ class ProjectManifest:
|
|
|
89
120
|
as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
|
|
90
121
|
enabled, the 'dataset_ready_only' option takes precedence.
|
|
91
122
|
|
|
123
|
+
Returns:
|
|
124
|
+
The tuple of session IDs matching the filter criteria.
|
|
125
|
+
|
|
92
126
|
Raises:
|
|
93
127
|
ValueError: If the specified animal is not found in the manifest file.
|
|
94
128
|
"""
|
|
@@ -114,7 +148,7 @@ def generate_project_manifest(
|
|
|
114
148
|
This function evaluates the input project directory and builds the 'manifest' file for the project. The file
|
|
115
149
|
includes the descriptive information about every session stored inside the input project folder and the state of
|
|
116
150
|
the session's data processing (which processing pipelines have been applied to each session). The file will be
|
|
117
|
-
created under the 'output_path' directory and use the following name pattern:
|
|
151
|
+
created under the 'output_path' directory and use the following name pattern: ProjectName_manifest.feather.
|
|
118
152
|
|
|
119
153
|
Notes:
|
|
120
154
|
The manifest file is primarily used to capture and move project state information between machines, typically
|
|
@@ -132,6 +166,7 @@ def generate_project_manifest(
|
|
|
132
166
|
|
|
133
167
|
def verify_session_checksum(
|
|
134
168
|
session_path: Path,
|
|
169
|
+
manager_id: int,
|
|
135
170
|
create_processed_data_directory: bool = True,
|
|
136
171
|
processed_data_root: None | Path = None,
|
|
137
172
|
update_manifest: bool = False,
|
|
@@ -156,6 +191,8 @@ def verify_session_checksum(
|
|
|
156
191
|
Args:
|
|
157
192
|
session_path: The path to the session directory to be verified. Note, the input session directory must contain
|
|
158
193
|
the 'raw_data' subdirectory.
|
|
194
|
+
manager_id: The xxHash-64 hash-value that specifies the unique identifier of the manager process that
|
|
195
|
+
manages the integrity verification runtime.
|
|
159
196
|
create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
|
|
160
197
|
processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
|
|
161
198
|
the root directory where to store the processed data from all projects, and it will be automatically
|
|
@@ -182,9 +219,8 @@ def resolve_p53_marker(
|
|
|
182
219
|
from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
|
|
183
220
|
that only one type of runtime (processing or dataset integration) is allowed to work with the session.
|
|
184
221
|
|
|
185
|
-
For the p53.bin marker to be created, the session must
|
|
186
|
-
|
|
187
|
-
dataset integration. This is due to data access hierarchy limitations of the Sun lab compute server.
|
|
222
|
+
For the p53.bin marker to be created, the session must not be undergoing processing. For the p53 marker
|
|
223
|
+
to be removed, the session must not be undergoing dataset integration.
|
|
188
224
|
|
|
189
225
|
Since version 3.1.0, this function also supports (re)generating the processed session's project manifest file,
|
|
190
226
|
which is used to support further Sun lab data processing pipelines.
|