sl-shared-assets 3.1.3-py3-none-any.whl → 4.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -13,11 +13,12 @@ from ataraxis_base_utilities import LogLevel, console
 from ..data_classes import (
     SessionData,
     SessionTypes,
-    ProcessingTracker,
+    TrackerFileNames,
     RunTrainingDescriptor,
     LickTrainingDescriptor,
     WindowCheckingDescriptor,
     MesoscopeExperimentDescriptor,
+    get_processing_tracker,
 )
 from .packaging_tools import calculate_directory_checksum
 
@@ -149,28 +150,20 @@ class ProjectManifest:
         """
         return tuple(self._data.select("animal").unique().sort("animal").to_series().to_list())
 
-    @property
-    def sessions(self) -> tuple[str, ...]:
-        """Returns all session IDs stored inside the manifest file.
-
-        This provides a tuple of all sessions, independent of the participating animal, that were recorded as part
-        of the target project.
-        """
-        return tuple(self._data.select("session").sort("session").to_series().to_list())
-
-    def get_sessions_for_animal(
+    def _get_filtered_sessions(
         self,
-        animal: str | int,
+        animal: str | int | None = None,
         exclude_incomplete: bool = True,
         dataset_ready_only: bool = False,
         not_dataset_ready_only: bool = False,
     ) -> tuple[str, ...]:
-        """Returns all session IDs for the target animal.
+        """This worker method is used to get a list of sessions with optional filtering.
 
-        This provides a tuple of all sessions performed by the target animal as part of the target project.
+        User-facing methods call this worker under the hood to fetch the filtered tuple of sessions.
 
         Args:
-            animal: The ID of the animal for which to get the session data.
+            animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
+                animals.
             exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
                 list.
             dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
@@ -179,22 +172,27 @@ class ProjectManifest:
                 as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
                 enabled, the 'dataset_ready_only' option takes precedence.
 
+        Returns:
+            The tuple of session IDs matching the filter criteria.
+
         Raises:
             ValueError: If the specified animal is not found in the manifest file.
         """
+        data = self._data
 
-        # Ensures that the 'animal' argument has the same type as the data inside the DataFrame.
-        if self._animal_string:
-            animal = str(animal)
-        else:
-            animal = int(animal)
+        # Filters by animal if specified
+        if animal is not None:
+            # Ensures that the 'animal' argument has the same type as the data inside the DataFrame.
+            if self._animal_string:
+                animal = str(animal)
+            else:
+                animal = int(animal)
 
-        if animal not in self.animals:
-            message = f"Animal ID '{animal}' not found in manifest. Available animals: {self.animals}"
-            console.error(message=message, error=ValueError)
+            if animal not in self.animals:
+                message = f"Animal ID '{animal}' not found in the project manifest. Available animals: {self.animals}."
+                console.error(message=message, error=ValueError)
 
-        # Filters by animal ID
-        data = self._data.filter(pl.col("animal") == animal)
+            data = data.filter(pl.col("animal") == animal)
 
         # Optionally filters out incomplete sessions
         if exclude_incomplete:
@@ -210,6 +208,51 @@
         sessions = data.select("session").sort("session").to_series().to_list()
         return tuple(sessions)
 
+    @property
+    def sessions(self) -> tuple[str, ...]:
+        """Returns all session IDs stored inside the manifest file.
+
+        This property provides a tuple of all sessions, independent of the participating animal, that were recorded as
+        part of the target project. Use the get_sessions() method to retrieve a filtered tuple of sessions.
+        """
+        return self._get_filtered_sessions(animal=None, exclude_incomplete=False)
+
+    def get_sessions(
+        self,
+        animal: str | int | None = None,
+        exclude_incomplete: bool = True,
+        dataset_ready_only: bool = False,
+        not_dataset_ready_only: bool = False,
+    ) -> tuple[str, ...]:
+        """Returns requested session IDs based on selected filtering criteria.
+
+        This method provides a tuple of sessions based on the specified filters. If no animal is specified, returns
+        sessions for all animals in the project.
+
+        Args:
+            animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
+                animals.
+            exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
+                list.
+            dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
+                the output list. Enabling this option only shows sessions that can be integrated into a dataset.
+            not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
+                as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
+                enabled, the 'dataset_ready_only' option takes precedence.
+
+        Returns:
+            The tuple of session IDs matching the filter criteria.
+
+        Raises:
+            ValueError: If the specified animal is not found in the manifest file.
+        """
+        return self._get_filtered_sessions(
+            animal=animal,
+            exclude_incomplete=exclude_incomplete,
+            dataset_ready_only=dataset_ready_only,
+            not_dataset_ready_only=not_dataset_ready_only,
+        )
+
     def get_session_info(self, session: str) -> pl.DataFrame:
         """Returns a Polars DataFrame that stores detailed information for the specified session.
 
@@ -237,7 +280,7 @@ def generate_project_manifest(
     This function evaluates the input project directory and builds the 'manifest' file for the project. The file
     includes the descriptive information about every session stored inside the input project folder and the state of
     the session's data processing (which processing pipelines have been applied to each session). The file will be
-    created under the 'output_path' directory and use the following name pattern: {ProjectName}}_manifest.feather.
+    created under the 'output_path' directory and use the following name pattern: ProjectName_manifest.feather.
 
     Notes:
         The manifest file is primarily used to capture and move project state information between machines, typically
@@ -369,7 +412,9 @@ def generate_project_manifest(
         manifest["complete"].append(session_data.raw_data.telomere_path.exists())
 
         # Data verification status
-        tracker = ProcessingTracker(file_path=session_data.raw_data.integrity_verification_tracker_path)
+        tracker = get_processing_tracker(
+            root=session_data.raw_data.raw_data_path, file_name=TrackerFileNames.INTEGRITY
+        )
         manifest["integrity"].append(tracker.is_complete)
 
         # If the session is incomplete or unverified, marks all processing steps as FALSE, as automatic processing
@@ -383,15 +428,21 @@ def generate_project_manifest(
             continue  # Cycles to the next session
 
         # Suite2p (single-day) processing status.
-        tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
+        tracker = get_processing_tracker(
+            file_name=TrackerFileNames.SUITE2P, root=session_data.processed_data.processed_data_path
+        )
         manifest["suite2p"].append(tracker.is_complete)
 
         # Behavior data processing status.
-        tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
+        tracker = get_processing_tracker(
+            file_name=TrackerFileNames.BEHAVIOR, root=session_data.processed_data.processed_data_path
+        )
         manifest["behavior"].append(tracker.is_complete)
 
         # DeepLabCut (video) processing status.
-        tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
+        tracker = get_processing_tracker(
+            file_name=TrackerFileNames.VIDEO, root=session_data.processed_data.processed_data_path
+        )
         manifest["video"].append(tracker.is_complete)
 
         # Tracks whether the session's data is currently in the processing or dataset integration mode.
@@ -435,6 +486,7 @@
 
 def verify_session_checksum(
     session_path: Path,
+    manager_id: int,
     create_processed_data_directory: bool = True,
     processed_data_root: None | Path = None,
     update_manifest: bool = False,
@@ -459,6 +511,8 @@
     Args:
         session_path: The path to the session directory to be verified. Note, the input session directory must contain
            the 'raw_data' subdirectory.
+        manager_id: The xxHash-64 hash-value that specifies the unique identifier of the manager process that
+            manages the integrity verification runtime.
        create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
        processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
            the root directory where to store the processed data from all projects, and it will be automatically
@@ -476,14 +530,12 @@
     )
 
     # Initializes the ProcessingTracker instance for the verification tracker file
-    tracker = ProcessingTracker(file_path=session_data.raw_data.integrity_verification_tracker_path)
+    tracker = get_processing_tracker(root=session_data.raw_data.raw_data_path, file_name=TrackerFileNames.INTEGRITY)
+    console.echo(f"{tracker.file_path}")
 
     # Updates the tracker data to communicate that the verification process has started. This automatically clears
     # the previous 'completed' status.
-    tracker.start()
-
-    # Try starts here to allow for proper error-driven 'start' terminations of the tracker cannot acquire the lock for
-    # a long time, or if another runtime is already underway.
+    tracker.start(manager_id=manager_id)
     try:
         # Re-calculates the checksum for the raw_data directory
         calculated_checksum = calculate_directory_checksum(
@@ -502,14 +554,14 @@
 
     else:
         # Sets the tracker to indicate that the verification runtime completed successfully.
-        tracker.stop()
+        tracker.stop(manager_id=manager_id)
 
     finally:
         # If the code reaches this section while the tracker indicates that the processing is still running,
         # this means that the verification runtime encountered an error. Configures the tracker to indicate that this
         # runtime finished with an error to prevent deadlocking the runtime.
         if tracker.is_running:
-            tracker.error()
+            tracker.error(manager_id=manager_id)
 
     # If the runtime is configured to generate the project manifest file, attempts to generate and overwrite the
     # existing manifest file for the target project.
@@ -543,9 +595,8 @@ def resolve_p53_marker(
     from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
     that only one type of runtime (processing or dataset integration) is allowed to work with the session.
 
-    For the p53.bin marker to be created, the session must currently not undergo any processing. Removing the
-    p53.bin marker does not have any dependencies and will be executed even if the session is currently undergoing
-    dataset integration. This is due to data access hierarchy limitations of the Sun lab compute server.
+    For the p53.bin marker to be created, the session must not be undergoing processing. For the p53 marker
+    to be removed, the session must not be undergoing dataset integration.
 
     Since version 3.1.0, this function also supports (re)generating the processed session's project manifest file,
     which is used to support further Sun lab data processing pipelines.
@@ -570,34 +621,69 @@ def resolve_p53_marker(
         make_processed_data_directory=create_processed_data_directory,
     )
 
-    # If the p53.bin marker exists and the runtime is configured to remove it, removes the marker file. If the runtime
-    # is configured to create the marker, the method aborts the runtime (as the marker already exists).
-    if session_data.processed_data.p53_path.exists():
-        if remove:
-            session_data.processed_data.p53_path.unlink()
-            message = (
-                f"Dataset marker for the session {session_data.session_name} acquired for the animal "
-                f"{session_data.animal_id} and {session_data.project_name} project: Removed."
-            )
-            console.echo(message=message, level=LogLevel.SUCCESS)
-            return  # Ends remove runtime
+    # If the p53.bin marker exists and the runtime is configured to remove it, attempts to remove the marker file.
+    if session_data.processed_data.p53_path.exists() and remove:
+        # This section deals with a unique nuance related to the Sun lab processing server organization. Specifically,
+        # the user accounts are not allowed to modify or create files in the data directories owned by the service
+        # accounts. In turn, this prevents user accounts from modifying the processed data directory to indicate when
+        # they are running a dataset integration pipeline on the processed data. To work around this problem, the
+        # dataset integration pipeline now creates a 'semaphore' marker for each session that is currently being
+        # integrated into a dataset. This semaphore marker is stored under the root user working directory, inside the
+        # subdirectory called 'semaphore'.
+
+        # The parent of the shared sun-lab processed data directory is the root 'working' volume. All user directories
+        # are stored under this root working directory.
+        if processed_data_root is None:
+            # If the processed data root is not provided, sets it to the great-grandparent of the session directory.
+            # This works assuming that the data is stored under: root/project/animal/session.
+            processed_data_root = session_path.parents[2]
+        working_root = processed_data_root.parent
+
+        # Loops over each user directory and checks whether a semaphore marker exists for the processed session.
+        for directory in working_root.iterdir():
+            if (
+                len([marker for marker in directory.joinpath("semaphore").glob(f"*{session_data.session_name}.bin")])
+                > 0
+            ):
+                # Aborts with an error if the semaphore marker prevents the p53 marker from being removed.
+                message = (
+                    f"Unable to remove the dataset marker for the session '{session_data.session_name}' acquired "
+                    f"for the animal '{session_data.animal_id}' under the '{session_data.project_name}' project. "
+                    f"The session data is currently being integrated into a dataset by the owner of the "
+                    f"'{directory.stem}' user directory. Wait until the ongoing dataset integration is complete and "
+                    f"repeat the command that produced this error."
+                )
+                console.error(message=message, error=RuntimeError)
 
+        # If the session does not have a corresponding semaphore marker in any user directories, removes the p53 marker
+        # file.
+        session_data.processed_data.p53_path.unlink()
         message = (
-            f"Dataset marker for the session {session_data.session_name} acquired for the animal "
-            f"{session_data.animal_id} and {session_data.project_name} project: Already exists. No actions taken."
+            f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
+            f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Removed."
         )
         console.echo(message=message, level=LogLevel.SUCCESS)
-        return  # Ends create runtime
+        return  # Ends remove runtime
 
-    # If the marker does not exist and the function is called in 'remove' mode, aborts the runtime
-    elif remove:
+    # If the marker does not exist and the function is called in 'remove' mode, aborts the runtime early
+    elif not session_data.processed_data.p53_path.exists() and remove:
         message = (
-            f"Dataset marker for the session {session_data.session_name} acquired for the animal "
-            f"{session_data.animal_id} and {session_data.project_name} project: Does not exist. No actions taken."
+            f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
+            f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Does not exist. No actions "
+            f"taken."
        )
         console.echo(message=message, level=LogLevel.SUCCESS)
         return  # Ends remove runtime
 
+    elif session_data.processed_data.p53_path.exists():
+        message = (
+            f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
+            f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Already exists. No actions "
+            f"taken."
+        )
+        console.echo(message=message, level=LogLevel.SUCCESS)
+        return  # Ends create runtime
+
     # The rest of the runtime deals with determining whether it is safe to create the marker file.
     # Queries the type of the processed session
     session_type = session_data.session_type
@@ -606,53 +692,58 @@ def resolve_p53_marker(
     # p53.bin file. Similarly, any incomplete session is automatically excluded from dataset formation.
     if session_type == SessionTypes.WINDOW_CHECKING or not session_data.raw_data.telomere_path.exists():
         message = (
-            f"Unable to generate the dataset marker for the session {session_data.session_name} acquired for the "
-            f"animal {session_data.animal_id} and {session_data.project_name} project, as the session is incomplete or "
-            f"is of Window Checking type. These sessions must be manually evaluated and marked for dataset inclusion "
-            f"by the experimenter. "
+            f"Unable to generate the dataset marker for the session '{session_data.session_name}' acquired for the "
+            f"animal '{session_data.animal_id}' under the '{session_data.project_name}' project, as the session is "
+            f"incomplete or is of Window Checking type. These sessions must be manually evaluated and marked for "
+            f"dataset inclusion by the experimenter. "
         )
-        console.echo(message=message, level=LogLevel.ERROR)
-        return
+        console.error(message=message, error=RuntimeError)
 
     # Training sessions collect similar data and share processing pipeline requirements
+    error: bool = False
     if session_type == SessionTypes.LICK_TRAINING or session_type == SessionTypes.RUN_TRAINING:
         # Ensures that the session is not being processed with one of the supported pipelines.
-        behavior_tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
-        video_tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
+        behavior_tracker = get_processing_tracker(
+            file_name=TrackerFileNames.BEHAVIOR, root=session_data.processed_data.processed_data_path
+        )
+        video_tracker = get_processing_tracker(
+            file_name=TrackerFileNames.VIDEO, root=session_data.processed_data.processed_data_path
+        )
         if behavior_tracker.is_running or video_tracker.is_running:
             # Note, training runtimes do not require suite2p processing.
-            message = (
-                f"Unable to generate the dataset marker for the session {session_data.session_name} acquired for the "
-                f"animal {session_data.animal_id} and {session_data.project_name} project, as it is currently being "
-                f"processed by one of the data processing pipelines. Wait until the session is fully processed by all "
-                f"pipelines and repeat the command that encountered this error."
-            )
-            console.echo(message=message, level=LogLevel.ERROR)
-            return
+            error = True
 
     # Mesoscope experiment sessions require additional processing with suite2p
-    if session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
-        behavior_tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
-        suite2p_tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
-        video_tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
-
-        # Similar to the above, ensures that the session is not being processed with one of the supported pipelines.
-        if behavior_tracker.is_running or suite2p_tracker.is_running or video_tracker.is_running:
-            message = (
-                f"Unable to generate the dataset marker for the session {session_data.session_name} acquired for the "
-                f"animal {session_data.animal_id} and {session_data.project_name} project, as it is currently being "
-                f"processed by one of the data processing pipelines. Wait until the session is fully processed by all "
-                f"pipelines and repeat the command that encountered this error."
-            )
-            console.echo(message=message, level=LogLevel.ERROR)
-            return
+    elif session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
+        behavior_tracker = get_processing_tracker(
+            file_name=TrackerFileNames.BEHAVIOR, root=session_data.processed_data.processed_data_path
+        )
+        suite2p_tracker = get_processing_tracker(
+            file_name=TrackerFileNames.SUITE2P, root=session_data.processed_data.processed_data_path
+        )
+        video_tracker = get_processing_tracker(
+            file_name=TrackerFileNames.VIDEO, root=session_data.processed_data.processed_data_path
+        )
+        console.echo(f"{behavior_tracker.is_running}")
+        if behavior_tracker.is_running or video_tracker.is_running or suite2p_tracker.is_running:
+            error = True
+
+    # If the session is currently being processed by one or more pipelines, aborts with an error.
+    if error:
+        message = (
+            f"Unable to generate the dataset marker for the session '{session_data.session_name}' acquired for the "
+            f"animal '{session_data.animal_id}' under the '{session_data.project_name}' project, as it is "
+            f"currently being processed by one of the data processing pipelines. Wait until the session is fully "
+            f"processed by all pipelines and repeat the command that encountered this error."
+        )
+        console.error(message=message, error=RuntimeError)
 
     # If the runtime reached this point, the session is eligible for dataset integration. Creates the p53.bin marker
     # file, preventing the session from being processed again as long as the marker exists.
     session_data.processed_data.p53_path.touch()
     message = (
-        f"Dataset marker for the session {session_data.session_name} acquired for the animal "
-        f"{session_data.animal_id} and {session_data.project_name} project: Created."
+        f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
+        f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Created."
     )
     console.echo(message=message, level=LogLevel.SUCCESS)
 
@@ -5,11 +5,12 @@ import polars as pl
 from ..data_classes import (
     SessionData as SessionData,
     SessionTypes as SessionTypes,
-    ProcessingTracker as ProcessingTracker,
+    TrackerFileNames as TrackerFileNames,
     RunTrainingDescriptor as RunTrainingDescriptor,
     LickTrainingDescriptor as LickTrainingDescriptor,
     WindowCheckingDescriptor as WindowCheckingDescriptor,
     MesoscopeExperimentDescriptor as MesoscopeExperimentDescriptor,
+    get_processing_tracker as get_processing_tracker,
 )
 from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
 
@@ -61,26 +62,56 @@ class ProjectManifest:
 
         This provides a tuple of all animal IDs participating in the target project.
         """
+    def _get_filtered_sessions(
+        self,
+        animal: str | int | None = None,
+        exclude_incomplete: bool = True,
+        dataset_ready_only: bool = False,
+        not_dataset_ready_only: bool = False,
+    ) -> tuple[str, ...]:
+        """This worker method is used to get a list of sessions with optional filtering.
+
+        User-facing methods call this worker under the hood to fetch the filtered tuple of sessions.
+
+        Args:
+            animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
+                animals.
+            exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
+                list.
+            dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
+                the output list. Enabling this option only shows sessions that can be integrated into a dataset.
+            not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
+                as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
+                enabled, the 'dataset_ready_only' option takes precedence.
+
+        Returns:
+            The tuple of session IDs matching the filter criteria.
+
+        Raises:
+            ValueError: If the specified animal is not found in the manifest file.
+        """
     @property
     def sessions(self) -> tuple[str, ...]:
         """Returns all session IDs stored inside the manifest file.
 
-        This provides a tuple of all sessions, independent of the participating animal, that were recorded as part
-        of the target project.
+        This property provides a tuple of all sessions, independent of the participating animal, that were recorded as
+        part of the target project. Use the get_sessions() method to retrieve a filtered tuple of sessions.
         """
-    def get_sessions_for_animal(
+    def get_sessions(
         self,
-        animal: str | int,
+        animal: str | int | None = None,
         exclude_incomplete: bool = True,
         dataset_ready_only: bool = False,
         not_dataset_ready_only: bool = False,
     ) -> tuple[str, ...]:
-        """Returns all session IDs for the target animal.
+        """Returns requested session IDs based on selected filtering criteria.
 
-        This provides a tuple of all sessions performed by the target animal as part of the target project.
+        This method provides a tuple of sessions based on the specified filters. If no animal is specified, returns
+        sessions for all animals in the project.
 
         Args:
-            animal: The ID of the animal for which to get the session data.
+            animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
+                animals.
             exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
                 list.
             dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
@@ -89,6 +120,9 @@ class ProjectManifest:
             as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
             enabled, the 'dataset_ready_only' option takes precedence.
 
+        Returns:
+            The tuple of session IDs matching the filter criteria.
+
         Raises:
             ValueError: If the specified animal is not found in the manifest file.
         """
@@ -114,7 +148,7 @@ def generate_project_manifest(
     This function evaluates the input project directory and builds the 'manifest' file for the project. The file
     includes the descriptive information about every session stored inside the input project folder and the state of
     the session's data processing (which processing pipelines have been applied to each session). The file will be
-    created under the 'output_path' directory and use the following name pattern: {ProjectName}}_manifest.feather.
+    created under the 'output_path' directory and use the following name pattern: ProjectName_manifest.feather.
 
     Notes:
         The manifest file is primarily used to capture and move project state information between machines, typically
@@ -132,6 +166,7 @@ def generate_project_manifest(
 
 def verify_session_checksum(
     session_path: Path,
+    manager_id: int,
     create_processed_data_directory: bool = True,
     processed_data_root: None | Path = None,
     update_manifest: bool = False,
@@ -156,6 +191,8 @@ def verify_session_checksum(
     Args:
         session_path: The path to the session directory to be verified. Note, the input session directory must contain
            the 'raw_data' subdirectory.
+        manager_id: The xxHash-64 hash-value that specifies the unique identifier of the manager process that
+            manages the integrity verification runtime.
        create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
        processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
            the root directory where to store the processed data from all projects, and it will be automatically
@@ -182,9 +219,8 @@ def resolve_p53_marker(
     from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
     that only one type of runtime (processing or dataset integration) is allowed to work with the session.
 
-    For the p53.bin marker to be created, the session must currently not undergo any processing. Removing the
-    p53.bin marker does not have any dependencies and will be executed even if the session is currently undergoing
-    dataset integration. This is due to data access hierarchy limitations of the Sun lab compute server.
+    For the p53.bin marker to be created, the session must not be undergoing processing. For the p53 marker
+    to be removed, the session must not be undergoing dataset integration.
 
     Since version 3.1.0, this function also supports (re)generating the processed session's project manifest file,
     which is used to support further Sun lab data processing pipelines.
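Together, the stub changes document the reshaped ProjectManifest query surface: the 3.x sessions property and get_sessions_for_animal() method are unified behind the private _get_filtered_sessions() worker, exposed through the new get_sessions() method. A hedged usage sketch follows; the top-level import path and the constructor argument are assumptions, as neither appears in this diff:

    from sl_shared_assets import ProjectManifest  # import path assumed

    # Assumes the constructor accepts the path to a *_manifest.feather file
    # produced by generate_project_manifest(); the constructor is outside this diff.
    manifest = ProjectManifest("/server/TestProject_manifest.feather")

    # All sessions across all animals, incomplete ones included.
    all_sessions = manifest.sessions

    # Complete sessions for one animal; replaces the 3.x get_sessions_for_animal().
    animal_sessions = manifest.get_sessions(animal="A001", exclude_incomplete=True)

    # Complete sessions, from all animals, that are ready for dataset integration.
    ready_sessions = manifest.get_sessions(dataset_ready_only=True)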