sl-shared-assets 3.1.2__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic. See the package registry's advisory page for more details.

@@ -8,16 +8,17 @@ from datetime import datetime
8
8
  import pytz
9
9
  import polars as pl
10
10
  from filelock import FileLock
11
- from ataraxis_base_utilities import console
11
+ from ataraxis_base_utilities import LogLevel, console
12
12
 
13
13
  from ..data_classes import (
14
14
  SessionData,
15
15
  SessionTypes,
16
- ProcessingTracker,
16
+ TrackerFileNames,
17
17
  RunTrainingDescriptor,
18
18
  LickTrainingDescriptor,
19
19
  WindowCheckingDescriptor,
20
20
  MesoscopeExperimentDescriptor,
21
+ get_processing_tracker,
21
22
  )
22
23
  from .packaging_tools import calculate_directory_checksum
23
24
 
@@ -149,28 +150,20 @@ class ProjectManifest:
149
150
  """
150
151
  return tuple(self._data.select("animal").unique().sort("animal").to_series().to_list())
151
152
 
152
- @property
153
- def sessions(self) -> tuple[str, ...]:
154
- """Returns all session IDs stored inside the manifest file.
155
-
156
- This provides a tuple of all sessions, independent of the participating animal, that were recorded as part
157
- of the target project.
158
- """
159
- return tuple(self._data.select("session").sort("session").to_series().to_list())
160
-
161
- def get_sessions_for_animal(
153
+ def _get_filtered_sessions(
162
154
  self,
163
- animal: str | int,
155
+ animal: str | int | None = None,
164
156
  exclude_incomplete: bool = True,
165
157
  dataset_ready_only: bool = False,
166
158
  not_dataset_ready_only: bool = False,
167
159
  ) -> tuple[str, ...]:
168
- """Returns all session IDs for the target animal.
160
+ """This worker method is used to get a list of sessions with optional filtering.
169
161
 
170
- This provides a tuple of all sessions performed by the target animal as part of the target project.
162
+ User-facing methods call this worker under-the-hood to fetch the filtered tuple of sessions.
171
163
 
172
164
  Args:
173
- animal: The ID of the animal for which to get the session data.
165
+ animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
166
+ animals.
174
167
  exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
175
168
  list.
176
169
  dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
@@ -179,22 +172,27 @@ class ProjectManifest:
179
172
  as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
180
173
  enabled, the 'dataset_ready_only' option takes precedence.
181
174
 
175
+ Returns:
176
+ The tuple of session IDs matching the filter criteria.
177
+
182
178
  Raises:
183
179
  ValueError: If the specified animal is not found in the manifest file.
184
180
  """
181
+ data = self._data
185
182
 
186
- # Ensures that the 'animal' argument has the same type as the data inside the DataFrame.
187
- if self._animal_string:
188
- animal = str(animal)
189
- else:
190
- animal = int(animal)
183
+ # Filter by animal if specified
184
+ if animal is not None:
185
+ # Ensures that the 'animal' argument has the same type as the data inside the DataFrame.
186
+ if self._animal_string:
187
+ animal = str(animal)
188
+ else:
189
+ animal = int(animal)
191
190
 
192
- if animal not in self.animals:
193
- message = f"Animal ID '{animal}' not found in manifest. Available animals: {self.animals}"
194
- console.error(message=message, error=ValueError)
191
+ if animal not in self.animals:
192
+ message = f"Animal ID '{animal}' not found in the project manifest. Available animals: {self.animals}."
193
+ console.error(message=message, error=ValueError)
195
194
 
196
- # Filters by animal ID
197
- data = self._data.filter(pl.col("animal") == animal)
195
+ data = data.filter(pl.col("animal") == animal)
198
196
 
199
197
  # Optionally filters out incomplete sessions
200
198
  if exclude_incomplete:
@@ -210,6 +208,51 @@ class ProjectManifest:
210
208
  sessions = data.select("session").sort("session").to_series().to_list()
211
209
  return tuple(sessions)
212
210
 
211
+ @property
212
+ def sessions(self) -> tuple[str, ...]:
213
+ """Returns all session IDs stored inside the manifest file.
214
+
215
+ This property provides a tuple of all sessions, independent of the participating animal, that were recorded as
216
+ part of the target project. Use the get_sessions() method to get the list of session tuples with filtering.
217
+ """
218
+ return self._get_filtered_sessions(animal=None, exclude_incomplete=False)
219
+
220
+ def get_sessions(
221
+ self,
222
+ animal: str | int | None = None,
223
+ exclude_incomplete: bool = True,
224
+ dataset_ready_only: bool = False,
225
+ not_dataset_ready_only: bool = False,
226
+ ) -> tuple[str, ...]:
227
+ """Returns requested session IDs based on selected filtering criteria.
228
+
229
+ This method provides a tuple of sessions based on the specified filters. If no animal is specified, returns
230
+ sessions for all animals in the project.
231
+
232
+ Args:
233
+ animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
234
+ animals.
235
+ exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
236
+ list.
237
+ dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
238
+ the output list. Enabling this option only shows sessions that can be integrated into a dataset.
239
+ not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
240
+ as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
241
+ enabled, the 'dataset_ready_only' option takes precedence.
242
+
243
+ Returns:
244
+ The tuple of session IDs matching the filter criteria.
245
+
246
+ Raises:
247
+ ValueError: If the specified animal is not found in the manifest file.
248
+ """
249
+ return self._get_filtered_sessions(
250
+ animal=animal,
251
+ exclude_incomplete=exclude_incomplete,
252
+ dataset_ready_only=dataset_ready_only,
253
+ not_dataset_ready_only=not_dataset_ready_only,
254
+ )
255
+
213
256
  def get_session_info(self, session: str) -> pl.DataFrame:
214
257
  """Returns a Polars DataFrame that stores detailed information for the specified session.
215
258
 
@@ -237,7 +280,7 @@ def generate_project_manifest(
237
280
  This function evaluates the input project directory and builds the 'manifest' file for the project. The file
238
281
  includes the descriptive information about every session stored inside the input project folder and the state of
239
282
  the session's data processing (which processing pipelines have been applied to each session). The file will be
240
- created under the 'output_path' directory and use the following name pattern: {ProjectName}}_manifest.feather.
283
+ created under the 'output_path' directory and use the following name pattern: ProjectName_manifest.feather.
241
284
 
242
285
  Notes:
243
286
  The manifest file is primarily used to capture and move project state information between machines, typically
@@ -362,12 +405,16 @@ def generate_project_manifest(
362
405
  manifest["notes"].append(descriptor.experimenter_notes)
363
406
  except Exception:
364
407
  manifest["notes"].append("N/A")
408
+ else:
409
+ manifest["notes"].append("N/A")
365
410
 
366
411
  # If the session raw_data folder contains the telomere.bin file, marks the session as complete.
367
412
  manifest["complete"].append(session_data.raw_data.telomere_path.exists())
368
413
 
369
414
  # Data verification status
370
- tracker = ProcessingTracker(file_path=session_data.raw_data.integrity_verification_tracker_path)
415
+ tracker = get_processing_tracker(
416
+ root=session_data.raw_data.raw_data_path, file_name=TrackerFileNames.INTEGRITY
417
+ )
371
418
  manifest["integrity"].append(tracker.is_complete)
372
419
 
373
420
  # If the session is incomplete or unverified, marks all processing steps as FALSE, as automatic processing
@@ -381,15 +428,21 @@ def generate_project_manifest(
381
428
  continue # Cycles to the next session
382
429
 
383
430
  # Suite2p (single-day) processing status.
384
- tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
431
+ tracker = get_processing_tracker(
432
+ file_name=TrackerFileNames.SUITE2P, root=session_data.processed_data.processed_data_path
433
+ )
385
434
  manifest["suite2p"].append(tracker.is_complete)
386
435
 
387
436
  # Behavior data processing status.
388
- tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
437
+ tracker = get_processing_tracker(
438
+ file_name=TrackerFileNames.BEHAVIOR, root=session_data.processed_data.processed_data_path
439
+ )
389
440
  manifest["behavior"].append(tracker.is_complete)
390
441
 
391
442
  # DeepLabCut (video) processing status.
392
- tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
443
+ tracker = get_processing_tracker(
444
+ file_name=TrackerFileNames.VIDEO, root=session_data.processed_data.processed_data_path
445
+ )
393
446
  manifest["video"].append(tracker.is_complete)
394
447
 
395
448
  # Tracks whether the session's data is currently in the processing or dataset integration mode.
@@ -433,6 +486,7 @@ def generate_project_manifest(
433
486
 
434
487
  def verify_session_checksum(
435
488
  session_path: Path,
489
+ manager_id: int,
436
490
  create_processed_data_directory: bool = True,
437
491
  processed_data_root: None | Path = None,
438
492
  update_manifest: bool = False,
@@ -457,6 +511,8 @@ def verify_session_checksum(
457
511
  Args:
458
512
  session_path: The path to the session directory to be verified. Note, the input session directory must contain
459
513
  the 'raw_data' subdirectory.
514
+ manager_id: The xxHash-64 hash-value that specifies the unique identifier of the manager process that
515
+ manages the integrity verification runtime.
460
516
  create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
461
517
  processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
462
518
  the root directory where to store the processed data from all projects, and it will be automatically
@@ -474,14 +530,12 @@ def verify_session_checksum(
474
530
  )
475
531
 
476
532
  # Initializes the ProcessingTracker instance for the verification tracker file
477
- tracker = ProcessingTracker(file_path=session_data.raw_data.integrity_verification_tracker_path)
533
+ tracker = get_processing_tracker(root=session_data.raw_data.raw_data_path, file_name=TrackerFileNames.INTEGRITY)
534
+ console.echo(f"{tracker.file_path}")
478
535
 
479
536
  # Updates the tracker data to communicate that the verification process has started. This automatically clears
480
537
  # the previous 'completed' status.
481
- tracker.start()
482
-
483
- # Try starts here to allow for proper error-driven 'start' terminations of the tracker cannot acquire the lock for
484
- # a long time, or if another runtime is already underway.
538
+ tracker.start(manager_id=manager_id)
485
539
  try:
486
540
  # Re-calculates the checksum for the raw_data directory
487
541
  calculated_checksum = calculate_directory_checksum(
@@ -500,14 +554,14 @@ def verify_session_checksum(
500
554
 
501
555
  else:
502
556
  # Sets the tracker to indicate that the verification runtime completed successfully.
503
- tracker.stop()
557
+ tracker.stop(manager_id=manager_id)
504
558
 
505
559
  finally:
506
560
  # If the code reaches this section while the tracker indicates that the processing is still running,
507
561
  # this means that the verification runtime encountered an error. Configures the tracker to indicate that this
508
562
  # runtime finished with an error to prevent deadlocking the runtime.
509
563
  if tracker.is_running:
510
- tracker.error()
564
+ tracker.error(manager_id=manager_id)
511
565
 
512
566
  # If the runtime is configured to generate the project manifest file, attempts to generate and overwrite the
513
567
  # existing manifest file for the target project.
@@ -541,9 +595,8 @@ def resolve_p53_marker(
541
595
  from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
542
596
  that only one type of runtimes (processing or dataset integration) is allowed to work with the session.
543
597
 
544
- For the p53.bin marker to be created, the session must currently not undergo any processing. Removing the
545
- p53.bin marker does not have any dependencies and will be executed even if the session is currently undergoing
546
- dataset integration. This is due to data access hierarchy limitations of the Sun lab compute server.
598
+ For the p53.bin marker to be created, the session must not be undergoing processing. For the p53 marker
599
+ to be removed, the session must not be undergoing dataset integration.
547
600
 
548
601
  Since version 3.1.0, this functon also supports (re)generating the processed session's project manifest file,
549
602
  which is used to support further Sun lab data processing pipelines.
@@ -568,19 +621,69 @@ def resolve_p53_marker(
568
621
  make_processed_data_directory=create_processed_data_directory,
569
622
  )
570
623
 
571
- # If the p53.bin marker exists and the runtime is configured to remove it, removes the marker file. If the runtime
572
- # is configured to create the marker, the method aborts the runtime (as the marker already exists).
573
- if session_data.processed_data.p53_path.exists():
574
- if remove:
575
- session_data.processed_data.p53_path.unlink()
576
- return # Ends remove runtime
624
+ # If the p53.bin marker exists and the runtime is configured to remove it, attempts to remove the marker file.
625
+ if session_data.processed_data.p53_path.exists() and remove:
626
+ # This section deals with a unique nuance related to the Sun lab processing server organization. Specifically,
627
+ # the user accounts are not allowed to modify or create files in the data directories owned by the service
628
+ # accounts. In turn, this prevents user accounts from modifying the processed data directory to indicate when
629
+ # they are running a dataset integration pipeline on the processed data. To work around this problem, the
630
+ # dataset integration pipeline now creates a 'semaphore' marker for each session that is currently being
631
+ # integrated into a dataset. This semaphore marker is stored under the root user working directory, inside the
632
+ # subdirectory called 'semaphore'.
633
+
634
+ # The parent of the shared sun-lab processed data directory is the root 'working' volume. All user directories
635
+ # are stored under this root working directory.
636
+ if processed_data_root is None:
637
+ # If the processed data root is not provided, sets it to the great-grandparent of the session directory.
638
+ # This works assuming that the data is stored under: root/project/animal/session.
639
+ processed_data_root = session_path.parents[2]
640
+ working_root = processed_data_root.parent
641
+
642
+ # Loops over each user directory and checks whether a semaphore marker exists for the processed session.
643
+ for directory in working_root.iterdir():
644
+ if (
645
+ len([marker for marker in directory.joinpath("semaphore").glob(f"*{session_data.session_name}.bin")])
646
+ > 0
647
+ ):
648
+ # Aborts with an error if the semaphore marker prevents the p53 marker from being removed.
649
+ message = (
650
+ f"Unable to remove the dataset marker for the session' {session_data.session_name}' acquired "
651
+ f"for the animal '{session_data.animal_id}' under the '{session_data.project_name}' project. "
652
+ f"The session data is currently being integrated into a dataset by the owner the "
653
+ f"'{directory.stem}' user directory. Wait until the ongoing dataset integration is complete and "
654
+ f"repeat the command that produced this error."
655
+ )
656
+ console.error(message=message, error=RuntimeError)
577
657
 
578
- return # Ends create runtime
658
+ # If the session does not have a corresponding semaphore marker in any user directories, removes the p53 marker
659
+ # file.
660
+ session_data.processed_data.p53_path.unlink()
661
+ message = (
662
+ f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
663
+ f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Removed."
664
+ )
665
+ console.echo(message=message, level=LogLevel.SUCCESS)
666
+ return # Ends remove runtime
579
667
 
580
- # If the marker does not exist and the function is called in 'remove' mode, aborts the runtime
581
- elif remove:
668
+ # If the marker does not exist and the function is called in 'remove' mode, aborts the runtime early
669
+ elif not session_data.processed_data.p53_path.exists() and remove:
670
+ message = (
671
+ f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
672
+ f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Does not exist. No actions "
673
+ f"taken."
674
+ )
675
+ console.echo(message=message, level=LogLevel.SUCCESS)
582
676
  return # Ends remove runtime
583
677
 
678
+ elif session_data.processed_data.p53_path.exists():
679
+ message = (
680
+ f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
681
+ f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Already exists. No actions "
682
+ f"taken."
683
+ )
684
+ console.echo(message=message, level=LogLevel.SUCCESS)
685
+ return # Ends create runtime
686
+
584
687
  # The rest of the runtime deals with determining whether it is safe to create the marker file.
585
688
  # Queries the type of the processed session
586
689
  session_type = session_data.session_type
@@ -588,30 +691,61 @@ def resolve_p53_marker(
588
691
  # Window checking sessions are not designed to be integrated into datasets, so they cannot be marked with the
589
692
  # p53.bin file. Similarly, any incomplete session is automatically excluded from dataset formation.
590
693
  if session_type == SessionTypes.WINDOW_CHECKING or not session_data.raw_data.telomere_path.exists():
591
- return
694
+ message = (
695
+ f"Unable to generate the dataset marker for the session '{session_data.session_name}' acquired for the "
696
+ f"animal '{session_data.animal_id}' under the '{session_data.project_name}' project, as the session is "
697
+ f"incomplete or is of Window Checking type. These sessions must be manually evaluated and marked for "
698
+ f"dataset inclusion by the experimenter. "
699
+ )
700
+ console.error(message=message, error=RuntimeError)
592
701
 
593
702
  # Training sessions collect similar data and share processing pipeline requirements
703
+ error: bool = False
594
704
  if session_type == SessionTypes.LICK_TRAINING or session_type == SessionTypes.RUN_TRAINING:
595
705
  # Ensures that the session is not being processed with one of the supported pipelines.
596
- behavior_tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
597
- video_tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
706
+ behavior_tracker = get_processing_tracker(
707
+ file_name=TrackerFileNames.BEHAVIOR, root=session_data.processed_data.processed_data_path
708
+ )
709
+ video_tracker = get_processing_tracker(
710
+ file_name=TrackerFileNames.VIDEO, root=session_data.processed_data.processed_data_path
711
+ )
598
712
  if behavior_tracker.is_running or video_tracker.is_running:
599
713
  # Note, training runtimes do not require suite2p processing.
600
- return
714
+ error = True
601
715
 
602
716
  # Mesoscope experiment sessions require additional processing with suite2p
603
- if session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
604
- behavior_tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
605
- suite2p_tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
606
- video_tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
717
+ elif session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
718
+ behavior_tracker = get_processing_tracker(
719
+ file_name=TrackerFileNames.BEHAVIOR, root=session_data.processed_data.processed_data_path
720
+ )
721
+ suite2p_tracker = get_processing_tracker(
722
+ file_name=TrackerFileNames.SUITE2P, root=session_data.processed_data.processed_data_path
723
+ )
724
+ video_tracker = get_processing_tracker(
725
+ file_name=TrackerFileNames.VIDEO, root=session_data.processed_data.processed_data_path
726
+ )
727
+ console.echo(f"{behavior_tracker.is_running}")
728
+ if behavior_tracker.is_running or video_tracker.is_running or suite2p_tracker.is_running:
729
+ error = True
607
730
 
608
- # Similar to the above, ensures that the session is not being processed with one of the supported pipelines.
609
- if behavior_tracker.is_running or suite2p_tracker.is_running or video_tracker.is_running:
610
- return
731
+ # If the session is currently being processed by one or more pipelines, aborts with an error.
732
+ if error:
733
+ message = (
734
+ f"Unable to generate the dataset marker for the session '{session_data.session_name}' acquired for the "
735
+ f"animal '{session_data.animal_id}' under the '{session_data.project_name}' project, as it is "
736
+ f"currently being processed by one of the data processing pipelines. Wait until the session is fully "
737
+ f"processed by all pipelines and repeat the command that encountered this error."
738
+ )
739
+ console.error(message=message, error=RuntimeError)
611
740
 
612
741
  # If the runtime reached this point, the session is eligible for dataset integration. Creates the p53.bin marker
613
742
  # file, preventing the session from being processed again as long as the marker exists.
614
743
  session_data.processed_data.p53_path.touch()
744
+ message = (
745
+ f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
746
+ f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Created."
747
+ )
748
+ console.echo(message=message, level=LogLevel.SUCCESS)
615
749
 
616
750
  # If the runtime is configured to generate the project manifest file, attempts to generate and overwrite the
617
751
  # existing manifest file for the target project.
@@ -5,11 +5,12 @@ import polars as pl
5
5
  from ..data_classes import (
6
6
  SessionData as SessionData,
7
7
  SessionTypes as SessionTypes,
8
- ProcessingTracker as ProcessingTracker,
8
+ TrackerFileNames as TrackerFileNames,
9
9
  RunTrainingDescriptor as RunTrainingDescriptor,
10
10
  LickTrainingDescriptor as LickTrainingDescriptor,
11
11
  WindowCheckingDescriptor as WindowCheckingDescriptor,
12
12
  MesoscopeExperimentDescriptor as MesoscopeExperimentDescriptor,
13
+ get_processing_tracker as get_processing_tracker,
13
14
  )
14
15
  from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
15
16
 
@@ -61,26 +62,56 @@ class ProjectManifest:
61
62
 
62
63
  This provides a tuple of all animal IDs participating in the target project.
63
64
  """
65
+ def _get_filtered_sessions(
66
+ self,
67
+ animal: str | int | None = None,
68
+ exclude_incomplete: bool = True,
69
+ dataset_ready_only: bool = False,
70
+ not_dataset_ready_only: bool = False,
71
+ ) -> tuple[str, ...]:
72
+ """This worker method is used to get a list of sessions with optional filtering.
73
+
74
+ User-facing methods call this worker under-the-hood to fetch the filtered tuple of sessions.
75
+
76
+ Args:
77
+ animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
78
+ animals.
79
+ exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
80
+ list.
81
+ dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
82
+ the output list. Enabling this option only shows sessions that can be integrated into a dataset.
83
+ not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
84
+ as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
85
+ enabled, the 'dataset_ready_only' option takes precedence.
86
+
87
+ Returns:
88
+ The tuple of session IDs matching the filter criteria.
89
+
90
+ Raises:
91
+ ValueError: If the specified animal is not found in the manifest file.
92
+ """
64
93
  @property
65
94
  def sessions(self) -> tuple[str, ...]:
66
95
  """Returns all session IDs stored inside the manifest file.
67
96
 
68
- This provides a tuple of all sessions, independent of the participating animal, that were recorded as part
69
- of the target project.
97
+ This property provides a tuple of all sessions, independent of the participating animal, that were recorded as
98
+ part of the target project. Use the get_sessions() method to get the list of session tuples with filtering.
70
99
  """
71
- def get_sessions_for_animal(
100
+ def get_sessions(
72
101
  self,
73
- animal: str | int,
102
+ animal: str | int | None = None,
74
103
  exclude_incomplete: bool = True,
75
104
  dataset_ready_only: bool = False,
76
105
  not_dataset_ready_only: bool = False,
77
106
  ) -> tuple[str, ...]:
78
- """Returns all session IDs for the target animal.
107
+ """Returns requested session IDs based on selected filtering criteria.
79
108
 
80
- This provides a tuple of all sessions performed by the target animal as part of the target project.
109
+ This method provides a tuple of sessions based on the specified filters. If no animal is specified, returns
110
+ sessions for all animals in the project.
81
111
 
82
112
  Args:
83
- animal: The ID of the animal for which to get the session data.
113
+ animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
114
+ animals.
84
115
  exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
85
116
  list.
86
117
  dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
@@ -89,6 +120,9 @@ class ProjectManifest:
89
120
  as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
90
121
  enabled, the 'dataset_ready_only' option takes precedence.
91
122
 
123
+ Returns:
124
+ The tuple of session IDs matching the filter criteria.
125
+
92
126
  Raises:
93
127
  ValueError: If the specified animal is not found in the manifest file.
94
128
  """
@@ -114,7 +148,7 @@ def generate_project_manifest(
114
148
  This function evaluates the input project directory and builds the 'manifest' file for the project. The file
115
149
  includes the descriptive information about every session stored inside the input project folder and the state of
116
150
  the session's data processing (which processing pipelines have been applied to each session). The file will be
117
- created under the 'output_path' directory and use the following name pattern: {ProjectName}}_manifest.feather.
151
+ created under the 'output_path' directory and use the following name pattern: ProjectName_manifest.feather.
118
152
 
119
153
  Notes:
120
154
  The manifest file is primarily used to capture and move project state information between machines, typically
@@ -132,6 +166,7 @@ def generate_project_manifest(
132
166
 
133
167
  def verify_session_checksum(
134
168
  session_path: Path,
169
+ manager_id: int,
135
170
  create_processed_data_directory: bool = True,
136
171
  processed_data_root: None | Path = None,
137
172
  update_manifest: bool = False,
@@ -156,6 +191,8 @@ def verify_session_checksum(
156
191
  Args:
157
192
  session_path: The path to the session directory to be verified. Note, the input session directory must contain
158
193
  the 'raw_data' subdirectory.
194
+ manager_id: The xxHash-64 hash-value that specifies the unique identifier of the manager process that
195
+ manages the integrity verification runtime.
159
196
  create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
160
197
  processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
161
198
  the root directory where to store the processed data from all projects, and it will be automatically
@@ -182,9 +219,8 @@ def resolve_p53_marker(
182
219
  from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
183
220
  that only one type of runtimes (processing or dataset integration) is allowed to work with the session.
184
221
 
185
- For the p53.bin marker to be created, the session must currently not undergo any processing. Removing the
186
- p53.bin marker does not have any dependencies and will be executed even if the session is currently undergoing
187
- dataset integration. This is due to data access hierarchy limitations of the Sun lab compute server.
222
+ For the p53.bin marker to be created, the session must not be undergoing processing. For the p53 marker
223
+ to be removed, the session must not be undergoing dataset integration.
188
224
 
189
225
  Since version 3.1.0, this functon also supports (re)generating the processed session's project manifest file,
190
226
  which is used to support further Sun lab data processing pipelines.