sl-shared-assets 3.1.2__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sl-shared-assets might be problematic. Click here for more details.
- sl_shared_assets/__init__.py +8 -0
- sl_shared_assets/__init__.pyi +8 -0
- sl_shared_assets/cli.py +41 -24
- sl_shared_assets/cli.pyi +9 -4
- sl_shared_assets/data_classes/__init__.py +13 -1
- sl_shared_assets/data_classes/__init__.pyi +6 -0
- sl_shared_assets/data_classes/configuration_data.py +1 -1
- sl_shared_assets/data_classes/runtime_data.py +20 -9
- sl_shared_assets/data_classes/runtime_data.pyi +7 -4
- sl_shared_assets/data_classes/session_data.py +271 -232
- sl_shared_assets/data_classes/session_data.pyi +113 -36
- sl_shared_assets/data_classes/surgery_data.py +1 -1
- sl_shared_assets/data_classes/surgery_data.pyi +1 -1
- sl_shared_assets/server/server.py +41 -4
- sl_shared_assets/server/server.pyi +13 -1
- sl_shared_assets/tools/packaging_tools.py +7 -8
- sl_shared_assets/tools/packaging_tools.pyi +2 -0
- sl_shared_assets/tools/project_management_tools.py +195 -61
- sl_shared_assets/tools/project_management_tools.pyi +48 -12
- {sl_shared_assets-3.1.2.dist-info → sl_shared_assets-4.0.0.dist-info}/METADATA +45 -48
- sl_shared_assets-4.0.0.dist-info/RECORD +36 -0
- sl_shared_assets-3.1.2.dist-info/RECORD +0 -36
- {sl_shared_assets-3.1.2.dist-info → sl_shared_assets-4.0.0.dist-info}/WHEEL +0 -0
- {sl_shared_assets-3.1.2.dist-info → sl_shared_assets-4.0.0.dist-info}/entry_points.txt +0 -0
- {sl_shared_assets-3.1.2.dist-info → sl_shared_assets-4.0.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -8,16 +8,17 @@ from datetime import datetime
|
|
|
8
8
|
import pytz
|
|
9
9
|
import polars as pl
|
|
10
10
|
from filelock import FileLock
|
|
11
|
-
from ataraxis_base_utilities import console
|
|
11
|
+
from ataraxis_base_utilities import LogLevel, console
|
|
12
12
|
|
|
13
13
|
from ..data_classes import (
|
|
14
14
|
SessionData,
|
|
15
15
|
SessionTypes,
|
|
16
|
-
|
|
16
|
+
TrackerFileNames,
|
|
17
17
|
RunTrainingDescriptor,
|
|
18
18
|
LickTrainingDescriptor,
|
|
19
19
|
WindowCheckingDescriptor,
|
|
20
20
|
MesoscopeExperimentDescriptor,
|
|
21
|
+
get_processing_tracker,
|
|
21
22
|
)
|
|
22
23
|
from .packaging_tools import calculate_directory_checksum
|
|
23
24
|
|
|
@@ -149,28 +150,20 @@ class ProjectManifest:
|
|
|
149
150
|
"""
|
|
150
151
|
return tuple(self._data.select("animal").unique().sort("animal").to_series().to_list())
|
|
151
152
|
|
|
152
|
-
|
|
153
|
-
def sessions(self) -> tuple[str, ...]:
|
|
154
|
-
"""Returns all session IDs stored inside the manifest file.
|
|
155
|
-
|
|
156
|
-
This provides a tuple of all sessions, independent of the participating animal, that were recorded as part
|
|
157
|
-
of the target project.
|
|
158
|
-
"""
|
|
159
|
-
return tuple(self._data.select("session").sort("session").to_series().to_list())
|
|
160
|
-
|
|
161
|
-
def get_sessions_for_animal(
|
|
153
|
+
def _get_filtered_sessions(
|
|
162
154
|
self,
|
|
163
|
-
animal: str | int,
|
|
155
|
+
animal: str | int | None = None,
|
|
164
156
|
exclude_incomplete: bool = True,
|
|
165
157
|
dataset_ready_only: bool = False,
|
|
166
158
|
not_dataset_ready_only: bool = False,
|
|
167
159
|
) -> tuple[str, ...]:
|
|
168
|
-
"""
|
|
160
|
+
"""This worker method is used to get a tuple of sessions with optional filtering.
|
|
169
161
|
|
|
170
|
-
|
|
162
|
+
User-facing methods call this worker under-the-hood to fetch the filtered tuple of sessions.
|
|
171
163
|
|
|
172
164
|
Args:
|
|
173
|
-
animal:
|
|
165
|
+
animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
|
|
166
|
+
animals.
|
|
174
167
|
exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
|
|
175
168
|
list.
|
|
176
169
|
dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
|
|
@@ -179,22 +172,27 @@ class ProjectManifest:
|
|
|
179
172
|
as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
|
|
180
173
|
enabled, the 'dataset_ready_only' option takes precedence.
|
|
181
174
|
|
|
175
|
+
Returns:
|
|
176
|
+
The tuple of session IDs matching the filter criteria.
|
|
177
|
+
|
|
182
178
|
Raises:
|
|
183
179
|
ValueError: If the specified animal is not found in the manifest file.
|
|
184
180
|
"""
|
|
181
|
+
data = self._data
|
|
185
182
|
|
|
186
|
-
#
|
|
187
|
-
if
|
|
188
|
-
animal
|
|
189
|
-
|
|
190
|
-
|
|
183
|
+
# Filter by animal if specified
|
|
184
|
+
if animal is not None:
|
|
185
|
+
# Ensures that the 'animal' argument has the same type as the data inside the DataFrame.
|
|
186
|
+
if self._animal_string:
|
|
187
|
+
animal = str(animal)
|
|
188
|
+
else:
|
|
189
|
+
animal = int(animal)
|
|
191
190
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
191
|
+
if animal not in self.animals:
|
|
192
|
+
message = f"Animal ID '{animal}' not found in the project manifest. Available animals: {self.animals}."
|
|
193
|
+
console.error(message=message, error=ValueError)
|
|
195
194
|
|
|
196
|
-
|
|
197
|
-
data = self._data.filter(pl.col("animal") == animal)
|
|
195
|
+
data = data.filter(pl.col("animal") == animal)
|
|
198
196
|
|
|
199
197
|
# Optionally filters out incomplete sessions
|
|
200
198
|
if exclude_incomplete:
|
|
@@ -210,6 +208,51 @@ class ProjectManifest:
|
|
|
210
208
|
sessions = data.select("session").sort("session").to_series().to_list()
|
|
211
209
|
return tuple(sessions)
|
|
212
210
|
|
|
211
|
+
@property
|
|
212
|
+
def sessions(self) -> tuple[str, ...]:
|
|
213
|
+
"""Returns all session IDs stored inside the manifest file.
|
|
214
|
+
|
|
215
|
+
This property provides a tuple of all sessions, independent of the participating animal, that were recorded as
|
|
216
|
+
part of the target project. Use the get_sessions() method to get a filtered tuple of session IDs.
|
|
217
|
+
"""
|
|
218
|
+
return self._get_filtered_sessions(animal=None, exclude_incomplete=False)
|
|
219
|
+
|
|
220
|
+
def get_sessions(
|
|
221
|
+
self,
|
|
222
|
+
animal: str | int | None = None,
|
|
223
|
+
exclude_incomplete: bool = True,
|
|
224
|
+
dataset_ready_only: bool = False,
|
|
225
|
+
not_dataset_ready_only: bool = False,
|
|
226
|
+
) -> tuple[str, ...]:
|
|
227
|
+
"""Returns requested session IDs based on selected filtering criteria.
|
|
228
|
+
|
|
229
|
+
This method provides a tuple of sessions based on the specified filters. If no animal is specified, returns
|
|
230
|
+
sessions for all animals in the project.
|
|
231
|
+
|
|
232
|
+
Args:
|
|
233
|
+
animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
|
|
234
|
+
animals.
|
|
235
|
+
exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
|
|
236
|
+
list.
|
|
237
|
+
dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
|
|
238
|
+
the output list. Enabling this option only shows sessions that can be integrated into a dataset.
|
|
239
|
+
not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
|
|
240
|
+
as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
|
|
241
|
+
enabled, the 'dataset_ready_only' option takes precedence.
|
|
242
|
+
|
|
243
|
+
Returns:
|
|
244
|
+
The tuple of session IDs matching the filter criteria.
|
|
245
|
+
|
|
246
|
+
Raises:
|
|
247
|
+
ValueError: If the specified animal is not found in the manifest file.
|
|
248
|
+
"""
|
|
249
|
+
return self._get_filtered_sessions(
|
|
250
|
+
animal=animal,
|
|
251
|
+
exclude_incomplete=exclude_incomplete,
|
|
252
|
+
dataset_ready_only=dataset_ready_only,
|
|
253
|
+
not_dataset_ready_only=not_dataset_ready_only,
|
|
254
|
+
)
|
|
255
|
+
|
|
213
256
|
def get_session_info(self, session: str) -> pl.DataFrame:
|
|
214
257
|
"""Returns a Polars DataFrame that stores detailed information for the specified session.
|
|
215
258
|
|
|
@@ -237,7 +280,7 @@ def generate_project_manifest(
|
|
|
237
280
|
This function evaluates the input project directory and builds the 'manifest' file for the project. The file
|
|
238
281
|
includes the descriptive information about every session stored inside the input project folder and the state of
|
|
239
282
|
the session's data processing (which processing pipelines have been applied to each session). The file will be
|
|
240
|
-
created under the 'output_path' directory and use the following name pattern:
|
|
283
|
+
created under the 'output_path' directory and use the following name pattern: ProjectName_manifest.feather.
|
|
241
284
|
|
|
242
285
|
Notes:
|
|
243
286
|
The manifest file is primarily used to capture and move project state information between machines, typically
|
|
@@ -362,12 +405,16 @@ def generate_project_manifest(
|
|
|
362
405
|
manifest["notes"].append(descriptor.experimenter_notes)
|
|
363
406
|
except Exception:
|
|
364
407
|
manifest["notes"].append("N/A")
|
|
408
|
+
else:
|
|
409
|
+
manifest["notes"].append("N/A")
|
|
365
410
|
|
|
366
411
|
# If the session raw_data folder contains the telomere.bin file, marks the session as complete.
|
|
367
412
|
manifest["complete"].append(session_data.raw_data.telomere_path.exists())
|
|
368
413
|
|
|
369
414
|
# Data verification status
|
|
370
|
-
tracker =
|
|
415
|
+
tracker = get_processing_tracker(
|
|
416
|
+
root=session_data.raw_data.raw_data_path, file_name=TrackerFileNames.INTEGRITY
|
|
417
|
+
)
|
|
371
418
|
manifest["integrity"].append(tracker.is_complete)
|
|
372
419
|
|
|
373
420
|
# If the session is incomplete or unverified, marks all processing steps as FALSE, as automatic processing
|
|
@@ -381,15 +428,21 @@ def generate_project_manifest(
|
|
|
381
428
|
continue # Cycles to the next session
|
|
382
429
|
|
|
383
430
|
# Suite2p (single-day) processing status.
|
|
384
|
-
tracker =
|
|
431
|
+
tracker = get_processing_tracker(
|
|
432
|
+
file_name=TrackerFileNames.SUITE2P, root=session_data.processed_data.processed_data_path
|
|
433
|
+
)
|
|
385
434
|
manifest["suite2p"].append(tracker.is_complete)
|
|
386
435
|
|
|
387
436
|
# Behavior data processing status.
|
|
388
|
-
tracker =
|
|
437
|
+
tracker = get_processing_tracker(
|
|
438
|
+
file_name=TrackerFileNames.BEHAVIOR, root=session_data.processed_data.processed_data_path
|
|
439
|
+
)
|
|
389
440
|
manifest["behavior"].append(tracker.is_complete)
|
|
390
441
|
|
|
391
442
|
# DeepLabCut (video) processing status.
|
|
392
|
-
tracker =
|
|
443
|
+
tracker = get_processing_tracker(
|
|
444
|
+
file_name=TrackerFileNames.VIDEO, root=session_data.processed_data.processed_data_path
|
|
445
|
+
)
|
|
393
446
|
manifest["video"].append(tracker.is_complete)
|
|
394
447
|
|
|
395
448
|
# Tracks whether the session's data is currently in the processing or dataset integration mode.
|
|
@@ -433,6 +486,7 @@ def generate_project_manifest(
|
|
|
433
486
|
|
|
434
487
|
def verify_session_checksum(
|
|
435
488
|
session_path: Path,
|
|
489
|
+
manager_id: int,
|
|
436
490
|
create_processed_data_directory: bool = True,
|
|
437
491
|
processed_data_root: None | Path = None,
|
|
438
492
|
update_manifest: bool = False,
|
|
@@ -457,6 +511,8 @@ def verify_session_checksum(
|
|
|
457
511
|
Args:
|
|
458
512
|
session_path: The path to the session directory to be verified. Note, the input session directory must contain
|
|
459
513
|
the 'raw_data' subdirectory.
|
|
514
|
+
manager_id: The xxHash-64 hash-value that specifies the unique identifier of the manager process that
|
|
515
|
+
manages the integrity verification runtime.
|
|
460
516
|
create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
|
|
461
517
|
processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
|
|
462
518
|
the root directory where to store the processed data from all projects, and it will be automatically
|
|
@@ -474,14 +530,12 @@ def verify_session_checksum(
|
|
|
474
530
|
)
|
|
475
531
|
|
|
476
532
|
# Initializes the ProcessingTracker instance for the verification tracker file
|
|
477
|
-
tracker =
|
|
533
|
+
tracker = get_processing_tracker(root=session_data.raw_data.raw_data_path, file_name=TrackerFileNames.INTEGRITY)
|
|
534
|
+
console.echo(f"{tracker.file_path}")
|
|
478
535
|
|
|
479
536
|
# Updates the tracker data to communicate that the verification process has started. This automatically clears
|
|
480
537
|
# the previous 'completed' status.
|
|
481
|
-
tracker.start()
|
|
482
|
-
|
|
483
|
-
# Try starts here to allow for proper error-driven 'start' terminations of the tracker cannot acquire the lock for
|
|
484
|
-
# a long time, or if another runtime is already underway.
|
|
538
|
+
tracker.start(manager_id=manager_id)
|
|
485
539
|
try:
|
|
486
540
|
# Re-calculates the checksum for the raw_data directory
|
|
487
541
|
calculated_checksum = calculate_directory_checksum(
|
|
@@ -500,14 +554,14 @@ def verify_session_checksum(
|
|
|
500
554
|
|
|
501
555
|
else:
|
|
502
556
|
# Sets the tracker to indicate that the verification runtime completed successfully.
|
|
503
|
-
tracker.stop()
|
|
557
|
+
tracker.stop(manager_id=manager_id)
|
|
504
558
|
|
|
505
559
|
finally:
|
|
506
560
|
# If the code reaches this section while the tracker indicates that the processing is still running,
|
|
507
561
|
# this means that the verification runtime encountered an error. Configures the tracker to indicate that this
|
|
508
562
|
# runtime finished with an error to prevent deadlocking the runtime.
|
|
509
563
|
if tracker.is_running:
|
|
510
|
-
tracker.error()
|
|
564
|
+
tracker.error(manager_id=manager_id)
|
|
511
565
|
|
|
512
566
|
# If the runtime is configured to generate the project manifest file, attempts to generate and overwrite the
|
|
513
567
|
# existing manifest file for the target project.
|
|
@@ -541,9 +595,8 @@ def resolve_p53_marker(
|
|
|
541
595
|
from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
|
|
542
596
|
that only one type of runtime (processing or dataset integration) is allowed to work with the session.
|
|
543
597
|
|
|
544
|
-
For the p53.bin marker to be created, the session must
|
|
545
|
-
|
|
546
|
-
dataset integration. This is due to data access hierarchy limitations of the Sun lab compute server.
|
|
598
|
+
For the p53.bin marker to be created, the session must not be undergoing processing. For the p53 marker
|
|
599
|
+
to be removed, the session must not be undergoing dataset integration.
|
|
547
600
|
|
|
548
601
|
Since version 3.1.0, this function also supports (re)generating the processed session's project manifest file,
|
|
549
602
|
which is used to support further Sun lab data processing pipelines.
|
|
@@ -568,19 +621,69 @@ def resolve_p53_marker(
|
|
|
568
621
|
make_processed_data_directory=create_processed_data_directory,
|
|
569
622
|
)
|
|
570
623
|
|
|
571
|
-
# If the p53.bin marker exists and the runtime is configured to remove it,
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
624
|
+
# If the p53.bin marker exists and the runtime is configured to remove it, attempts to remove the marker file.
|
|
625
|
+
if session_data.processed_data.p53_path.exists() and remove:
|
|
626
|
+
# This section deals with a unique nuance related to the Sun lab processing server organization. Specifically,
|
|
627
|
+
# the user accounts are not allowed to modify or create files in the data directories owned by the service
|
|
628
|
+
# accounts. In turn, this prevents user accounts from modifying the processed data directory to indicate when
|
|
629
|
+
# they are running a dataset integration pipeline on the processed data. To work around this problem, the
|
|
630
|
+
# dataset integration pipeline now creates a 'semaphore' marker for each session that is currently being
|
|
631
|
+
# integrated into a dataset. This semaphore marker is stored under the root user working directory, inside the
|
|
632
|
+
# subdirectory called 'semaphore'.
|
|
633
|
+
|
|
634
|
+
# The parent of the shared sun-lab processed data directory is the root 'working' volume. All user directories
|
|
635
|
+
# are stored under this root working directory.
|
|
636
|
+
if processed_data_root is None:
|
|
637
|
+
# If the processed data root is not provided, sets it to the great-grandparent of the session directory.
|
|
638
|
+
# This works assuming that the data is stored under: root/project/animal/session.
|
|
639
|
+
processed_data_root = session_path.parents[2]
|
|
640
|
+
working_root = processed_data_root.parent
|
|
641
|
+
|
|
642
|
+
# Loops over each user directory and checks whether a semaphore marker exists for the processed session.
|
|
643
|
+
for directory in working_root.iterdir():
|
|
644
|
+
if (
|
|
645
|
+
len([marker for marker in directory.joinpath("semaphore").glob(f"*{session_data.session_name}.bin")])
|
|
646
|
+
> 0
|
|
647
|
+
):
|
|
648
|
+
# Aborts with an error if the semaphore marker prevents the p53 marker from being removed.
|
|
649
|
+
message = (
|
|
650
|
+
f"Unable to remove the dataset marker for the session' {session_data.session_name}' acquired "
|
|
651
|
+
f"for the animal '{session_data.animal_id}' under the '{session_data.project_name}' project. "
|
|
652
|
+
f"The session data is currently being integrated into a dataset by the owner the "
|
|
653
|
+
f"'{directory.stem}' user directory. Wait until the ongoing dataset integration is complete and "
|
|
654
|
+
f"repeat the command that produced this error."
|
|
655
|
+
)
|
|
656
|
+
console.error(message=message, error=RuntimeError)
|
|
577
657
|
|
|
578
|
-
|
|
658
|
+
# If the session does not have a corresponding semaphore marker in any user directories, removes the p53 marker
|
|
659
|
+
# file.
|
|
660
|
+
session_data.processed_data.p53_path.unlink()
|
|
661
|
+
message = (
|
|
662
|
+
f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
|
|
663
|
+
f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Removed."
|
|
664
|
+
)
|
|
665
|
+
console.echo(message=message, level=LogLevel.SUCCESS)
|
|
666
|
+
return # Ends remove runtime
|
|
579
667
|
|
|
580
|
-
# If the marker does not exist and the function is called in 'remove' mode, aborts the runtime
|
|
581
|
-
elif remove:
|
|
668
|
+
# If the marker does not exist and the function is called in 'remove' mode, aborts the runtime early
|
|
669
|
+
elif not session_data.processed_data.p53_path.exists() and remove:
|
|
670
|
+
message = (
|
|
671
|
+
f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
|
|
672
|
+
f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Does not exist. No actions "
|
|
673
|
+
f"taken."
|
|
674
|
+
)
|
|
675
|
+
console.echo(message=message, level=LogLevel.SUCCESS)
|
|
582
676
|
return # Ends remove runtime
|
|
583
677
|
|
|
678
|
+
elif session_data.processed_data.p53_path.exists():
|
|
679
|
+
message = (
|
|
680
|
+
f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
|
|
681
|
+
f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Already exists. No actions "
|
|
682
|
+
f"taken."
|
|
683
|
+
)
|
|
684
|
+
console.echo(message=message, level=LogLevel.SUCCESS)
|
|
685
|
+
return # Ends create runtime
|
|
686
|
+
|
|
584
687
|
# The rest of the runtime deals with determining whether it is safe to create the marker file.
|
|
585
688
|
# Queries the type of the processed session
|
|
586
689
|
session_type = session_data.session_type
|
|
@@ -588,30 +691,61 @@ def resolve_p53_marker(
|
|
|
588
691
|
# Window checking sessions are not designed to be integrated into datasets, so they cannot be marked with the
|
|
589
692
|
# p53.bin file. Similarly, any incomplete session is automatically excluded from dataset formation.
|
|
590
693
|
if session_type == SessionTypes.WINDOW_CHECKING or not session_data.raw_data.telomere_path.exists():
|
|
591
|
-
|
|
694
|
+
message = (
|
|
695
|
+
f"Unable to generate the dataset marker for the session '{session_data.session_name}' acquired for the "
|
|
696
|
+
f"animal '{session_data.animal_id}' under the '{session_data.project_name}' project, as the session is "
|
|
697
|
+
f"incomplete or is of Window Checking type. These sessions must be manually evaluated and marked for "
|
|
698
|
+
f"dataset inclusion by the experimenter. "
|
|
699
|
+
)
|
|
700
|
+
console.error(message=message, error=RuntimeError)
|
|
592
701
|
|
|
593
702
|
# Training sessions collect similar data and share processing pipeline requirements
|
|
703
|
+
error: bool = False
|
|
594
704
|
if session_type == SessionTypes.LICK_TRAINING or session_type == SessionTypes.RUN_TRAINING:
|
|
595
705
|
# Ensures that the session is not being processed with one of the supported pipelines.
|
|
596
|
-
behavior_tracker =
|
|
597
|
-
|
|
706
|
+
behavior_tracker = get_processing_tracker(
|
|
707
|
+
file_name=TrackerFileNames.BEHAVIOR, root=session_data.processed_data.processed_data_path
|
|
708
|
+
)
|
|
709
|
+
video_tracker = get_processing_tracker(
|
|
710
|
+
file_name=TrackerFileNames.VIDEO, root=session_data.processed_data.processed_data_path
|
|
711
|
+
)
|
|
598
712
|
if behavior_tracker.is_running or video_tracker.is_running:
|
|
599
713
|
# Note, training runtimes do not require suite2p processing.
|
|
600
|
-
|
|
714
|
+
error = True
|
|
601
715
|
|
|
602
716
|
# Mesoscope experiment sessions require additional processing with suite2p
|
|
603
|
-
|
|
604
|
-
behavior_tracker =
|
|
605
|
-
|
|
606
|
-
|
|
717
|
+
elif session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
|
|
718
|
+
behavior_tracker = get_processing_tracker(
|
|
719
|
+
file_name=TrackerFileNames.BEHAVIOR, root=session_data.processed_data.processed_data_path
|
|
720
|
+
)
|
|
721
|
+
suite2p_tracker = get_processing_tracker(
|
|
722
|
+
file_name=TrackerFileNames.SUITE2P, root=session_data.processed_data.processed_data_path
|
|
723
|
+
)
|
|
724
|
+
video_tracker = get_processing_tracker(
|
|
725
|
+
file_name=TrackerFileNames.VIDEO, root=session_data.processed_data.processed_data_path
|
|
726
|
+
)
|
|
727
|
+
console.echo(f"{behavior_tracker.is_running}")
|
|
728
|
+
if behavior_tracker.is_running or video_tracker.is_running or suite2p_tracker.is_running:
|
|
729
|
+
error = True
|
|
607
730
|
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
731
|
+
# If the session is currently being processed by one or more pipelines, aborts with an error.
|
|
732
|
+
if error:
|
|
733
|
+
message = (
|
|
734
|
+
f"Unable to generate the dataset marker for the session '{session_data.session_name}' acquired for the "
|
|
735
|
+
f"animal '{session_data.animal_id}' under the '{session_data.project_name}' project, as it is "
|
|
736
|
+
f"currently being processed by one of the data processing pipelines. Wait until the session is fully "
|
|
737
|
+
f"processed by all pipelines and repeat the command that encountered this error."
|
|
738
|
+
)
|
|
739
|
+
console.error(message=message, error=RuntimeError)
|
|
611
740
|
|
|
612
741
|
# If the runtime reached this point, the session is eligible for dataset integration. Creates the p53.bin marker
|
|
613
742
|
# file, preventing the session from being processed again as long as the marker exists.
|
|
614
743
|
session_data.processed_data.p53_path.touch()
|
|
744
|
+
message = (
|
|
745
|
+
f"Dataset marker for the session '{session_data.session_name}' acquired for the animal "
|
|
746
|
+
f"'{session_data.animal_id}' under the '{session_data.project_name}' project: Created."
|
|
747
|
+
)
|
|
748
|
+
console.echo(message=message, level=LogLevel.SUCCESS)
|
|
615
749
|
|
|
616
750
|
# If the runtime is configured to generate the project manifest file, attempts to generate and overwrite the
|
|
617
751
|
# existing manifest file for the target project.
|
|
@@ -5,11 +5,12 @@ import polars as pl
|
|
|
5
5
|
from ..data_classes import (
|
|
6
6
|
SessionData as SessionData,
|
|
7
7
|
SessionTypes as SessionTypes,
|
|
8
|
-
|
|
8
|
+
TrackerFileNames as TrackerFileNames,
|
|
9
9
|
RunTrainingDescriptor as RunTrainingDescriptor,
|
|
10
10
|
LickTrainingDescriptor as LickTrainingDescriptor,
|
|
11
11
|
WindowCheckingDescriptor as WindowCheckingDescriptor,
|
|
12
12
|
MesoscopeExperimentDescriptor as MesoscopeExperimentDescriptor,
|
|
13
|
+
get_processing_tracker as get_processing_tracker,
|
|
13
14
|
)
|
|
14
15
|
from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
|
|
15
16
|
|
|
@@ -61,26 +62,56 @@ class ProjectManifest:
|
|
|
61
62
|
|
|
62
63
|
This provides a tuple of all animal IDs participating in the target project.
|
|
63
64
|
"""
|
|
65
|
+
def _get_filtered_sessions(
|
|
66
|
+
self,
|
|
67
|
+
animal: str | int | None = None,
|
|
68
|
+
exclude_incomplete: bool = True,
|
|
69
|
+
dataset_ready_only: bool = False,
|
|
70
|
+
not_dataset_ready_only: bool = False,
|
|
71
|
+
) -> tuple[str, ...]:
|
|
72
|
+
"""This worker method is used to get a tuple of sessions with optional filtering.
|
|
73
|
+
|
|
74
|
+
User-facing methods call this worker under-the-hood to fetch the filtered tuple of sessions.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
|
|
78
|
+
animals.
|
|
79
|
+
exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
|
|
80
|
+
list.
|
|
81
|
+
dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
|
|
82
|
+
the output list. Enabling this option only shows sessions that can be integrated into a dataset.
|
|
83
|
+
not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
|
|
84
|
+
as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
|
|
85
|
+
enabled, the 'dataset_ready_only' option takes precedence.
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
The tuple of session IDs matching the filter criteria.
|
|
89
|
+
|
|
90
|
+
Raises:
|
|
91
|
+
ValueError: If the specified animal is not found in the manifest file.
|
|
92
|
+
"""
|
|
64
93
|
@property
|
|
65
94
|
def sessions(self) -> tuple[str, ...]:
|
|
66
95
|
"""Returns all session IDs stored inside the manifest file.
|
|
67
96
|
|
|
68
|
-
This provides a tuple of all sessions, independent of the participating animal, that were recorded as
|
|
69
|
-
of the target project.
|
|
97
|
+
This property provides a tuple of all sessions, independent of the participating animal, that were recorded as
|
|
98
|
+
part of the target project. Use the get_sessions() method to get a filtered tuple of session IDs.
|
|
70
99
|
"""
|
|
71
|
-
def
|
|
100
|
+
def get_sessions(
|
|
72
101
|
self,
|
|
73
|
-
animal: str | int,
|
|
102
|
+
animal: str | int | None = None,
|
|
74
103
|
exclude_incomplete: bool = True,
|
|
75
104
|
dataset_ready_only: bool = False,
|
|
76
105
|
not_dataset_ready_only: bool = False,
|
|
77
106
|
) -> tuple[str, ...]:
|
|
78
|
-
"""Returns
|
|
107
|
+
"""Returns requested session IDs based on selected filtering criteria.
|
|
79
108
|
|
|
80
|
-
This provides a tuple of
|
|
109
|
+
This method provides a tuple of sessions based on the specified filters. If no animal is specified, returns
|
|
110
|
+
sessions for all animals in the project.
|
|
81
111
|
|
|
82
112
|
Args:
|
|
83
|
-
animal:
|
|
113
|
+
animal: An optional animal ID to filter the sessions. If set to None, the method returns sessions for all
|
|
114
|
+
animals.
|
|
84
115
|
exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
|
|
85
116
|
list.
|
|
86
117
|
dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
|
|
@@ -89,6 +120,9 @@ class ProjectManifest:
|
|
|
89
120
|
as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
|
|
90
121
|
enabled, the 'dataset_ready_only' option takes precedence.
|
|
91
122
|
|
|
123
|
+
Returns:
|
|
124
|
+
The tuple of session IDs matching the filter criteria.
|
|
125
|
+
|
|
92
126
|
Raises:
|
|
93
127
|
ValueError: If the specified animal is not found in the manifest file.
|
|
94
128
|
"""
|
|
@@ -114,7 +148,7 @@ def generate_project_manifest(
|
|
|
114
148
|
This function evaluates the input project directory and builds the 'manifest' file for the project. The file
|
|
115
149
|
includes the descriptive information about every session stored inside the input project folder and the state of
|
|
116
150
|
the session's data processing (which processing pipelines have been applied to each session). The file will be
|
|
117
|
-
created under the 'output_path' directory and use the following name pattern:
|
|
151
|
+
created under the 'output_path' directory and use the following name pattern: ProjectName_manifest.feather.
|
|
118
152
|
|
|
119
153
|
Notes:
|
|
120
154
|
The manifest file is primarily used to capture and move project state information between machines, typically
|
|
@@ -132,6 +166,7 @@ def generate_project_manifest(
|
|
|
132
166
|
|
|
133
167
|
def verify_session_checksum(
|
|
134
168
|
session_path: Path,
|
|
169
|
+
manager_id: int,
|
|
135
170
|
create_processed_data_directory: bool = True,
|
|
136
171
|
processed_data_root: None | Path = None,
|
|
137
172
|
update_manifest: bool = False,
|
|
@@ -156,6 +191,8 @@ def verify_session_checksum(
|
|
|
156
191
|
Args:
|
|
157
192
|
session_path: The path to the session directory to be verified. Note, the input session directory must contain
|
|
158
193
|
the 'raw_data' subdirectory.
|
|
194
|
+
manager_id: The xxHash-64 hash-value that specifies the unique identifier of the manager process that
|
|
195
|
+
manages the integrity verification runtime.
|
|
159
196
|
create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
|
|
160
197
|
processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
|
|
161
198
|
the root directory where to store the processed data from all projects, and it will be automatically
|
|
@@ -182,9 +219,8 @@ def resolve_p53_marker(
|
|
|
182
219
|
from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
|
|
183
220
|
that only one type of runtime (processing or dataset integration) is allowed to work with the session.
|
|
184
221
|
|
|
185
|
-
For the p53.bin marker to be created, the session must
|
|
186
|
-
|
|
187
|
-
dataset integration. This is due to data access hierarchy limitations of the Sun lab compute server.
|
|
222
|
+
For the p53.bin marker to be created, the session must not be undergoing processing. For the p53 marker
|
|
223
|
+
to be removed, the session must not be undergoing dataset integration.
|
|
188
224
|
|
|
189
225
|
Since version 3.1.0, this function also supports (re)generating the processed session's project manifest file,
|
|
190
226
|
which is used to support further Sun lab data processing pipelines.
|