sl-shared-assets 3.1.3__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sl-shared-assets might be problematic. Click here for more details.
- sl_shared_assets/__init__.py +8 -0
- sl_shared_assets/__init__.pyi +8 -0
- sl_shared_assets/cli.py +22 -9
- sl_shared_assets/cli.pyi +7 -2
- sl_shared_assets/data_classes/__init__.py +13 -1
- sl_shared_assets/data_classes/__init__.pyi +6 -0
- sl_shared_assets/data_classes/configuration_data.py +1 -1
- sl_shared_assets/data_classes/runtime_data.py +20 -9
- sl_shared_assets/data_classes/runtime_data.pyi +7 -4
- sl_shared_assets/data_classes/session_data.py +269 -230
- sl_shared_assets/data_classes/session_data.pyi +111 -34
- sl_shared_assets/data_classes/surgery_data.py +1 -1
- sl_shared_assets/data_classes/surgery_data.pyi +1 -1
- sl_shared_assets/server/server.py +2 -2
- sl_shared_assets/tools/packaging_tools.py +7 -8
- sl_shared_assets/tools/packaging_tools.pyi +2 -0
- sl_shared_assets/tools/project_management_tools.py +182 -91
- sl_shared_assets/tools/project_management_tools.pyi +48 -12
- {sl_shared_assets-3.1.3.dist-info → sl_shared_assets-4.0.0.dist-info}/METADATA +44 -47
- sl_shared_assets-4.0.0.dist-info/RECORD +36 -0
- sl_shared_assets-3.1.3.dist-info/RECORD +0 -36
- {sl_shared_assets-3.1.3.dist-info → sl_shared_assets-4.0.0.dist-info}/WHEEL +0 -0
- {sl_shared_assets-3.1.3.dist-info → sl_shared_assets-4.0.0.dist-info}/entry_points.txt +0 -0
- {sl_shared_assets-3.1.3.dist-info → sl_shared_assets-4.0.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -7,11 +7,13 @@ libraries use these classes to work with all lab-generated data."""
|
|
|
7
7
|
|
|
8
8
|
import copy
|
|
9
9
|
from enum import StrEnum
|
|
10
|
+
from random import randint
|
|
10
11
|
import shutil as sh
|
|
11
12
|
from pathlib import Path
|
|
12
13
|
from dataclasses import field, dataclass
|
|
13
14
|
|
|
14
|
-
from
|
|
15
|
+
from xxhash import xxh3_64
|
|
16
|
+
from filelock import FileLock
|
|
15
17
|
from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
|
|
16
18
|
from ataraxis_data_structures import YamlConfig
|
|
17
19
|
from ataraxis_time.time_helpers import get_timestamp
|
|
@@ -46,6 +48,26 @@ class SessionTypes(StrEnum):
|
|
|
46
48
|
activity data."""
|
|
47
49
|
|
|
48
50
|
|
|
51
|
+
class TrackerFileNames(StrEnum):
|
|
52
|
+
"""Defines a set of processing tacker .yaml files supported by various Sun lab data preprocessing, processing, and
|
|
53
|
+
dataset formation pipelines.
|
|
54
|
+
|
|
55
|
+
This enumeration standardizes the names for all processing tracker files used in the lab. It is designed to be used
|
|
56
|
+
via the get_processing_tracker() function to generate ProcessingTracker instances.
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
BEHAVIOR = "behavior_processing_tracker.yaml"
|
|
60
|
+
"""This file is used to track the state of the behavior log processing pipeline."""
|
|
61
|
+
SUITE2P = "suite2p_processing_tracker.yaml"
|
|
62
|
+
"""This file is used to track the state of the single-day suite2p processing pipeline."""
|
|
63
|
+
DATASET = "dataset_formation_tracker.yaml"
|
|
64
|
+
"""This file is used to track the state of the dataset formation pipeline."""
|
|
65
|
+
VIDEO = "video_processing_tracker.yaml"
|
|
66
|
+
"""This file is used to track the state of the video (DeepLabCut) processing pipeline."""
|
|
67
|
+
INTEGRITY = "integrity_verification_tracker.yaml"
|
|
68
|
+
"""This file is used to track the state of the data integrity verification pipeline."""
|
|
69
|
+
|
|
70
|
+
|
|
49
71
|
@dataclass()
|
|
50
72
|
class RawData:
|
|
51
73
|
"""Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.
|
|
@@ -133,10 +155,6 @@ class RawData:
|
|
|
133
155
|
runtime initialization. Since runtime initialization is a complex process that may encounter a runtime error, the
|
|
134
156
|
marker is used to discover sessions that failed to initialize. Since uninitialized sessions by definition do not
|
|
135
157
|
contain any valuable data, they are marked for immediate deletion from all managed destinations."""
|
|
136
|
-
integrity_verification_tracker_path: Path = Path()
|
|
137
|
-
"""Stores the path to the integrity_verification.yaml tracker file. This file stores the current state of the data
|
|
138
|
-
integrity verification pipeline. It prevents more than one instance of the pipeline from working with the data
|
|
139
|
-
at a given time and communicates the outcome (success or failure) of the most recent pipeline runtime."""
|
|
140
158
|
|
|
141
159
|
def resolve_paths(self, root_directory_path: Path) -> None:
|
|
142
160
|
"""Resolves all paths managed by the class instance based on the input root directory path.
|
|
@@ -167,7 +185,6 @@ class RawData:
|
|
|
167
185
|
self.telomere_path = self.raw_data_path.joinpath("telomere.bin")
|
|
168
186
|
self.ubiquitin_path = self.raw_data_path.joinpath("ubiquitin.bin")
|
|
169
187
|
self.nk_path = self.raw_data_path.joinpath("nk.bin")
|
|
170
|
-
self.integrity_verification_tracker_path = self.raw_data_path.joinpath("integrity_verification_tracker.yaml")
|
|
171
188
|
|
|
172
189
|
def make_directories(self) -> None:
|
|
173
190
|
"""Ensures that all major subdirectories and the root directory exist, creating any missing directories.
|
|
@@ -203,15 +220,6 @@ class ProcessedData:
|
|
|
203
220
|
behavior_data_path: Path = Path()
|
|
204
221
|
"""Stores the path to the directory that contains the non-video and non-brain-activity data extracted from
|
|
205
222
|
.npz log files by the sl-behavior log processing pipeline."""
|
|
206
|
-
suite2p_processing_tracker_path: Path = Path()
|
|
207
|
-
"""Stores the path to the suite2p_processing_tracker.yaml tracker file. This file stores the current state of
|
|
208
|
-
processing the session with the sl-suite2p single-day pipeline."""
|
|
209
|
-
behavior_processing_tracker_path: Path = Path()
|
|
210
|
-
"""Stores the path to the behavior_processing_tracker.yaml file. This file stores the current state of processing
|
|
211
|
-
the session with the sl-behavior log-parsing pipeline."""
|
|
212
|
-
video_processing_tracker_path: Path = Path()
|
|
213
|
-
"""Stores the path to the video_processing_tracker.yaml file. This file stores the current state of processing
|
|
214
|
-
the session with the DeepLabCut-based video processing pipeline."""
|
|
215
223
|
p53_path: Path = Path()
|
|
216
224
|
"""Stores the path to the p53.bin file. This file serves as a lock-in marker that determines whether the session is
|
|
217
225
|
in the processing or dataset state. Specifically, if the file does not exist, the session data cannot be integrated
|
|
@@ -234,9 +242,6 @@ class ProcessedData:
|
|
|
234
242
|
self.camera_data_path = self.processed_data_path.joinpath("camera_data")
|
|
235
243
|
self.mesoscope_data_path = self.processed_data_path.joinpath("mesoscope_data")
|
|
236
244
|
self.behavior_data_path = self.processed_data_path.joinpath("behavior_data")
|
|
237
|
-
self.suite2p_processing_tracker_path = self.processed_data_path.joinpath("suite2p_processing_tracker.yaml")
|
|
238
|
-
self.behavior_processing_tracker_path = self.processed_data_path.joinpath("behavior_processing_tracker.yaml")
|
|
239
|
-
self.video_processing_tracker_path = self.processed_data_path.joinpath("video_processing_tracker.yaml")
|
|
240
245
|
self.p53_path = self.processed_data_path.joinpath("p53.bin")
|
|
241
246
|
|
|
242
247
|
def make_directories(self) -> None:
|
|
@@ -278,10 +283,10 @@ class SessionData(YamlConfig):
|
|
|
278
283
|
"""Stores the type of the session. Has to be set to one of the supported session types, defined in the SessionTypes
|
|
279
284
|
enumeration exposed by the sl-shared-assets library.
|
|
280
285
|
"""
|
|
281
|
-
acquisition_system: str | AcquisitionSystems
|
|
286
|
+
acquisition_system: str | AcquisitionSystems = AcquisitionSystems.MESOSCOPE_VR
|
|
282
287
|
"""Stores the name of the data acquisition system that acquired the data. Has to be set to one of the supported
|
|
283
288
|
acquisition systems, defined in the AcquisitionSystems enumeration exposed by the sl-shared-assets library."""
|
|
284
|
-
experiment_name: str | None
|
|
289
|
+
experiment_name: str | None = None
|
|
285
290
|
"""Stores the name of the experiment performed during the session. If the session_type field indicates that the
|
|
286
291
|
session is an experiment, this field communicates the specific experiment configuration used by the session. During
|
|
287
292
|
runtime, this name is used to load the specific experiment configuration data stored in a .yaml file with the same
|
|
@@ -427,7 +432,7 @@ class SessionData(YamlConfig):
|
|
|
427
432
|
|
|
428
433
|
# Saves the configured instance data to the session's folder so that it can be reused during processing or
|
|
429
434
|
# preprocessing.
|
|
430
|
-
instance.
|
|
435
|
+
instance.save()
|
|
431
436
|
|
|
432
437
|
# Also saves the SystemConfiguration and ExperimentConfiguration instances to the same folder using the paths
|
|
433
438
|
# resolved for the RawData instance above.
|
|
@@ -535,7 +540,7 @@ class SessionData(YamlConfig):
|
|
|
535
540
|
"""
|
|
536
541
|
self.raw_data.nk_path.unlink(missing_ok=True)
|
|
537
542
|
|
|
538
|
-
def
|
|
543
|
+
def save(self) -> None:
|
|
539
544
|
"""Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.
|
|
540
545
|
|
|
541
546
|
This is used to save the data stored in the instance to disk so that it can be reused during further stages of
|
|
@@ -567,271 +572,305 @@ class ProcessingTracker(YamlConfig):
|
|
|
567
572
|
state between multiple processes in a thread-safe manner.
|
|
568
573
|
|
|
569
574
|
Primarily, this tracker class is used by all remote data processing pipelines in the lab to prevent race conditions
|
|
570
|
-
and make it impossible to run multiple processing runtimes at the same time.
|
|
575
|
+
and make it impossible to run multiple processing runtimes at the same time. It is also used to evaluate the status
|
|
576
|
+
(success / failure) of jobs running on remote compute servers.
|
|
577
|
+
|
|
578
|
+
Note:
|
|
579
|
+
In library version 4.0.0 the processing trackers have been refactored to work similarly to 'lock' files. That is,
|
|
580
|
+
when a runtime is started, the tracker is switched into the 'running' (locked) state until it is unlocked,
|
|
581
|
+
aborted, or encounters an error. When the tracker is locked, only the same manager process as the one that
|
|
582
|
+
locked the tracker is allowed to work with session data. This feature allows executing complex processing
|
|
583
|
+
pipelines that use multiple concurrent and / or sequential processing jobs on the remote server.
|
|
584
|
+
|
|
585
|
+
This instance frequently refers to a 'manager process' in method documentation. A 'manager process' is the
|
|
586
|
+
highest-level process that manages the runtime. When the runtime is executed on remote compute servers, the
|
|
587
|
+
manager process is typically the process running on the non-server machine (user PC) that executes the remote
|
|
588
|
+
processing job on the compute server (via SSH or similar protocol). The worker process(es) that run the
|
|
589
|
+
processing job(s) on the remote compute servers are NOT considered manager processes.
|
|
571
590
|
"""
|
|
572
591
|
|
|
573
592
|
file_path: Path
|
|
574
|
-
"""Stores the path to the .yaml file used to
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
"""Tracks whether the processing runtime managed by this tracker has
|
|
578
|
-
that calls the tracker."""
|
|
593
|
+
"""Stores the path to the .yaml file used to cache the tracker data on disk. The class instance functions as a
|
|
594
|
+
wrapper around the data stored inside the specified .yaml file."""
|
|
595
|
+
_complete: bool = False
|
|
596
|
+
"""Tracks whether the processing runtime managed by this tracker has finished successfully."""
|
|
579
597
|
_encountered_error: bool = False
|
|
580
|
-
"""Tracks whether the processing runtime managed by this tracker has encountered an error
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
"""Tracks whether the processing runtime managed by this tracker is currently running
|
|
584
|
-
|
|
598
|
+
"""Tracks whether the processing runtime managed by this tracker has encountered an error and has finished
|
|
599
|
+
unsuccessfully."""
|
|
600
|
+
_running: bool = False
|
|
601
|
+
"""Tracks whether the processing runtime managed by this tracker is currently running."""
|
|
602
|
+
_manager_id: int = -1
|
|
603
|
+
"""Stores the xxHash3-64 hash value that represents the unique identifier of the manager process that started the
|
|
604
|
+
runtime. The manager process is typically running on a remote control machine (computer) and is used to
|
|
605
|
+
support processing runtimes that are distributed over multiple separate batch jobs on the compute server. This
|
|
606
|
+
ID should be generated using the 'generate_manager_id()' function exposed by this library."""
|
|
585
607
|
_lock_path: str = field(init=False)
|
|
586
|
-
"""Stores the path to the .lock file
|
|
587
|
-
|
|
588
|
-
_started_runtime: bool = False
|
|
589
|
-
"""This internal service field tracks when the class instance is used to start a runtime. It is set automatically by
|
|
590
|
-
the ProcessingTracker instance and is used to prevent runtime errors from deadlocking the specific processing
|
|
591
|
-
pipeline tracked by the class instance."""
|
|
608
|
+
"""Stores the path to the .lock file used to ensure that only a single process can simultaneously access the data
|
|
609
|
+
stored inside the tracker file."""
|
|
592
610
|
|
|
593
611
|
def __post_init__(self) -> None:
    """Derives the path to the .lock file that guards the tracker .yaml file.

    The lock path is the tracker file path with '.lock' appended to its existing suffix. If the tracker file path is
    None, the lock path is set to an empty string instead.
    """
    if self.file_path is None:
        self._lock_path = ""
        return

    # Appends '.lock' after the current suffix, e.g.: 'tracker.yaml' -> 'tracker.yaml.lock'.
    lock_suffix = self.file_path.suffix + ".lock"
    self._lock_path = str(self.file_path.with_suffix(lock_suffix))
|
|
599
617
|
|
|
600
|
-
def __del__(self) -> None:
|
|
601
|
-
"""If the instance as used to start a runtime, ensures that the instance properly marks the runtime as completed
|
|
602
|
-
or erred before being garbage-collected.
|
|
603
|
-
|
|
604
|
-
This is a security mechanism to prevent deadlocking the processed session and pipeline for future runtimes.
|
|
605
|
-
"""
|
|
606
|
-
if self._started_runtime and self._is_running:
|
|
607
|
-
self.error()
|
|
608
|
-
|
|
609
618
|
def _load_state(self) -> None:
    """Synchronizes the state fields stored in the instance with the data cached inside the wrapped .YAML file.

    If the tracker file does not exist, the state currently stored in the instance is written to disk instead, which
    creates the file at the specified tracker file path.
    """
    if not self.file_path.exists():
        # There is nothing to read yet: generates the tracker file from the current instance values.
        self._save_state()
        return

    # Only the state values are taken from the loaded data. The file path and lock path attributes of this instance
    # are intentionally left untouched.
    cached: ProcessingTracker = self.from_yaml(self.file_path)  # type: ignore
    for attribute in ("_complete", "_encountered_error", "_running", "_manager_id"):
        setattr(self, attribute, copy.copy(getattr(cached, attribute)))
|
|
620
631
|
|
|
621
632
|
def _save_state(self) -> None:
    """Writes the processing state stored inside the instance attributes to the wrapped .YAML file."""
    # Dumps a deep copy of the instance whose file_path and _lock_path are reset to None. This keeps both paths
    # intact for the instance in memory, while avoiding issues with loading the data back from the .YAML file.
    snapshot = copy.deepcopy(self)
    snapshot._lock_path = None  # type: ignore
    snapshot.file_path = None  # type: ignore
    snapshot.to_yaml(file_path=self.file_path)
|
|
630
640
|
|
|
631
|
-
def start(self) -> None:
|
|
632
|
-
"""Configures the tracker file to indicate that
|
|
641
|
+
def start(self, manager_id: int) -> None:
    """Marks the tracked processing runtime as running and locks it to the manager process that calls this method.

    If the tracker indicates that the runtime is already running for the same manager process, the method returns
    without modifying the tracker data, so it is safe to call it at the beginning of each processing job that makes
    up the runtime. Otherwise, the tracker is switched to the 'running' state, bound to the input manager id, and
    its 'complete' and 'error' flags are cleared.

    Args:
        manager_id: The unique identifier of the manager process which attempts to start the runtime tracked by
            this tracker file.

    Raises:
        TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
        RuntimeError: If the tracker indicates that the runtime is currently executed by a different manager
            process.
    """
    tracker_lock = FileLock(self._lock_path)
    with tracker_lock.acquire(timeout=10.0):
        # Refreshes the instance state from the .yaml file before evaluating it.
        self._load_state()

        if self._running:
            # The runtime is already locked by the calling manager process: there is nothing to update.
            if manager_id == self._manager_id:
                return

            # The runtime is locked by a different manager process: aborts with an error.
            message = (
                f"Unable to start the processing runtime from the manager process with id {manager_id}. The "
                f"{self.file_path.name} tracker file indicates that the manager process with id {self._manager_id} "
                f"is currently executing the tracked runtime. Only a single manager process is allowed to execute "
                f"the runtime at the same time."
            )
            console.error(message=message, error=RuntimeError)
            raise RuntimeError(message)  # Fallback to appease mypy, should not be reachable

        # The runtime is not running: locks it for the calling manager process and caches the new state on disk.
        self._manager_id = manager_id
        self._running = True
        self._complete = False
        self._encountered_error = False
        self._save_state()
|
|
676
685
|
|
|
677
|
-
def error(self) -> None:
|
|
686
|
+
def error(self, manager_id: int) -> None:
    """Marks the tracked processing runtime as failed due to an error and releases it from its manager process.

    The method clears the 'running' flag and the stored manager id, which 'unlocks' the runtime for other manager
    processes, and sets the 'error' flag so that the failure can be detected from the tracker file. If the tracker
    indicates that the runtime is not running, the method returns without modifying the tracker data.

    Args:
        manager_id: The unique identifier of the manager process which attempts to report that the runtime tracked
            by this tracker file has encountered an error.

    Raises:
        TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
        RuntimeError: If the tracker indicates that the runtime is managed by a different manager process.
    """
    tracker_lock = FileLock(self._lock_path)
    with tracker_lock.acquire(timeout=10.0):
        # Refreshes the instance state from the .yaml file before evaluating it.
        self._load_state()

        # There is nothing to report for a runtime that is not running.
        if self._running:
            # Only the manager process that locked the runtime is allowed to report its failure.
            if manager_id != self._manager_id:
                message = (
                    f"Unable to report that the processing runtime has encountered an error from the manager process "
                    f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
                    f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
                    f"with the runtime."
                )
                console.error(message=message, error=RuntimeError)
                raise RuntimeError(message)  # Fallback to appease mypy, should not be reachable

            # Unlocks the runtime and records that it aborted with an error.
            self._running, self._manager_id = False, -1
            self._complete, self._encountered_error = False, True
            self._save_state()
|
|
689
727
|
|
|
690
|
-
|
|
691
|
-
# Acquires the lock
|
|
692
|
-
lock = FileLock(self._lock_path)
|
|
693
|
-
with lock.acquire(timeout=10.0):
|
|
694
|
-
# Loads tracker state from the .yaml file
|
|
695
|
-
self._load_state()
|
|
696
|
-
|
|
697
|
-
# If the runtime is not running, aborts with an error
|
|
698
|
-
if not self._is_running:
|
|
699
|
-
message = (
|
|
700
|
-
f"Unable to report that the processing runtime encountered an error. The {self.file_path.name} "
|
|
701
|
-
f"tracker file indicates that the runtime is currently NOT running. A runtime has to be "
|
|
702
|
-
f"actively running to set the tracker to an error state."
|
|
703
|
-
)
|
|
704
|
-
console.error(message=message, error=RuntimeError)
|
|
705
|
-
raise RuntimeError(message) # Fallback to appease mypy, should not be reachable
|
|
706
|
-
|
|
707
|
-
# Otherwise, indicates that the runtime aborted with an error
|
|
708
|
-
self._is_running = False
|
|
709
|
-
self._is_complete = False
|
|
710
|
-
self._encountered_error = True
|
|
711
|
-
self._save_state()
|
|
712
|
-
|
|
713
|
-
# Disables the security flag
|
|
714
|
-
self._started_runtime = False
|
|
715
|
-
|
|
716
|
-
# If lock acquisition fails for any reason, aborts with an error
|
|
717
|
-
except Timeout:
|
|
718
|
-
message = (
|
|
719
|
-
f"Unable to interface with the ProcessingTracker instance data cached inside the target .yaml file "
|
|
720
|
-
f"{self.file_path.stem}. Specifically, unable to acquire the file lock before the timeout duration of "
|
|
721
|
-
f"10 minutes has passed."
|
|
722
|
-
)
|
|
723
|
-
console.error(message=message, error=Timeout)
|
|
724
|
-
raise Timeout(message) # Fallback to appease mypy, should not be reachable
|
|
725
|
-
|
|
726
|
-
def stop(self) -> None:
|
|
728
|
+
def stop(self, manager_id: int) -> None:
    """Marks the tracked processing runtime as successfully completed and releases it from its manager process.

    The method clears the 'running' flag and the stored manager id, which 'unlocks' the runtime for other manager
    processes, and sets the 'complete' flag so that the successful completion can be detected from the tracker file.
    If the tracker indicates that the runtime is not running, the method returns without modifying the tracker data.

    Args:
        manager_id: The unique identifier of the manager process which attempts to report that the runtime tracked
            by this tracker file has been completed successfully.

    Raises:
        TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
        RuntimeError: If the tracker indicates that the runtime is managed by a different manager process.
    """
    tracker_lock = FileLock(self._lock_path)
    with tracker_lock.acquire(timeout=10.0):
        # Refreshes the instance state from the .yaml file before evaluating it.
        self._load_state()

        # There is nothing to stop if the runtime is not running.
        if self._running:
            # Only the manager process that locked the runtime is allowed to report its completion.
            if manager_id != self._manager_id:
                message = (
                    f"Unable to report that the processing runtime has completed successfully from the manager process "
                    f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
                    f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
                    f"with the runtime."
                )
                console.error(message=message, error=RuntimeError)
                raise RuntimeError(message)  # Fallback to appease mypy, should not be reachable

            # Unlocks the runtime and records that it has been completed (stopped) successfully.
            self._running, self._manager_id = False, -1
            self._complete, self._encountered_error = True, False
            self._save_state()
|
|
736
768
|
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
self._encountered_error = False
|
|
758
|
-
self._save_state()
|
|
759
|
-
|
|
760
|
-
# Disables the security flag
|
|
761
|
-
self._started_runtime = False
|
|
762
|
-
|
|
763
|
-
# If lock acquisition fails for any reason, aborts with an error
|
|
764
|
-
except Timeout:
|
|
765
|
-
message = (
|
|
766
|
-
f"Unable to interface with the ProcessingTracker instance data cached inside the target .yaml file "
|
|
767
|
-
f"{self.file_path.stem}. Specifically, unable to acquire the file lock before the timeout duration of "
|
|
768
|
-
f"10 minutes has passed."
|
|
769
|
-
)
|
|
770
|
-
console.error(message=message, error=Timeout)
|
|
771
|
-
raise Timeout(message) # Fallback to appease mypy, should not be reachable
|
|
769
|
+
def abort(self) -> None:
    """Resets the runtime tracker file to the default state, regardless of the current runtime state.

    Unlike start(), error(), and stop(), this method does not take or check a manager id, so it can be called from
    any manager process, even if the runtime is locked by another process. It is only intended to be used in the
    case of emergency to 'unlock' a deadlocked runtime.
    """
    tracker_lock = FileLock(self._lock_path)
    with tracker_lock.acquire(timeout=10.0):
        # Refreshes the instance state from the .yaml file before overwriting it.
        self._load_state()

        # Clears the lock and both outcome flags. Note, the reset state indicates neither that the runtime is
        # complete nor that it has encountered an error.
        self._running, self._manager_id = False, -1
        self._complete, self._encountered_error = False, False
        self._save_state()
|
|
772
789
|
|
|
773
790
|
@property
def is_complete(self) -> bool:
    """Returns True if the tracker file wrapped by the instance indicates that the processing runtime has been
    completed successfully.

    The state is re-read from the tracker .yaml file, under the tracker's file lock, each time the property is
    accessed.
    """
    tracker_lock = FileLock(self._lock_path)
    with tracker_lock.acquire(timeout=10.0):
        # Refreshes the instance state from the .yaml file, so the returned flag reflects the data on disk.
        self._load_state()
        completed = self._complete
    return completed
|
|
794
799
|
|
|
795
800
|
@property
def encountered_error(self) -> bool:
    """Returns True if the tracker file wrapped by the instance indicates that the processing runtime has aborted
    due to encountering an error.

    The state is re-read from the tracker .yaml file, under the tracker's file lock, each time the property is
    accessed.
    """
    tracker_lock = FileLock(self._lock_path)
    with tracker_lock.acquire(timeout=10.0):
        # Refreshes the instance state from the .yaml file, so the returned flag reflects the data on disk.
        self._load_state()
        failed = self._encountered_error
    return failed
|
|
816
809
|
|
|
817
810
|
@property
def is_running(self) -> bool:
    """Returns True if the tracker file wrapped by the instance indicates that the processing runtime is currently
    ongoing.

    The state is re-read from the tracker .yaml file, under the tracker's file lock, each time the property is
    accessed.
    """
    tracker_lock = FileLock(self._lock_path)
    with tracker_lock.acquire(timeout=10.0):
        # Refreshes the instance state from the .yaml file, so the returned flag reflects the data on disk.
        self._load_state()
        running = self._running
    return running
|
|
819
|
+
|
|
820
|
+
|
|
821
|
+
def get_processing_tracker(root: Path, file_name: TrackerFileNames | str) -> ProcessingTracker:
    """Initializes and returns the ProcessingTracker instance that manages the data stored inside the target processing
    tracker file.

    This function validates the input tracker file name against the TrackerFileNames enumeration, joins it to the
    input root path, and wraps the resulting file path into a ProcessingTracker instance.

    Args:
        root: The path to the directory where the target file is stored or should be created.
        file_name: The name of the target processing tracker file. Has to be one of the names from the TrackerFileNames
            enumeration.

    Returns:
        The initialized ProcessingTracker instance that manages the data stored in the target file.

    Raises:
        ValueError: If the input file name is not one of the names defined in the TrackerFileNames enumeration.
    """
    # Prevents using the function for unsupported tracker file names.
    if file_name not in tuple(TrackerFileNames):
        message = (
            f"Unable to construct the path to the tracker file {file_name}. The input name is not one of the supported "
            f"names. Use one of the supported options provided by the TrackerFileNames enumeration."
        )
        console.error(message=message, error=ValueError)
        # Mirrors the other error sites in this module: guarantees that an unsupported name never falls through to
        # the tracker construction below, even if the console call returns.
        raise ValueError(message)  # Fallback to appease mypy, should not be reachable

    # Resolves the path to the requested tracker file and wraps it into a ProcessingTracker instance.
    return ProcessingTracker(file_path=root.joinpath(file_name))
|
|
857
|
+
|
|
858
|
+
|
|
859
|
+
def generate_manager_id() -> int:
    """Generates and returns an integer identifier that can be used to identify the manager process that calls this
    function.

    The identifier is built from the value returned by get_timestamp() and a random number between 1 and
    9999999999999, joined with an underscore. The resulting string is converted to an integer using the xxHash3-64
    (xxh3_64) algorithm before it is returned to the caller.

    Notes:
        This function should be used to generate manager process identifiers for working with ProcessingTracker
        instances from sl-shared-assets version 4.0.0 and above.

    Returns:
        The integer digest of the xxHash3-64 hash computed over the generated identifier string.
    """
    timestamp = get_timestamp()
    random_number = randint(1, 9999999999999)
    manager_id = f"{timestamp}_{random_number}"

    # Encodes the identifier explicitly, so the hashed bytes do not depend on the implicit str conversion of the
    # hashing library.
    id_hash = xxh3_64()
    id_hash.update(manager_id.encode("utf-8"))
    return id_hash.intdigest()
|