sl-shared-assets 3.1.2__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic. Click here for more details.

@@ -7,11 +7,13 @@ libraries use these classes to work with all lab-generated data."""
7
7
 
8
8
  import copy
9
9
  from enum import StrEnum
10
+ from random import randint
10
11
  import shutil as sh
11
12
  from pathlib import Path
12
13
  from dataclasses import field, dataclass
13
14
 
14
- from filelock import Timeout, FileLock
15
+ from xxhash import xxh3_64
16
+ from filelock import FileLock
15
17
  from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
16
18
  from ataraxis_data_structures import YamlConfig
17
19
  from ataraxis_time.time_helpers import get_timestamp
@@ -46,6 +48,26 @@ class SessionTypes(StrEnum):
46
48
  activity data."""
47
49
 
48
50
 
51
+ class TrackerFileNames(StrEnum):
52
+ """Defines a set of processing tracker .yaml files supported by various Sun lab data preprocessing, processing, and
53
+ dataset formation pipelines.
54
+
55
+ This enumeration standardizes the names for all processing tracker files used in the lab. It is designed to be used
56
+ via the get_processing_tracker() function to generate ProcessingTracker instances.
57
+ """
58
+
59
+ BEHAVIOR = "behavior_processing_tracker.yaml"
60
+ """This file is used to track the state of the behavior log processing pipeline."""
61
+ SUITE2P = "suite2p_processing_tracker.yaml"
62
+ """This file is used to track the state of the single-day suite2p processing pipeline."""
63
+ DATASET = "dataset_formation_tracker.yaml"
64
+ """This file is used to track the state of the dataset formation pipeline."""
65
+ VIDEO = "video_processing_tracker.yaml"
66
+ """This file is used to track the state of the video (DeepLabCut) processing pipeline."""
67
+ INTEGRITY = "integrity_verification_tracker.yaml"
68
+ """This file is used to track the state of the data integrity verification pipeline."""
69
+
70
+
49
71
  @dataclass()
50
72
  class RawData:
51
73
  """Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.
@@ -133,10 +155,6 @@ class RawData:
133
155
  runtime initialization. Since runtime initialization is a complex process that may encounter a runtime error, the
134
156
  marker is used to discover sessions that failed to initialize. Since uninitialized sessions by definition do not
135
157
  contain any valuable data, they are marked for immediate deletion from all managed destinations."""
136
- integrity_verification_tracker_path: Path = Path()
137
- """Stores the path to the integrity_verification.yaml tracker file. This file stores the current state of the data
138
- integrity verification pipeline. It prevents more than one instance of the pipeline from working with the data
139
- at a given time and communicates the outcome (success or failure) of the most recent pipeline runtime."""
140
158
 
141
159
  def resolve_paths(self, root_directory_path: Path) -> None:
142
160
  """Resolves all paths managed by the class instance based on the input root directory path.
@@ -167,7 +185,6 @@ class RawData:
167
185
  self.telomere_path = self.raw_data_path.joinpath("telomere.bin")
168
186
  self.ubiquitin_path = self.raw_data_path.joinpath("ubiquitin.bin")
169
187
  self.nk_path = self.raw_data_path.joinpath("nk.bin")
170
- self.integrity_verification_tracker_path = self.raw_data_path.joinpath("integrity_verification_tracker.yaml")
171
188
 
172
189
  def make_directories(self) -> None:
173
190
  """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
@@ -203,15 +220,6 @@ class ProcessedData:
203
220
  behavior_data_path: Path = Path()
204
221
  """Stores the path to the directory that contains the non-video and non-brain-activity data extracted from
205
222
  .npz log files by the sl-behavior log processing pipeline."""
206
- suite2p_processing_tracker_path: Path = Path()
207
- """Stores the path to the suite2p_processing_tracker.yaml tracker file. This file stores the current state of
208
- processing the session with the sl-suite2p single-day pipeline."""
209
- behavior_processing_tracker_path: Path = Path()
210
- """Stores the path to the behavior_processing_tracker.yaml file. This file stores the current state of processing
211
- the session with the sl-behavior log-parsing pipeline."""
212
- video_processing_tracker_path: Path = Path()
213
- """Stores the path to the video_processing_tracker.yaml file. This file stores the current state of processing
214
- the session with the DeepLabCut-based video processing pipeline."""
215
223
  p53_path: Path = Path()
216
224
  """Stores the path to the p53.bin file. This file serves as a lock-in marker that determines whether the session is
217
225
  in the processing or dataset state. Specifically, if the file does not exist, the session data cannot be integrated
@@ -234,9 +242,6 @@ class ProcessedData:
234
242
  self.camera_data_path = self.processed_data_path.joinpath("camera_data")
235
243
  self.mesoscope_data_path = self.processed_data_path.joinpath("mesoscope_data")
236
244
  self.behavior_data_path = self.processed_data_path.joinpath("behavior_data")
237
- self.suite2p_processing_tracker_path = self.processed_data_path.joinpath("suite2p_processing_tracker.yaml")
238
- self.behavior_processing_tracker_path = self.processed_data_path.joinpath("behavior_processing_tracker.yaml")
239
- self.video_processing_tracker_path = self.processed_data_path.joinpath("video_processing_tracker.yaml")
240
245
  self.p53_path = self.processed_data_path.joinpath("p53.bin")
241
246
 
242
247
  def make_directories(self) -> None:
@@ -264,8 +269,8 @@ class SessionData(YamlConfig):
264
269
  Notes:
265
270
  This class is specifically designed for working with the data from a single session, performed by a single
266
271
  animal under the specific experiment. The class is used to manage both raw and processed data. It follows the
267
- data through acquisition, preprocessing and processing stages of the Sun lab data workflow. This class serves as
268
- an entry point for all interactions with the managed session's data.
272
+ data through acquisition, preprocessing, and processing stages of the Sun lab data workflow. This class serves
273
+ as an entry point for all interactions with the managed session's data.
269
274
  """
270
275
 
271
276
  project_name: str
@@ -278,10 +283,10 @@ class SessionData(YamlConfig):
278
283
  """Stores the type of the session. Has to be set to one of the supported session types, defined in the SessionTypes
279
284
  enumeration exposed by the sl-shared-assets library.
280
285
  """
281
- acquisition_system: str | AcquisitionSystems
286
+ acquisition_system: str | AcquisitionSystems = AcquisitionSystems.MESOSCOPE_VR
282
287
  """Stores the name of the data acquisition system that acquired the data. Has to be set to one of the supported
283
288
  acquisition systems, defined in the AcquisitionSystems enumeration exposed by the sl-shared-assets library."""
284
- experiment_name: str | None
289
+ experiment_name: str | None = None
285
290
  """Stores the name of the experiment performed during the session. If the session_type field indicates that the
286
291
  session is an experiment, this field communicates the specific experiment configuration used by the session. During
287
292
  runtime, this name is used to load the specific experiment configuration data stored in a .yaml file with the same
@@ -427,7 +432,7 @@ class SessionData(YamlConfig):
427
432
 
428
433
  # Saves the configured instance data to the session's folder so that it can be reused during processing or
429
434
  # preprocessing.
430
- instance._save()
435
+ instance.save()
431
436
 
432
437
  # Also saves the SystemConfiguration and ExperimentConfiguration instances to the same folder using the paths
433
438
  # resolved for the RawData instance above.
@@ -535,7 +540,7 @@ class SessionData(YamlConfig):
535
540
  """
536
541
  self.raw_data.nk_path.unlink(missing_ok=True)
537
542
 
538
- def _save(self) -> None:
543
+ def save(self) -> None:
539
544
  """Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.
540
545
 
541
546
  This is used to save the data stored in the instance to disk so that it can be reused during further stages of
@@ -567,271 +572,305 @@ class ProcessingTracker(YamlConfig):
567
572
  state between multiple processes in a thread-safe manner.
568
573
 
569
574
  Primarily, this tracker class is used by all remote data processing pipelines in the lab to prevent race conditions
570
- and make it impossible to run multiple processing runtimes at the same time.
575
+ and make it impossible to run multiple processing runtimes at the same time. It is also used to evaluate the status
576
+ (success / failure) of jobs running on remote compute servers.
577
+
578
+ Note:
579
+ In library version 4.0.0 the processing trackers have been refactored to work similarly to 'lock' files. That is,
580
+ when a runtime is started, the tracker is switched into the 'running' (locked) state until it is unlocked,
581
+ aborted, or encounters an error. When the tracker is locked, only the same manager process as the one that
582
+ locked the tracker is allowed to work with session data. This feature allows executing complex processing
583
+ pipelines that use multiple concurrent and / or sequential processing jobs on the remote server.
584
+
585
+ This instance frequently refers to a 'manager process' in method documentation. A 'manager process' is the
586
+ highest-level process that manages the runtime. When the runtime is executed on remote compute servers, the
587
+ manager process is typically the process running on the non-server machine (user PC) that executes the remote
588
+ processing job on the compute server (via SSH or similar protocol). The worker process(es) that run the
589
+ processing job(s) on the remote compute servers are NOT considered manager processes.
571
590
  """
572
591
 
573
592
  file_path: Path
574
- """Stores the path to the .yaml file used to save the tracker data between runtimes. The class instance functions as
575
- a wrapper around the data stored inside the specified .yaml file."""
576
- _is_complete: bool = False
577
- """Tracks whether the processing runtime managed by this tracker has been successfully carried out for the session
578
- that calls the tracker."""
593
+ """Stores the path to the .yaml file used to cache the tracker data on disk. The class instance functions as a
594
+ wrapper around the data stored inside the specified .yaml file."""
595
+ _complete: bool = False
596
+ """Tracks whether the processing runtime managed by this tracker has finished successfully."""
579
597
  _encountered_error: bool = False
580
- """Tracks whether the processing runtime managed by this tracker has encountered an error while running for the
581
- session that calls the tracker."""
582
- _is_running: bool = False
583
- """Tracks whether the processing runtime managed by this tracker is currently running for the session that calls
584
- the tracker."""
598
+ """Tracks whether the processing runtime managed by this tracker has encountered an error and has finished
599
+ unsuccessfully."""
600
+ _running: bool = False
601
+ """Tracks whether the processing runtime managed by this tracker is currently running."""
602
+ _manager_id: int = -1
603
+ """Stores the xxHash3-64 hash value that represents the unique identifier of the manager process that started the
604
+ runtime. The manager process is typically running on a remote control machine (computer) and is used to
605
+ support processing runtimes that are distributed over multiple separate batch jobs on the compute server. This
606
+ ID should be generated using the 'generate_manager_id()' function exposed by this library."""
585
607
  _lock_path: str = field(init=False)
586
- """Stores the path to the .lock file for the target tracker .yaml file. This file is used to ensure that only one
587
- process can simultaneously read from or write to the wrapped .yaml file."""
588
- _started_runtime: bool = False
589
- """This internal service field tracks when the class instance is used to start a runtime. It is set automatically by
590
- the ProcessingTracker instance and is used to prevent runtime errors from deadlocking the specific processing
591
- pipeline tracked by the class instance."""
608
+ """Stores the path to the .lock file used to ensure that only a single process can simultaneously access the data
609
+ stored inside the tracker file."""
592
610
 
593
611
  def __post_init__(self) -> None:
594
- # Generates the lock file for the target .yaml file path.
612
+ # Generates the .lock file path for the target tracker .yaml file.
595
613
  if self.file_path is not None:
596
614
  self._lock_path = str(self.file_path.with_suffix(self.file_path.suffix + ".lock"))
597
615
  else:
598
616
  self._lock_path = ""
599
617
 
600
- def __del__(self) -> None:
601
- """If the instance as used to start a runtime, ensures that the instance properly marks the runtime as completed
602
- or erred before being garbage-collected.
603
-
604
- This is a security mechanism to prevent deadlocking the processed session and pipeline for future runtimes.
605
- """
606
- if self._started_runtime and self._is_running:
607
- self.error()
608
-
609
618
  def _load_state(self) -> None:
610
619
  """Reads the current processing state from the wrapped .YAML file."""
611
620
  if self.file_path.exists():
612
621
  # Loads the data for the state values but does not replace the file path or lock attributes.
613
622
  instance: ProcessingTracker = self.from_yaml(self.file_path) # type: ignore
614
- self._is_complete = instance._is_complete
615
- self._encountered_error = instance._encountered_error
616
- self._is_running = instance._is_running
623
+ self._complete = copy.copy(instance._complete)
624
+ self._encountered_error = copy.copy(instance._encountered_error)
625
+ self._running = copy.copy(instance._running)
626
+ self._manager_id = copy.copy(instance._manager_id)
617
627
  else:
618
- # Otherwise, if the tracker file does not exist, generates a new .yaml file using default instance values.
628
+ # Otherwise, if the tracker file does not exist, generates a new .yaml file using default instance values
629
+ # and saves it to disk using the specified tracker file path.
619
630
  self._save_state()
620
631
 
621
632
  def _save_state(self) -> None:
622
633
  """Saves the current processing state stored inside instance attributes to the specified .YAML file."""
623
- # Resets the _lock and file_path to None before dumping the data to .YAML to avoid issues with loading it
634
+ # Resets the _lock_path and file_path to None before dumping the data to .YAML to avoid issues with loading it
624
635
  # back.
625
636
  original = copy.deepcopy(self)
626
637
  original.file_path = None # type: ignore
627
638
  original._lock_path = None # type: ignore
628
- original._started_runtime = False # This field is only used by the instance stored in memory.
629
639
  original.to_yaml(file_path=self.file_path)
630
640
 
631
- def start(self) -> None:
632
- """Configures the tracker file to indicate that the tracked processing runtime is currently running.
641
+ def start(self, manager_id: int) -> None:
642
+ """Configures the tracker file to indicate that a manager process is currently executing the tracked processing
643
+ runtime.
644
+
645
+ Calling this method effectively 'locks' the tracked session and processing runtime combination to only be
646
+ accessible from the manager process that calls this method. Calling this method for an already running runtime
647
+ managed by the same process does not have any effect, so it is safe to call this method at the beginning of
648
+ each processing job that makes up the runtime.
633
649
 
634
- All further attempts to start the same processing runtime for the same session's data will automatically abort
635
- with an error.
650
+ Args:
651
+ manager_id: The unique xxHash3-64 hash identifier of the manager process which attempts to start the runtime
652
+ tracked by this tracker file.
636
653
 
637
654
  Raises:
638
655
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
639
656
  """
640
- try:
641
- # Acquires the lock
642
- lock = FileLock(self._lock_path)
643
- with lock.acquire(timeout=10.0):
644
- # Loads tracker state from the .yaml file
645
- self._load_state()
646
-
647
- # If the runtime is already running, aborts with an error
648
- if self._is_running:
649
- message = (
650
- f"Unable to start the processing runtime. The {self.file_path.name} tracker file indicates "
651
- f"that the runtime is currently running from a different process. Only a single runtime "
652
- f"instance is allowed to run at the same time."
653
- )
654
- console.error(message=message, error=RuntimeError)
655
- raise RuntimeError(message) # Fallback to appease mypy, should not be reachable
656
-
657
- # Otherwise, marks the runtime as running and saves the state back to the .yaml file.
658
- self._is_running = True
659
- self._is_complete = False
660
- self._encountered_error = False
661
- self._save_state()
662
-
663
- # Sets the start tracker flag to True, which ensures that the class tries to mark the runtime as
664
- # completed or erred before it being garbage-collected.
665
- self._started_runtime = True
666
-
667
- # If lock acquisition fails for any reason, aborts with an error
668
- except Timeout:
669
- message = (
670
- f"Unable to interface with the ProcessingTracker instance data cached inside the target .yaml file "
671
- f"{self.file_path.stem}. Specifically, unable to acquire the file lock before the timeout duration of "
672
- f"10 minutes has passed."
673
- )
674
- console.error(message=message, error=Timeout)
675
- raise Timeout(message) # Fallback to appease mypy, should not be reachable
657
+ # Acquires the lock
658
+ lock = FileLock(self._lock_path)
659
+ with lock.acquire(timeout=10.0):
660
+ # Loads tracker state from the .yaml file
661
+ self._load_state()
662
+
663
+ # If the runtime is already running from a different process, aborts with an error.
664
+ if self._running and manager_id != self._manager_id:
665
+ message = (
666
+ f"Unable to start the processing runtime from the manager process with id {manager_id}. The "
667
+ f"{self.file_path.name} tracker file indicates that the manager process with id {self._manager_id} "
668
+ f"is currently executing the tracked runtime. Only a single manager process is allowed to execute "
669
+ f"the runtime at the same time."
670
+ )
671
+ console.error(message=message, error=RuntimeError)
672
+ raise RuntimeError(message) # Fallback to appease mypy, should not be reachable
673
+
674
+ # Otherwise, if the runtime is already running for the current manager process, returns without modifying
675
+ # the tracker data.
676
+ elif self._running and manager_id == self._manager_id:
677
+ return
678
+
679
+ # Otherwise, locks the runtime for the current manager process and updates the cached tracker data
680
+ self._running = True
681
+ self._manager_id = manager_id
682
+ self._complete = False
683
+ self._encountered_error = False
684
+ self._save_state()
676
685
 
677
- def error(self) -> None:
686
+ def error(self, manager_id: int) -> None:
678
687
  """Configures the tracker file to indicate that the tracked processing runtime encountered an error and failed
679
688
  to complete.
680
689
 
681
- This method will only work for an active runtime. When called for an active runtime, it expects the runtime to
682
- be aborted with an error after the method returns. It configures the target tracker to allow other processes
683
- to restart the runtime at any point after this method returns, so it is UNSAFE to do any further processing
684
- from the process that calls this method.
690
+ This method fulfills two main purposes. First, it 'unlocks' the runtime, allowing other manager processes to
691
+ interface with the tracked runtime. Second, it updates the tracker file to reflect that the runtime was
692
+ interrupted due to an error, which is used by the manager processes to detect and handle processing failures.
693
+
694
+ Args:
695
+ manager_id: The unique xxHash3-64 hash identifier of the manager process which attempts to report that the
696
+ runtime tracked by this tracker file has encountered an error.
685
697
 
686
698
  Raises:
687
699
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
688
700
  """
701
+ lock = FileLock(self._lock_path)
702
+ with lock.acquire(timeout=10.0):
703
+ # Loads tracker state from the .yaml file
704
+ self._load_state()
705
+
706
+ # If the runtime is not running, returns without doing anything
707
+ if not self._running:
708
+ return
709
+
710
+ # Ensures that only the active manager process can report runtime errors using the tracker file
711
+ if manager_id != self._manager_id:
712
+ message = (
713
+ f"Unable to report that the processing runtime has encountered an error from the manager process "
714
+ f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
715
+ f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
716
+ f"with the runtime."
717
+ )
718
+ console.error(message=message, error=RuntimeError)
719
+ raise RuntimeError(message) # Fallback to appease mypy, should not be reachable
720
+
721
+ # Indicates that the runtime aborted with an error
722
+ self._running = False
723
+ self._manager_id = -1
724
+ self._complete = False
725
+ self._encountered_error = True
726
+ self._save_state()
689
727
 
690
- try:
691
- # Acquires the lock
692
- lock = FileLock(self._lock_path)
693
- with lock.acquire(timeout=10.0):
694
- # Loads tracker state from the .yaml file
695
- self._load_state()
696
-
697
- # If the runtime is not running, aborts with an error
698
- if not self._is_running:
699
- message = (
700
- f"Unable to report that the processing runtime encountered an error. The {self.file_path.name} "
701
- f"tracker file indicates that the runtime is currently NOT running. A runtime has to be "
702
- f"actively running to set the tracker to an error state."
703
- )
704
- console.error(message=message, error=RuntimeError)
705
- raise RuntimeError(message) # Fallback to appease mypy, should not be reachable
706
-
707
- # Otherwise, indicates that the runtime aborted with an error
708
- self._is_running = False
709
- self._is_complete = False
710
- self._encountered_error = True
711
- self._save_state()
712
-
713
- # Disables the security flag
714
- self._started_runtime = False
715
-
716
- # If lock acquisition fails for any reason, aborts with an error
717
- except Timeout:
718
- message = (
719
- f"Unable to interface with the ProcessingTracker instance data cached inside the target .yaml file "
720
- f"{self.file_path.stem}. Specifically, unable to acquire the file lock before the timeout duration of "
721
- f"10 minutes has passed."
722
- )
723
- console.error(message=message, error=Timeout)
724
- raise Timeout(message) # Fallback to appease mypy, should not be reachable
725
-
726
- def stop(self) -> None:
728
+ def stop(self, manager_id: int) -> None:
727
729
  """Configures the tracker file to indicate that the tracked processing runtime has been completed successfully.
728
730
 
729
- After this method returns, it is UNSAFE to do any further processing from the process that calls this method.
730
- Any process that calls the 'start' method of this class is expected to also call this method or 'error' method
731
- at the end of the runtime.
731
+ This method 'unlocks' the runtime, allowing other manager processes to interface with the tracked runtime. It
732
+ also configures the tracker file to indicate that the runtime has been completed successfully, which is used
733
+ by the manager processes to detect and handle processing completion.
734
+
735
+ Args:
736
+ manager_id: The unique xxHash3-64 hash identifier of the manager process which attempts to report that the
737
+ runtime tracked by this tracker file has been completed successfully.
732
738
 
733
739
  Raises:
734
740
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
735
741
  """
742
+ lock = FileLock(self._lock_path)
743
+ with lock.acquire(timeout=10.0):
744
+ # Loads tracker state from the .yaml file
745
+ self._load_state()
746
+
747
+ # If the runtime is not running, does not do anything
748
+ if not self._running:
749
+ return
750
+
751
+ # Ensures that only the active manager process can report runtime completion using the tracker file
752
+ if manager_id != self._manager_id:
753
+ message = (
754
+ f"Unable to report that the processing runtime has completed successfully from the manager process "
755
+ f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
756
+ f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
757
+ f"with the runtime."
758
+ )
759
+ console.error(message=message, error=RuntimeError)
760
+ raise RuntimeError(message) # Fallback to appease mypy, should not be reachable
761
+
762
+ # Otherwise, marks the runtime as complete (stopped)
763
+ self._running = False
764
+ self._manager_id = -1
765
+ self._complete = True
766
+ self._encountered_error = False
767
+ self._save_state()
736
768
 
737
- try:
738
- # Acquires the lock
739
- lock = FileLock(self._lock_path)
740
- with lock.acquire(timeout=10.0):
741
- # Loads tracker state from the .yaml file
742
- self._load_state()
743
-
744
- # If the runtime is not running, aborts with an error
745
- if not self._is_running:
746
- message = (
747
- f"Unable to stop (complete) the processing runtime. The {self.file_path.name} tracker file "
748
- f"indicates that the runtime is currently NOT running. A runtime has to be actively running to "
749
- f"mark it as complete and stop the runtime."
750
- )
751
- console.error(message=message, error=RuntimeError)
752
- raise RuntimeError(message) # Fallback to appease mypy, should not be reachable
753
-
754
- # Otherwise, marks the runtime as complete (stopped)
755
- self._is_running = False
756
- self._is_complete = True
757
- self._encountered_error = False
758
- self._save_state()
759
-
760
- # Disables the security flag
761
- self._started_runtime = False
762
-
763
- # If lock acquisition fails for any reason, aborts with an error
764
- except Timeout:
765
- message = (
766
- f"Unable to interface with the ProcessingTracker instance data cached inside the target .yaml file "
767
- f"{self.file_path.stem}. Specifically, unable to acquire the file lock before the timeout duration of "
768
- f"10 minutes has passed."
769
- )
770
- console.error(message=message, error=Timeout)
771
- raise Timeout(message) # Fallback to appease mypy, should not be reachable
769
+ def abort(self) -> None:
770
+ """Resets the runtime tracker file to the default state.
771
+
772
+ This method can be used to reset the runtime tracker file, regardless of the current runtime state. Unlike other
773
+ instance methods, this method can be called from any manager process, even if the runtime is already locked by
774
+ another process. This method is only intended to be used in the case of emergency to 'unlock' a deadlocked
775
+ runtime.
776
+ """
777
+ lock = FileLock(self._lock_path)
778
+ with lock.acquire(timeout=10.0):
779
+ # Loads tracker state from the .yaml file
780
+ self._load_state()
781
+
782
+ # Resets the tracker file to the default state. Note, does not indicate that the runtime is complete nor
783
+ # that it has encountered an error.
784
+ self._running = False
785
+ self._manager_id = -1
786
+ self._complete = False
787
+ self._encountered_error = False
788
+ self._save_state()
772
789
 
773
790
  @property
774
791
  def is_complete(self) -> bool:
775
792
  """Returns True if the tracker wrapped by the instance indicates that the processing runtime has been completed
776
- successfully at least once and that there is no ongoing processing that uses the target session."""
777
- try:
778
- # Acquires the lock
779
- lock = FileLock(self._lock_path)
780
- with lock.acquire(timeout=10.0):
781
- # Loads tracker state from the .yaml file
782
- self._load_state()
783
- return self._is_complete
784
-
785
- # If lock acquisition fails for any reason, aborts with an error
786
- except Timeout:
787
- message = (
788
- f"Unable to interface with the ProcessingTracker instance data cached inside the target .yaml file "
789
- f"{self.file_path.stem}. Specifically, unable to acquire the file lock before the timeout duration of "
790
- f"10 minutes has passed."
791
- )
792
- console.error(message=message, error=Timeout)
793
- raise Timeout(message) # Fallback to appease mypy, should not be reachable
793
+ successfully and that the runtime is not currently ongoing."""
794
+ lock = FileLock(self._lock_path)
795
+ with lock.acquire(timeout=10.0):
796
+ # Loads tracker state from the .yaml file
797
+ self._load_state()
798
+ return self._complete
794
799
 
795
800
  @property
796
801
  def encountered_error(self) -> bool:
797
- """Returns True if the tracker wrapped by the instance indicates that the processing runtime for the target
798
- session has aborted due to encountering an error."""
799
- try:
800
- # Acquires the lock
801
- lock = FileLock(self._lock_path)
802
- with lock.acquire(timeout=10.0):
803
- # Loads tracker state from the .yaml file
804
- self._load_state()
805
- return self._encountered_error
806
-
807
- # If lock acquisition fails for any reason, aborts with an error
808
- except Timeout:
809
- message = (
810
- f"Unable to interface with the ProcessingTracker instance data cached inside the target .yaml file "
811
- f"{self.file_path.stem}. Specifically, unable to acquire the file lock before the timeout duration of "
812
- f"10 minutes has passed."
813
- )
814
- console.error(message=message, error=Timeout)
815
- raise Timeout(message) # Fallback to appease mypy, should not be reachable
802
+ """Returns True if the tracker wrapped by the instance indicates that the processing runtime has aborted due
803
+ to encountering an error."""
804
+ lock = FileLock(self._lock_path)
805
+ with lock.acquire(timeout=10.0):
806
+ # Loads tracker state from the .yaml file
807
+ self._load_state()
808
+ return self._encountered_error
816
809
 
817
810
  @property
818
811
  def is_running(self) -> bool:
819
812
  """Returns True if the tracker wrapped by the instance indicates that the processing runtime is currently
820
- running for the target session."""
821
- try:
822
- # Acquires the lock
823
- lock = FileLock(self._lock_path)
824
- with lock.acquire(timeout=10.0):
825
- # Loads tracker state from the .yaml file
826
- self._load_state()
827
- return self._is_running
828
-
829
- # If lock acquisition fails for any reason, aborts with an error
830
- except Timeout:
831
- message = (
832
- f"Unable to interface with the ProcessingTracker instance data cached inside the target .yaml file "
833
- f"{self.file_path.stem}. Specifically, unable to acquire the file lock before the timeout duration of "
834
- f"10 minutes has passed."
835
- )
836
- console.error(message=message, error=Timeout)
837
- raise Timeout(message) # Fallback to appease mypy, should not be reachable
813
+ ongoing."""
814
+ lock = FileLock(self._lock_path)
815
+ with lock.acquire(timeout=10.0):
816
+ # Loads tracker state from the .yaml file
817
+ self._load_state()
818
+ return self._running
819
+
820
+
821
+ def get_processing_tracker(root: Path, file_name: TrackerFileNames | str) -> ProcessingTracker:
822
+ """Initializes and returns the ProcessingTracker instance that manages the data stored inside the target processing
823
+ tracker file.
824
+
825
+ This function uses the input root path and tracker file name to first resolve the absolute path to the .yaml data
826
+ cache of the target processing tracker file and then wrap the file into a ProcessingTracker instance. All Sun lab
827
+ libraries that use ProcessingTracker instances use this function to access the necessary trackers.
828
+
829
+ Notes:
830
+ If the target file does not exist, this function will create the file as part of the ProcessingTracker
831
+ initialization.
832
+
833
+ This function also generates the corresponding .lock file to ensure that the data inside the processing tracker
834
+ is accessed by a single process at a time.
835
+
836
+ Args:
837
+ file_name: The name of the target processing tracker file. Has to be one of the names from the TrackerFileNames
838
+ enumeration.
839
+ root: The absolute path to the directory where the target file is stored or should be created.
840
+
841
+ Returns:
842
+ The initialized ProcessingTracker instance that manages the data stored in the target file.
843
+ """
844
+
845
+ # Prevents using the function for unsupported tracker file names.
846
+ supported_files = tuple(TrackerFileNames)
847
+ if file_name not in supported_files:
848
+ message = (
849
+ f"Unable to construct the path to the tracker file {file_name}. The input name is not one of the supported"
850
+ f"names. Use one of the supported options provided by the TrackerFileNames enumeration."
851
+ )
852
+ console.error(message=message, error=ValueError)
853
+
854
+ # Constructs and returns the absolute path to the requested tracker file.
855
+ tracker_path = root.joinpath(file_name)
856
+ return ProcessingTracker(file_path=tracker_path)
857
+
858
+
859
+ def generate_manager_id() -> int:
860
+ """Generates and returns a unique integer identifier that can be used to identify the manager process that calls
861
+ this function.
862
+
863
+ The identifier is generated based on the current timestamp, accurate to microseconds, and a random number between 1
864
+ and 9999999999999. This ensures that the identifier is unique for each function call. The generated identifier
865
+ string is converted to a unique integer value using the xxHash3-64 algorithm before it is returned to the caller.
866
+
867
+ Notes:
868
+ This function should be used to generate manager process identifiers for working with ProcessingTracker
869
+ instances from sl-shared-assets version 4.0.0 and above.
870
+ """
871
+ timestamp = get_timestamp()
872
+ random_number = randint(1, 9999999999999)
873
+ manager_id = f"{timestamp}_{random_number}"
874
+ id_hash = xxh3_64()
875
+ id_hash.update(manager_id)
876
+ return id_hash.intdigest()