sl-shared-assets 4.0.1__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic. Click here for more details.

Files changed (39) hide show
  1. sl_shared_assets/__init__.py +45 -42
  2. sl_shared_assets/command_line_interfaces/__init__.py +3 -0
  3. sl_shared_assets/command_line_interfaces/configure.py +173 -0
  4. sl_shared_assets/command_line_interfaces/manage.py +226 -0
  5. sl_shared_assets/data_classes/__init__.py +33 -32
  6. sl_shared_assets/data_classes/configuration_data.py +267 -79
  7. sl_shared_assets/data_classes/session_data.py +226 -289
  8. sl_shared_assets/server/__init__.py +24 -4
  9. sl_shared_assets/server/job.py +6 -7
  10. sl_shared_assets/server/pipeline.py +570 -0
  11. sl_shared_assets/server/server.py +57 -25
  12. sl_shared_assets/tools/__init__.py +9 -8
  13. sl_shared_assets/tools/packaging_tools.py +14 -25
  14. sl_shared_assets/tools/project_management_tools.py +602 -523
  15. sl_shared_assets/tools/transfer_tools.py +88 -23
  16. {sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.0.dist-info}/METADATA +46 -203
  17. sl_shared_assets-5.0.0.dist-info/RECORD +23 -0
  18. sl_shared_assets-5.0.0.dist-info/entry_points.txt +3 -0
  19. sl_shared_assets/__init__.pyi +0 -91
  20. sl_shared_assets/cli.py +0 -501
  21. sl_shared_assets/cli.pyi +0 -106
  22. sl_shared_assets/data_classes/__init__.pyi +0 -75
  23. sl_shared_assets/data_classes/configuration_data.pyi +0 -235
  24. sl_shared_assets/data_classes/runtime_data.pyi +0 -157
  25. sl_shared_assets/data_classes/session_data.pyi +0 -379
  26. sl_shared_assets/data_classes/surgery_data.pyi +0 -89
  27. sl_shared_assets/server/__init__.pyi +0 -11
  28. sl_shared_assets/server/job.pyi +0 -205
  29. sl_shared_assets/server/server.pyi +0 -298
  30. sl_shared_assets/tools/__init__.pyi +0 -19
  31. sl_shared_assets/tools/ascension_tools.py +0 -265
  32. sl_shared_assets/tools/ascension_tools.pyi +0 -68
  33. sl_shared_assets/tools/packaging_tools.pyi +0 -58
  34. sl_shared_assets/tools/project_management_tools.pyi +0 -239
  35. sl_shared_assets/tools/transfer_tools.pyi +0 -53
  36. sl_shared_assets-4.0.1.dist-info/RECORD +0 -36
  37. sl_shared_assets-4.0.1.dist-info/entry_points.txt +0 -7
  38. {sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.0.dist-info}/WHEEL +0 -0
  39. {sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -7,12 +7,10 @@ libraries use these classes to work with all lab-generated data."""
7
7
 
8
8
  import copy
9
9
  from enum import StrEnum
10
- from random import randint
11
10
  import shutil as sh
12
11
  from pathlib import Path
13
12
  from dataclasses import field, dataclass
14
13
 
15
- from xxhash import xxh3_64
16
14
  from filelock import FileLock
17
15
  from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
18
16
  from ataraxis_data_structures import YamlConfig
@@ -48,26 +46,6 @@ class SessionTypes(StrEnum):
48
46
  activity data."""
49
47
 
50
48
 
51
- class TrackerFileNames(StrEnum):
52
- """Defines a set of processing tacker .yaml files supported by various Sun lab data preprocessing, processing, and
53
- dataset formation pipelines.
54
-
55
- This enumeration standardizes the names for all processing tracker files used in the lab. It is designed to be used
56
- via the get_processing_tracker() function to generate ProcessingTracker instances.
57
- """
58
-
59
- BEHAVIOR = "behavior_processing_tracker.yaml"
60
- """This file is used to track the state of the behavior log processing pipeline."""
61
- SUITE2P = "suite2p_processing_tracker.yaml"
62
- """This file is used to track the state of the single-day suite2p processing pipeline."""
63
- DATASET = "dataset_formation_tracker.yaml"
64
- """This file is used to track the state of the dataset formation pipeline."""
65
- VIDEO = "video_processing_tracker.yaml"
66
- """This file is used to track the state of the video (DeepLabCut) processing pipeline."""
67
- INTEGRITY = "integrity_verification_tracker.yaml"
68
- """This file is used to track the state of the data integrity verification pipeline."""
69
-
70
-
71
49
  @dataclass()
72
50
  class RawData:
73
51
  """Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.
@@ -155,6 +133,10 @@ class RawData:
155
133
  runtime initialization. Since runtime initialization is a complex process that may encounter a runtime error, the
156
134
  marker is used to discover sessions that failed to initialize. Since uninitialized sessions by definition do not
157
135
  contain any valuable data, they are marked for immediate deletion from all managed destinations."""
136
+ root_path: Path = Path()
137
+ """Stores the path to the root directory of the volume that stores raw data from all Sun lab projects. Primarily,
138
+ this is necessary for pipelines working with the data on the remote compute server to efficiently move it between
139
+ storage and working (processing) volumes."""
158
140
 
159
141
  def resolve_paths(self, root_directory_path: Path) -> None:
160
142
  """Resolves all paths managed by the class instance based on the input root directory path.
@@ -186,6 +168,10 @@ class RawData:
186
168
  self.ubiquitin_path = self.raw_data_path.joinpath("ubiquitin.bin")
187
169
  self.nk_path = self.raw_data_path.joinpath("nk.bin")
188
170
 
171
+ # Infers the path to the root raw data directory under which the session's project is stored. This assumes that
172
+ # the raw_data directory is found under root/project/animal/session_id/raw_data
173
+ self.root_path = root_directory_path.parents[3]
174
+
189
175
  def make_directories(self) -> None:
190
176
  """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
191
177
 
@@ -220,12 +206,10 @@ class ProcessedData:
220
206
  behavior_data_path: Path = Path()
221
207
  """Stores the path to the directory that contains the non-video and non-brain-activity data extracted from
222
208
  .npz log files by the sl-behavior log processing pipeline."""
223
- p53_path: Path = Path()
224
- """Stores the path to the p53.bin file. This file serves as a lock-in marker that determines whether the session is
225
- in the processing or dataset state. Specifically, if the file does not exist, the session data cannot be integrated
226
- into any dataset, as it may be actively worked on by processing pipelines. Conversely, if the marker exists,
227
- processing pipelines are not allowed to work with the session, as it may be actively integrated into one or more
228
- datasets."""
209
+ root_path: Path = Path()
210
+ """Stores the path to the root directory of the volume that stores processed data from all Sun lab projects.
211
+ Primarily, this is necessary for pipelines working with the data on the remote compute server to efficiently move it
212
+ between storage and working (processing) volumes."""
229
213
 
230
214
  def resolve_paths(self, root_directory_path: Path) -> None:
231
215
  """Resolves all paths managed by the class instance based on the input root directory path.
@@ -242,7 +226,10 @@ class ProcessedData:
242
226
  self.camera_data_path = self.processed_data_path.joinpath("camera_data")
243
227
  self.mesoscope_data_path = self.processed_data_path.joinpath("mesoscope_data")
244
228
  self.behavior_data_path = self.processed_data_path.joinpath("behavior_data")
245
- self.p53_path = self.processed_data_path.joinpath("p53.bin")
229
+
230
+ # Infers the path to the root processed data directory under which the session's project is stored. This
231
+ # assumes that the processed_data directory is found under root/project/animal/session_id/processed_data
232
+ self.root_path = root_directory_path.parents[3]
246
233
 
247
234
  def make_directories(self) -> None:
248
235
  """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
@@ -256,6 +243,48 @@ class ProcessedData:
256
243
  ensure_directory_exists(self.behavior_data_path)
257
244
 
258
245
 
246
@dataclass
class TrackingData:
    """Describes the layout of the session-specific 'tracking_data' directory.

    The 'tracking_data' directory was introduced in version 5.0.0. It holds the ProcessingTracker instance data and
    the .lock files for the pipelines and tasks that work with the session's data after acquisition.
    """

    tracking_data_path: Path = Path()
    """Path to the root tracking_data directory of the session. This directory holds the .yaml ProcessingTracker
    files and the .lock FileLock files that together keep access to the session's data process- and thread-safe while
    multiple processes and pipelines work with it."""
    session_lock_path: Path = Path()
    """Path to the session's session_lock.yaml file. The file is used to guarantee that only a single manager process
    has exclusive access to the session's data on the remote compute server, so that multiple data processing
    pipelines can run for the same session without compromising its data integrity. The file is intended to be used
    through the SessionLock class."""

    def resolve_paths(self, root_directory_path: Path) -> None:
        """Regenerates every path managed by this instance from the provided directory path.

        This method is called each time the (wrapper) SessionData class is instantiated to rebuild the managed path
        hierarchy on the machine that instantiates the class.

        Args:
            root_directory_path: The path to use as the session's tracking_data directory. It is stored as-is in
                'tracking_data_path', and the path to the session_lock.yaml file is derived from it.
        """
        lock_file_name = "session_lock.yaml"
        self.tracking_data_path = root_directory_path
        self.session_lock_path = root_directory_path / lock_file_name

    def make_directories(self) -> None:
        """Ensures that the tracking_data directory exists, creating it if it is missing.

        This method is called each time the (wrapper) SessionData class is instantiated and allowed to generate
        missing data directories.
        """
        ensure_directory_exists(self.tracking_data_path)
286
+
287
+
259
288
  @dataclass
260
289
  class SessionData(YamlConfig):
261
290
  """Stores and manages the data layout of a single Sun lab data acquisition session.
@@ -297,21 +326,44 @@ class SessionData(YamlConfig):
297
326
  """Stores the version of the sl-experiment library that was used to acquire the session data."""
298
327
  raw_data: RawData = field(default_factory=lambda: RawData())
299
328
  """Stores absolute paths to all directories and files that jointly make the session's raw data hierarchy. This
300
- directory structure is resolved for each machine that creates or loads the SessionData class to ensure that all
301
- Sun lab data can be accessed via the same API on any destination."""
329
+ hierarchy is initially resolved by the acquisition system that acquires the session and used to store all data
330
+ acquired during the session runtime."""
302
331
  processed_data: ProcessedData = field(default_factory=lambda: ProcessedData())
303
332
  """Stores absolute paths to all directories and files that jointly make the session's processed data hierarchy.
304
- Typically, this hierarchy is only used on the lab's processing server(s), but it can also be used to run local
305
- testing on end-user machines."""
333
+ Processed data encompasses all data generated from the raw data as part of data processing."""
334
+ source_data: RawData = field(default_factory=lambda: RawData())
335
+ """Stores absolute paths to the same data as the 'raw_data' field, but with all paths resolved relative to the
336
+ 'processed_data' root. On systems that use the same root for processed and raw data, the source and raw directories
337
+ are identical. On systems that use different root directories for processed and raw data, the source and raw
338
+ directories are different. This is used to optimize data processing on the remote compute server by temporarily
339
+ copying all session data to the fast processed data volume."""
340
+ archived_data: ProcessedData = field(default_factory=lambda: ProcessedData())
341
+ """Similar to the 'source_data' field, stores the absolute path to the same data as the 'processed_data' field, but
342
+ with all paths resolved relative to the 'raw_data' root. This path is used as part of the session data archiving
343
+ process to collect all session data (raw and processed) on the slow 'storage' volume of the remote compute server.
344
+ """
345
+ tracking_data: TrackingData = field(default_factory=lambda: TrackingData())
346
+ """Stores absolute paths to all directories and files that jointly make the session's tracking data hierarchy. This
347
+ hierarchy is used during all stages of data processing to track the processing progress and ensure only a single
348
+ manager process can modify the session's data at any given time, ensuring access safety."""
306
349
 
307
350
  def __post_init__(self) -> None:
308
- """Ensures raw_data and processed_data are always instances of RawData and ProcessedData."""
351
+ """Ensures raw_data, processed_data, and source_data are always instances of RawData and ProcessedData."""
309
352
  if not isinstance(self.raw_data, RawData):
310
353
  self.raw_data = RawData()
311
354
 
312
355
  if not isinstance(self.processed_data, ProcessedData):
313
356
  self.processed_data = ProcessedData()
314
357
 
358
+ if not isinstance(self.source_data, RawData):
359
+ self.raw_data = RawData()
360
+
361
+ if not isinstance(self.archived_data, ProcessedData):
362
+ self.archived_data = ProcessedData()
363
+
364
+ if not isinstance(self.tracking_data, TrackingData):
365
+ self.raw_data = RawData()
366
+
315
367
  @classmethod
316
368
  def create(
317
369
  cls,
@@ -415,6 +467,22 @@ class SessionData(YamlConfig):
415
467
  processed_data = ProcessedData()
416
468
  processed_data.resolve_paths(root_directory_path=session_path.joinpath("processed_data"))
417
469
 
470
+ # Added in version 5.0.0. While source data is not used when the session is created (and is set to the same
471
+ # directory as raw_data), it is created here for completeness.
472
+ source_data = RawData()
473
+ source_data.resolve_paths(root_directory_path=session_path.joinpath("source_data"))
474
+
475
+ # Added in version 5.0.0. While archived data is not used when the session is created (and is set to the same
476
+ # directory as processed_data), it is created here for completeness.
477
+ archived_data = ProcessedData()
478
+ archived_data.resolve_paths(root_directory_path=session_path.joinpath("archived_data"))
479
+
480
+ # Similar to source_data, tracking data uses the same root as raw_data and is not used during data acquisition.
481
+ # Tracking data is used during data processing on the remote compute server(s) to ensure multiple pipelines
482
+ # can work with the session's data without collision.
483
+ tracking_data = TrackingData()
484
+ tracking_data.resolve_paths(root_directory_path=session_path.joinpath("tracking_data"))
485
+
418
486
  # Packages the sections generated above into a SessionData instance
419
487
  # noinspection PyArgumentList
420
488
  instance = SessionData(
@@ -424,6 +492,7 @@ class SessionData(YamlConfig):
424
492
  session_type=session_type,
425
493
  acquisition_system=acquisition_system.name,
426
494
  raw_data=raw_data,
495
+ source_data=source_data,
427
496
  processed_data=processed_data,
428
497
  experiment_name=experiment_name,
429
498
  python_version=python_version,
@@ -460,7 +529,6 @@ class SessionData(YamlConfig):
460
529
  cls,
461
530
  session_path: Path,
462
531
  processed_data_root: Path | None = None,
463
- make_processed_data_directory: bool = False,
464
532
  ) -> "SessionData":
465
533
  """Loads the SessionData instance from the target session's session_data.yaml file.
466
534
 
@@ -478,55 +546,85 @@ class SessionData(YamlConfig):
478
546
  provide the path to the root project directory (directory that stores all Sun lab projects) on that
479
547
  drive. The method will automatically resolve the project/animal/session/processed_data hierarchy using
480
548
  this root path. If raw and processed data are kept on the same drive, keep this set to None.
481
- make_processed_data_directory: Determines whether this method should create the processed_data directory if
482
- it does not exist.
483
549
 
484
550
  Returns:
485
551
  An initialized SessionData instance for the session whose data is stored at the provided path.
486
552
 
487
553
  Raises:
488
- FileNotFoundError: If the 'session_data.yaml' file is not found under the session_path/raw_data/ subfolder.
554
+ FileNotFoundError: If multiple or no 'session_data.yaml' file instances are found under the input session
555
+ path directory.
489
556
 
490
557
  """
491
- # To properly initialize the SessionData instance, the provided path should contain the raw_data directory
492
- # with the session_data.yaml file.
493
- session_data_path = session_path.joinpath("raw_data", "session_data.yaml")
494
- if not session_data_path.exists():
558
+ # To properly initialize the SessionData instance, the provided path should contain a single session_data.yaml
559
+ # file at any hierarchy level.
560
+ session_data_files = [file for file in session_path.rglob("*session_data.yaml")]
561
+ if len(session_data_files) != 1:
495
562
  message = (
496
- f"Unable to load the SessionData class for the target session: {session_path.stem}. No "
497
- f"session_data.yaml file was found inside the raw_data folder of the session. This likely "
498
- f"indicates that the session runtime was interrupted before recording any data, or that the "
499
- f"session path does not point to a valid session."
563
+ f"Unable to load the SessionData class for the target session. Expected a single session_data.yaml "
564
+ f"file to be located under the directory tree specified by the input path: {session_path}. Instead, "
565
+ f"encountered {len(session_data_files)} candidate files. This indicates that the input path does not "
566
+ f"point to a valid session directory."
500
567
  )
501
568
  console.error(message=message, error=FileNotFoundError)
502
569
 
503
- # Loads class data from the .yaml file
570
+ # If a single candidate is found (as expected), extracts it from the list and uses it to resolve the
571
+ # session data hierarchy.
572
+ session_data_path = session_data_files.pop()
573
+
574
+ # Loads class data from the .yaml file
504
575
  instance: SessionData = cls.from_yaml(file_path=session_data_path) # type: ignore
505
576
 
506
577
  # The method assumes that the 'donor' .yaml file is always stored inside the raw_data directory of the session
507
- # to be processed. Since the directory itself might have moved (between or even within the same PC) relative to
508
- # where it was when the SessionData snapshot was generated, reconfigures the paths to all raw_data files using
509
- # the root from above.
510
- local_root = session_path.parents[2]
511
-
512
- # RAW DATA
513
- new_root = local_root.joinpath(instance.project_name, instance.animal_id, instance.session_name, "raw_data")
514
- instance.raw_data.resolve_paths(root_directory_path=new_root)
578
+ # to be processed. In turn, that directory is expected to be found under the path root/project/animal/session.
579
+ # The code below uses this heuristic to discover the raw data root based on the session data file path.
580
+ local_root = session_data_path.parents[4] # Raw data root session directory
515
581
 
516
582
  # Unless a different root is provided for processed data, it uses the same root as raw_data.
517
583
  if processed_data_root is None:
518
584
  processed_data_root = local_root
519
585
 
520
- # Regenerates the processed_data path depending on the root resolution above
586
+ # RAW DATA
587
+ instance.raw_data.resolve_paths(
588
+ root_directory_path=local_root.joinpath(
589
+ instance.project_name, instance.animal_id, instance.session_name, "raw_data"
590
+ )
591
+ )
592
+
593
+ # PROCESSED DATA
521
594
  instance.processed_data.resolve_paths(
522
595
  root_directory_path=processed_data_root.joinpath(
523
596
  instance.project_name, instance.animal_id, instance.session_name, "processed_data"
524
597
  )
525
598
  )
526
599
 
527
- # Generates processed data directories if requested and necessary
528
- if make_processed_data_directory:
529
- instance.processed_data.make_directories()
600
+ # SOURCE DATA
601
+ instance.source_data.resolve_paths(
602
+ root_directory_path=processed_data_root.joinpath(
603
+ instance.project_name, instance.animal_id, instance.session_name, "source_data"
604
+ )
605
+ )
606
+ # Note, since source data is populated as part of the 'preparation' runtime, does not make the directories.
607
+
608
+ # ARCHIVED DATA
609
+ instance.archived_data.resolve_paths(
610
+ root_directory_path=local_root.joinpath(
611
+ instance.project_name, instance.animal_id, instance.session_name, "archived_data"
612
+ )
613
+ )
614
+ # Similar to source_data, archived data is populated as part of the 'archiving' pipeline, so directories for
615
+ # this data are not resolved.
616
+
617
+ # If there is no archived processed data, ensures that processed data hierarchy exists.
618
+ if not instance.archived_data.processed_data_path.exists():
619
+ instance.processed_data.make_directories() # Ensures processed data directories exist
620
+
621
+ # TRACKING DATA
622
+ instance.tracking_data.resolve_paths(
623
+ root_directory_path=local_root.joinpath(
624
+ instance.project_name, instance.animal_id, instance.session_name, "tracking_data"
625
+ )
626
+ )
627
+ instance.tracking_data.make_directories() # Ensures tracking data directories exist
530
628
 
531
629
  # Returns the initialized SessionData instance to caller
532
630
  return instance
@@ -557,6 +655,9 @@ class SessionData(YamlConfig):
557
655
  # prevents the SessionData instance from being loaded from the disk.
558
656
  origin.raw_data = None # type: ignore
559
657
  origin.processed_data = None # type: ignore
658
+ origin.source_data = None # type: ignore
659
+ origin.archived_data = None # type: ignore
660
+ origin.tracking_data = None # type: ignore
560
661
 
561
662
  # Converts StringEnum instances to strings
562
663
  origin.session_type = str(origin.session_type)
@@ -567,310 +668,146 @@ class SessionData(YamlConfig):
567
668
 
568
669
 
569
670
  @dataclass()
570
- class ProcessingTracker(YamlConfig):
571
- """Wraps the .yaml file that tracks the state of a data processing runtime and provides tools for communicating the
572
- state between multiple processes in a thread-safe manner.
573
-
574
- Primarily, this tracker class is used by all remote data processing pipelines in the lab to prevent race conditions
575
- and make it impossible to run multiple processing runtimes at the same time. It is also used to evaluate the status
576
- (success / failure) of jobs running on remote compute servers.
577
-
578
- Note:
579
- In library version 4.0.0 the processing trackers have been refactored to work similar to 'lock' files. That is,
580
- when a runtime is started, the tracker is switched into the 'running' (locked) state until it is unlocked,
581
- aborted, or encounters an error. When the tracker is locked, only the same manager process as the one that
582
- locked the tracker is allowed to work with session data. This feature allows executing complex processing
583
- pipelines that use multiple concurrent and / or sequential processing jobs on the remote server.
584
-
585
- This instance frequently refers to a 'manager process' in method documentation. A 'manager process' is the
586
- highest-level process that manages the runtime. When the runtime is executed on remote compute servers, the
587
- manager process is typically the process running on the non-server machine (user PC) that executes the remote
588
- processing job on the compute server (via SSH or similar protocol). The worker process(es) that run the
589
- processing job(s) on the remote compute servers are NOT considered manager processes.
671
+ class SessionLock(YamlConfig):
672
+ """Provides thread-safe session locking to ensure exclusive access during data processing.
673
+
674
+ This class manages a lock file that tracks which manager process currently has exclusive access to a session's data.
675
+ It prevents race conditions when multiple manager processes attempt to modify session data simultaneously.
676
+
677
+ The lock is identified by a manager process ID, allowing distributed processing across multiple jobs while
678
+ maintaining data integrity.
590
679
  """
591
680
 
592
681
  file_path: Path
593
- """Stores the path to the .yaml file used to cache the tracker data on disk. The class instance functions as a
594
- wrapper around the data stored inside the specified .yaml file."""
595
- _complete: bool = False
596
- """Tracks whether the processing runtime managed by this tracker has finished successfully."""
597
- _encountered_error: bool = False
598
- """Tracks whether the processing runtime managed by this tracker has encountered an error and has finished
599
- unsuccessfully."""
600
- _running: bool = False
601
- """Tracks whether the processing runtime managed by this tracker is currently running."""
682
+ """Stores the absolute path to the .yaml file that stores the lock state on disk."""
683
+
602
684
  _manager_id: int = -1
603
- """Stores the xxHash3-64 hash value that represents the unique identifier of the manager process that started the
604
- runtime. The manager process is typically running on a remote control machine (computer) and is used to
605
- support processing runtimes that are distributed over multiple separate batch jobs on the compute server. This
606
- ID should be generated using the 'generate_manager_id()' function exposed by this library."""
685
+ """Stores the unique identifier of the manager process that holds the lock. A value of -1 indicates no lock."""
686
+
607
687
  _lock_path: str = field(init=False)
608
- """Stores the path to the .lock file used to ensure that only a single process can simultaneously access the data
609
- stored inside the tracker file."""
688
+ """Stores the absolute path to the .lock file ensuring thread-safe access to the lock state."""
610
689
 
611
690
  def __post_init__(self) -> None:
612
- # Generates the .lock file path for the target tracker .yaml file.
691
+ """Initializes the lock file path based on the .yaml file path."""
613
692
  if self.file_path is not None:
614
693
  self._lock_path = str(self.file_path.with_suffix(self.file_path.suffix + ".lock"))
615
694
  else:
616
695
  self._lock_path = ""
617
696
 
618
697
  def _load_state(self) -> None:
619
- """Reads the current processing state from the wrapped .YAML file."""
698
+ """Loads the current lock state from the .yaml file."""
620
699
  if self.file_path.exists():
621
- # Loads the data for the state values but does not replace the file path or lock attributes.
622
- instance: ProcessingTracker = self.from_yaml(self.file_path) # type: ignore
623
- self._complete = copy.copy(instance._complete)
624
- self._encountered_error = copy.copy(instance._encountered_error)
625
- self._running = copy.copy(instance._running)
700
+ instance: SessionLock = self.from_yaml(self.file_path) # type: ignore
626
701
  self._manager_id = copy.copy(instance._manager_id)
627
702
  else:
628
- # Otherwise, if the tracker file does not exist, generates a new .yaml file using default instance values
629
- # and saves it to disk using the specified tracker file path.
703
+ # Creates a new lock file with the default state (unlocked)
630
704
  self._save_state()
631
705
 
632
706
  def _save_state(self) -> None:
633
- """Saves the current processing state stored inside instance attributes to the specified .YAML file."""
634
- # Resets the _lock_path and file_path to None before dumping the data to .YAML to avoid issues with loading it
635
- # back.
707
+ """Saves the current lock state to the .yaml file."""
708
+ # Creates a copy without file paths for clean serialization
636
709
  original = copy.deepcopy(self)
637
710
  original.file_path = None # type: ignore
638
711
  original._lock_path = None # type: ignore
639
712
  original.to_yaml(file_path=self.file_path)
640
713
 
641
- def start(self, manager_id: int) -> None:
642
- """Configures the tracker file to indicate that a manager process is currently executing the tracked processing
643
- runtime.
644
-
645
- Calling this method effectively 'locks' the tracked session and processing runtime combination to only be
646
- accessible from the manager process that calls this method. Calling this method for an already running runtime
647
- managed by the same process does not have any effect, so it is safe to call this method at the beginning of
648
- each processing job that makes up the runtime.
714
def acquire(self, manager_id: int) -> None:
    """Claims the session lock for the specified manager process.

    The lock state is reloaded from disk while holding the .lock file. If the stored manager ID is -1 (no owner)
    or already equals the requesting manager's ID, the requesting manager's ID is written back as the owner.

    Args:
        manager_id: The unique identifier of the manager process requesting the lock.

    Raises:
        TimeoutError: If the .lock file cannot be acquired within the 10-second timeout because another process
            holds it.
        RuntimeError: If the session lock is currently held by a different manager process.
    """
    file_guard = FileLock(self._lock_path)
    with file_guard.acquire(timeout=10.0):
        self._load_state()

        # Any owner other than 'nobody' (-1) or the requesting manager means the session is locked by someone else.
        if self._manager_id not in (-1, manager_id):
            message = (
                f"Cannot acquire the session lock for manager process {manager_id}. The {self.file_path.name} "
                f"session lock file indicates The lock is currently held by the manager process "
                f"{self._manager_id}. Call the command that produced this error with the '--reset_lock' flag "
                f"to override this safety feature or wait for the natural lock release."
            )
            console.error(message=message, error=RuntimeError)
            raise RuntimeError(message)

        # The lock is either free or already owned by this manager; in both cases records this manager as the
        # owner and writes the state back to disk.
        self._manager_id = manager_id
        self._save_state()
685
744
 
686
- def error(self, manager_id: int) -> None:
687
- """Configures the tracker file to indicate that the tracked processing runtime encountered an error and failed
688
- to complete.
689
-
690
- This method fulfills two main purposes. First, it 'unlocks' the runtime, allowing other manager processes to
691
- interface with the tracked runtime. Second, it updates the tracker file to reflect that the runtime was
692
- interrupted due to an error, which is used by the manager processes to detect and handle processing failures.
745
+ def release(self, manager_id: int) -> None:
746
+ """Releases the session lock.
693
747
 
694
748
  Args:
695
- manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
696
- runtime tracked by this tracker file has encountered an error.
749
+ manager_id: The unique identifier of the manager process releasing the lock.
697
750
 
698
751
  Raises:
699
- TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
752
+ TimeoutError: If the .lock file cannot be acquired for a long period of time due to being held by another
753
+ process.
754
+ RuntimeError: If the lock is held by another process.
700
755
  """
701
756
  lock = FileLock(self._lock_path)
702
757
  with lock.acquire(timeout=10.0):
703
- # Loads tracker state from the .yaml file
704
758
  self._load_state()
705
759
 
706
- # If the runtime is not running, returns without doing anything
707
- if not self._running:
708
- return
709
-
710
- # Ensures that only the active manager process can report runtime errors using the tracker file
711
- if manager_id != self._manager_id:
760
+ if self._manager_id != manager_id:
712
761
  message = (
713
- f"Unable to report that the processing runtime has encountered an error from the manager process "
714
- f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
715
- f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
716
- f"with the runtime."
762
+ f"Unable to release the session lock from the manager with id {manager_id}. The "
763
+ f"{self.file_path.name} session lock file indicates that the lock is held by the process with "
764
+ f"id {self._manager_id}, preventing other processes from interfacing with the session lock."
717
765
  )
718
766
  console.error(message=message, error=RuntimeError)
719
767
  raise RuntimeError(message) # Fallback to appease mypy, should not be reachable
720
768
 
721
- # Indicates that the runtime aborted with an error
722
- self._running = False
769
+ # Releases the lock
723
770
  self._manager_id = -1
724
- self._complete = False
725
- self._encountered_error = True
726
771
  self._save_state()
727
772
 
728
- def stop(self, manager_id: int) -> None:
729
- """Configures the tracker file to indicate that the tracked processing runtime has been completed successfully.
730
-
731
- This method 'unlocks' the runtime, allowing other manager processes to interface with the tracked runtime. It
732
- also configures the tracker file to indicate that the runtime has been completed successfully, which is used
733
- by the manager processes to detect and handle processing completion.
773
+ def force_release(self) -> None:
774
+ """Forcibly releases the lock regardless of ownership.
734
775
 
735
- Args:
736
- manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
737
- runtime tracked by this tracker file has been completed successfully.
776
+ This method should only be used for emergency recovery of deadlocked sessions. It can be called by any process
777
+ to unlock the session whose lock is managed by this instance.
738
778
 
739
779
  Raises:
740
- TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
780
+ TimeoutError: If the .lock file cannot be acquired for a long period of time due to being held by another
781
+ process.
741
782
  """
742
783
  lock = FileLock(self._lock_path)
743
784
  with lock.acquire(timeout=10.0):
744
- # Loads tracker state from the .yaml file
745
- self._load_state()
746
-
747
- # If the runtime is not running, does not do anything
748
- if not self._running:
749
- return
750
-
751
- # Ensures that only the active manager process can report runtime completion using the tracker file
752
- if manager_id != self._manager_id:
753
- message = (
754
- f"Unable to report that the processing runtime has completed successfully from the manager process "
755
- f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
756
- f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
757
- f"with the runtime."
758
- )
759
- console.error(message=message, error=RuntimeError)
760
- raise RuntimeError(message) # Fallback to appease mypy, should not be reachable
761
-
762
- # Otherwise, marks the runtime as complete (stopped)
763
- self._running = False
764
785
  self._manager_id = -1
765
- self._complete = True
766
- self._encountered_error = False
767
786
  self._save_state()
768
787
 
769
- def abort(self) -> None:
770
- """Resets the runtime tracker file to the default state.
788
+ @property
789
+ def is_locked(self) -> bool:
790
+ """Returns True if the session is currently locked by any process, False otherwise.
771
791
 
772
- This method can be used to reset the runtime tracker file, regardless of the current runtime state. Unlike other
773
- instance methods, this method can be called from any manager process, even if the runtime is already locked by
774
- another process. This method is only intended to be used in the case of emergency to 'unlock' a deadlocked
775
- runtime.
792
+ Raises:
793
+ TimeoutError: If the .lock file cannot be acquired for a long period of time due to being held by another
794
+ process.
776
795
  """
777
796
  lock = FileLock(self._lock_path)
778
797
  with lock.acquire(timeout=10.0):
779
- # Loads tracker state from the .yaml file
780
- self._load_state()
781
-
782
- # Resets the tracker file to the default state. Note, does not indicate that the runtime is complete nor
783
- # that it has encountered an error.
784
- self._running = False
785
- self._manager_id = -1
786
- self._complete = False
787
- self._encountered_error = False
788
- self._save_state()
789
-
790
- @property
791
- def is_complete(self) -> bool:
792
- """Returns True if the tracker wrapped by the instance indicates that the processing runtime has been completed
793
- successfully and that the runtime is not currently ongoing."""
794
- lock = FileLock(self._lock_path)
795
- with lock.acquire(timeout=10.0):
796
- # Loads tracker state from the .yaml file
797
798
  self._load_state()
798
- return self._complete
799
+ return self._manager_id != -1
799
800
 
800
801
  @property
801
- def encountered_error(self) -> bool:
802
- """Returns True if the tracker wrapped by the instance indicates that the processing runtime has aborted due
803
- to encountering an error."""
804
- lock = FileLock(self._lock_path)
805
- with lock.acquire(timeout=10.0):
806
- # Loads tracker state from the .yaml file
807
- self._load_state()
808
- return self._encountered_error
802
+ def owner(self) -> int | None:
803
+ """Returns the unique identifier of the manager process that holds the lock if the session is locked or None if
804
+ the session is unlocked.
809
805
 
810
- @property
811
- def is_running(self) -> bool:
812
- """Returns True if the tracker wrapped by the instance indicates that the processing runtime is currently
813
- ongoing."""
806
+ Raises:
807
+ TimeoutError: If the .lock file cannot be acquired for a long period of time due to being held by another
808
+ process.
809
+ """
814
810
  lock = FileLock(self._lock_path)
815
811
  with lock.acquire(timeout=10.0):
816
- # Loads tracker state from the .yaml file
817
812
  self._load_state()
818
- return self._running
819
-
820
-
821
- def get_processing_tracker(root: Path, file_name: TrackerFileNames | str) -> ProcessingTracker:
822
- """Initializes and returns the ProcessingTracker instance that manages the data stored inside the target processing
823
- tracker file.
824
-
825
- This function uses the input root path and tracker file name to first resolve the absolute path to the .yaml data
826
- cache of the target processing tracker file and then wrap the file into a ProcessingTracker instance. All Sun lab
827
- libraries that use ProcessingTracker instances use this function to access the necessary trackers.
828
-
829
- Notes:
830
- If the target file does not exist, this function will create the file as part of the ProcessingTracker
831
- initialization.
832
-
833
- This function also generates the corresponding .lock file to ensure that the data inside the processing tracker
834
- is accessed by a single process at a time.
835
-
836
- Args:
837
- file_name: The name of the target processing tracker file. Has to be one of the names from the TrackerFileNames
838
- enumeration.
839
- root: The absolute path to the directory where the target file is stored or should be created.
840
-
841
- Returns:
842
- The initialized ProcessingTracker instance that manages the data stored in the target file.
843
- """
844
-
845
- # Prevents using the function for unsupported tracker file names.
846
- supported_files = tuple(TrackerFileNames)
847
- if file_name not in supported_files:
848
- message = (
849
- f"Unable to construct the path to the tracker file {file_name}. The input name is not one of the supported"
850
- f"names. Use one of the supported options provided by the TrackerFileNames enumeration."
851
- )
852
- console.error(message=message, error=ValueError)
853
-
854
- # Constructs and returns the absolute path to the requested tracker file.
855
- tracker_path = root.joinpath(file_name)
856
- return ProcessingTracker(file_path=tracker_path)
857
-
858
-
859
- def generate_manager_id() -> int:
860
- """Generates and returns a unique integer identifier that can be used to identify the manager process that calls
861
- this function.
862
-
863
- The identifier is generated based on the current timestamp, accurate to microseconds, and a random number between 1
864
- and 9999999999999. This ensures that the identifier is unique for each function call. The generated identifier
865
- string is converted to a unique integer value using the xxHash-64 algorithm before it is returned to the caller.
866
-
867
- Notes:
868
- This function should be used to generate manager process identifiers for working with ProcessingTracker
869
- instances from sl-shared-assets version 4.0.0 and above.
870
- """
871
- timestamp = get_timestamp()
872
- random_number = randint(1, 9999999999999)
873
- manager_id = f"{timestamp}_{random_number}"
874
- id_hash = xxh3_64()
875
- id_hash.update(manager_id)
876
- return id_hash.intdigest()
813
+ return self._manager_id if self._manager_id != -1 else None