sl-shared-assets 3.1.3__py3-none-any.whl → 4.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic. Click here for more details.

@@ -29,6 +29,20 @@ class SessionTypes(StrEnum):
29
29
  MESOSCOPE_EXPERIMENT = "mesoscope experiment"
30
30
  WINDOW_CHECKING = "window checking"
31
31
 
32
+ class TrackerFileNames(StrEnum):
33
+ """Defines a set of processing tracker .yaml files supported by various Sun lab data preprocessing, processing, and
34
+ dataset formation pipelines.
35
+
36
+ This enumeration standardizes the names for all processing tracker files used in the lab. It is designed to be used
37
+ via the get_processing_tracker() function to generate ProcessingTracker instances.
38
+ """
39
+
40
+ BEHAVIOR = "behavior_processing_tracker.yaml"
41
+ SUITE2P = "suite2p_processing_tracker.yaml"
42
+ DATASET = "dataset_formation_tracker.yaml"
43
+ VIDEO = "video_processing_tracker.yaml"
44
+ INTEGRITY = "integrity_verification_tracker.yaml"
45
+
32
46
  @dataclass()
33
47
  class RawData:
34
48
  """Stores the paths to the directories and files that make up the 'raw_data' session-specific directory.
@@ -60,7 +74,6 @@ class RawData:
60
74
  telomere_path: Path = ...
61
75
  ubiquitin_path: Path = ...
62
76
  nk_path: Path = ...
63
- integrity_verification_tracker_path: Path = ...
64
77
  def resolve_paths(self, root_directory_path: Path) -> None:
65
78
  """Resolves all paths managed by the class instance based on the input root directory path.
66
79
 
@@ -91,9 +104,6 @@ class ProcessedData:
91
104
  camera_data_path: Path = ...
92
105
  mesoscope_data_path: Path = ...
93
106
  behavior_data_path: Path = ...
94
- suite2p_processing_tracker_path: Path = ...
95
- behavior_processing_tracker_path: Path = ...
96
- video_processing_tracker_path: Path = ...
97
107
  p53_path: Path = ...
98
108
  def resolve_paths(self, root_directory_path: Path) -> None:
99
109
  """Resolves all paths managed by the class instance based on the input root directory path.
@@ -133,8 +143,8 @@ class SessionData(YamlConfig):
133
143
  animal_id: str
134
144
  session_name: str
135
145
  session_type: str | SessionTypes
136
- acquisition_system: str | AcquisitionSystems
137
- experiment_name: str | None
146
+ acquisition_system: str | AcquisitionSystems = ...
147
+ experiment_name: str | None = ...
138
148
  python_version: str = ...
139
149
  sl_experiment_version: str = ...
140
150
  raw_data: RawData = field(default_factory=Incomplete)
@@ -223,7 +233,7 @@ class SessionData(YamlConfig):
223
233
  that did not fully initialize during runtime. This service method is designed to be called by the sl-experiment
224
234
  library classes to remove the 'nk.bin' marker when it is safe to do so. It should not be called by end-users.
225
235
  """
226
- def _save(self) -> None:
236
+ def save(self) -> None:
227
237
  """Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.
228
238
 
229
239
  This is used to save the data stored in the instance to disk so that it can be reused during further stages of
@@ -237,66 +247,133 @@ class ProcessingTracker(YamlConfig):
237
247
  state between multiple processes in a thread-safe manner.
238
248
 
239
249
  Primarily, this tracker class is used by all remote data processing pipelines in the lab to prevent race conditions
240
- and make it impossible to run multiple processing runtimes at the same time.
250
+ and make it impossible to run multiple processing runtimes at the same time. It is also used to evaluate the status
251
+ (success / failure) of jobs running on remote compute servers.
252
+
253
+ Note:
254
+ In library version 4.0.0 the processing trackers have been refactored to work similarly to 'lock' files. That is,
255
+ when a runtime is started, the tracker is switched into the 'running' (locked) state until it is unlocked,
256
+ aborted, or encounters an error. When the tracker is locked, only the same manager process as the one that
257
+ locked the tracker is allowed to work with session data. This feature allows executing complex processing
258
+ pipelines that use multiple concurrent and / or sequential processing jobs on the remote server.
259
+
260
+ This instance frequently refers to a 'manager process' in method documentation. A 'manager process' is the
261
+ highest-level process that manages the runtime. When the runtime is executed on remote compute servers, the
262
+ manager process is typically the process running on the non-server machine (user PC) that executes the remote
263
+ processing job on the compute server (via SSH or similar protocol). The worker process(es) that run the
264
+ processing job(s) on the remote compute servers are NOT considered manager processes.
241
265
  """
242
266
 
243
267
  file_path: Path
244
- _is_complete: bool = ...
268
+ _complete: bool = ...
245
269
  _encountered_error: bool = ...
246
- _is_running: bool = ...
270
+ _running: bool = ...
271
+ _manager_id: int = ...
247
272
  _lock_path: str = field(init=False)
248
- _started_runtime: bool = ...
249
273
  def __post_init__(self) -> None: ...
250
- def __del__(self) -> None:
251
- """If the instance as used to start a runtime, ensures that the instance properly marks the runtime as completed
252
- or erred before being garbage-collected.
253
-
254
- This is a security mechanism to prevent deadlocking the processed session and pipeline for future runtimes.
255
- """
256
274
  def _load_state(self) -> None:
257
275
  """Reads the current processing state from the wrapped .YAML file."""
258
276
  def _save_state(self) -> None:
259
277
  """Saves the current processing state stored inside instance attributes to the specified .YAML file."""
260
- def start(self) -> None:
261
- """Configures the tracker file to indicate that the tracked processing runtime is currently running.
278
+ def start(self, manager_id: int) -> None:
279
+ """Configures the tracker file to indicate that a manager process is currently executing the tracked processing
280
+ runtime.
281
+
282
+ Calling this method effectively 'locks' the tracked session and processing runtime combination to only be
283
+ accessible from the manager process that calls this method. Calling this method for an already running runtime
284
+ managed by the same process does not have any effect, so it is safe to call this method at the beginning of
285
+ each processing job that makes up the runtime.
262
286
 
263
- All further attempts to start the same processing runtime for the same session's data will automatically abort
264
- with an error.
287
+ Args:
288
+ manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to start the runtime
289
+ tracked by this tracker file.
265
290
 
266
291
  Raises:
267
292
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
268
293
  """
269
- def error(self) -> None:
294
+ def error(self, manager_id: int) -> None:
270
295
  """Configures the tracker file to indicate that the tracked processing runtime encountered an error and failed
271
296
  to complete.
272
297
 
273
- This method will only work for an active runtime. When called for an active runtime, it expects the runtime to
274
- be aborted with an error after the method returns. It configures the target tracker to allow other processes
275
- to restart the runtime at any point after this method returns, so it is UNSAFE to do any further processing
276
- from the process that calls this method.
298
+ This method fulfills two main purposes. First, it 'unlocks' the runtime, allowing other manager processes to
299
+ interface with the tracked runtime. Second, it updates the tracker file to reflect that the runtime was
300
+ interrupted due to an error, which is used by the manager processes to detect and handle processing failures.
301
+
302
+ Args:
303
+ manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
304
+ runtime tracked by this tracker file has encountered an error.
277
305
 
278
306
  Raises:
279
307
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
280
308
  """
281
- def stop(self) -> None:
309
+ def stop(self, manager_id: int) -> None:
282
310
  """Configures the tracker file to indicate that the tracked processing runtime has been completed successfully.
283
311
 
284
- After this method returns, it is UNSAFE to do any further processing from the process that calls this method.
285
- Any process that calls the 'start' method of this class is expected to also call this method or 'error' method
286
- at the end of the runtime.
312
+ This method 'unlocks' the runtime, allowing other manager processes to interface with the tracked runtime. It
313
+ also configures the tracker file to indicate that the runtime has been completed successfully, which is used
314
+ by the manager processes to detect and handle processing completion.
315
+
316
+ Args:
317
+ manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
318
+ runtime tracked by this tracker file has been completed successfully.
287
319
 
288
320
  Raises:
289
321
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
290
322
  """
323
+ def abort(self) -> None:
324
+ """Resets the runtime tracker file to the default state.
325
+
326
+ This method can be used to reset the runtime tracker file, regardless of the current runtime state. Unlike other
327
+ instance methods, this method can be called from any manager process, even if the runtime is already locked by
328
+ another process. This method is only intended to be used in the case of emergency to 'unlock' a deadlocked
329
+ runtime.
330
+ """
291
331
  @property
292
332
  def is_complete(self) -> bool:
293
333
  """Returns True if the tracker wrapped by the instance indicates that the processing runtime has been completed
294
- successfully at least once and that there is no ongoing processing that uses the target session."""
334
+ successfully and that the runtime is not currently ongoing."""
295
335
  @property
296
336
  def encountered_error(self) -> bool:
297
- """Returns True if the tracker wrapped by the instance indicates that the processing runtime for the target
298
- session has aborted due to encountering an error."""
337
+ """Returns True if the tracker wrapped by the instance indicates that the processing runtime has aborted due
338
+ to encountering an error."""
299
339
  @property
300
340
  def is_running(self) -> bool:
301
341
  """Returns True if the tracker wrapped by the instance indicates that the processing runtime is currently
302
- running for the target session."""
342
+ ongoing."""
343
+
344
+ def get_processing_tracker(root: Path, file_name: TrackerFileNames | str) -> ProcessingTracker:
345
+ """Initializes and returns the ProcessingTracker instance that manages the data stored inside the target processing
346
+ tracker file.
347
+
348
+ This function uses the input root path and tracker file name to first resolve the absolute path to the .yaml data
349
+ cache of the target processing tracker file and then wrap the file into a ProcessingTracker instance. All Sun lab
350
+ libraries that use ProcessingTracker instances use this function to access the necessary trackers.
351
+
352
+ Notes:
353
+ If the target file does not exist, this function will create the file as part of the ProcessingTracker
354
+ initialization.
355
+
356
+ This function also generates the corresponding .lock file to ensure that the data inside the processing tracker
357
+ is accessed by a single process at a time.
358
+
359
+ Args:
360
+ file_name: The name of the target processing tracker file. Has to be one of the names from the TrackerFileNames
361
+ enumeration.
362
+ root: The absolute path to the directory where the target file is stored or should be created.
363
+
364
+ Returns:
365
+ The initialized ProcessingTracker instance that manages the data stored in the target file.
366
+ """
367
+
368
+ def generate_manager_id() -> int:
369
+ """Generates and returns a unique integer identifier that can be used to identify the manager process that calls
370
+ this function.
371
+
372
+ The identifier is generated based on the current timestamp, accurate to microseconds, and a random number between 1
373
+ and 9999999999999. This ensures that the identifier is unique for each function call. The generated identifier
374
+ string is converted to a unique integer value using the xxHash-64 algorithm before it is returned to the caller.
375
+
376
+ Notes:
377
+ This function should be used to generate manager process identifiers for working with ProcessingTracker
378
+ instances from sl-shared-assets version 4.0.0 and above.
379
+ """
@@ -56,7 +56,7 @@ class ProcedureData:
56
56
 
57
57
  @dataclass
58
58
  class ImplantData:
59
- """Stores the information about a single implantation performed during the surgical intervention.
59
+ """Stores the information about a single implantation procedure performed during the surgical intervention.
60
60
 
61
61
  Multiple ImplantData instances are used at the same time if the surgery involved multiple implants.
62
62
  """
@@ -65,7 +65,7 @@ class ImplantData:
65
65
  """The descriptive name of the implant."""
66
66
  implant_target: str
67
67
  """The name of the brain region or cranium section targeted by the implant."""
68
- implant_code: int
68
+ implant_code: str
69
69
  """The manufacturer code or internal reference code for the implant. This code is used to identify the implant in
70
70
  additional datasheets and lab ordering documents."""
71
71
  implant_ap_coordinate_mm: float
@@ -89,7 +89,7 @@ class InjectionData:
89
89
  """The name of the brain region targeted by the injection."""
90
90
  injection_volume_nl: float
91
91
  """The volume of substance, in nanoliters, delivered during the injection."""
92
- injection_code: int
92
+ injection_code: str
93
93
  """The manufacturer code or internal reference code for the injected substance. This code is used to identify the
94
94
  substance in additional datasheets and lab ordering documents."""
95
95
  injection_ap_coordinate_mm: float
@@ -108,22 +108,22 @@ class DrugData:
108
108
 
109
109
  lactated_ringers_solution_volume_ml: float
110
110
  """Stores the volume of Lactated Ringer's Solution (LRS) administered during surgery, in ml."""
111
- lactated_ringers_solution_code: int
111
+ lactated_ringers_solution_code: str
112
112
  """Stores the manufacturer code or internal reference code for Lactated Ringer's Solution (LRS). This code is used
113
113
  to identify the LRS batch in additional datasheets and lab ordering documents."""
114
114
  ketoprofen_volume_ml: float
115
115
  """Stores the volume of ketoprofen diluted with saline administered during surgery, in ml."""
116
- ketoprofen_code: int
116
+ ketoprofen_code: str
117
117
  """Stores the manufacturer code or internal reference code for ketoprofen. This code is used to identify the
118
118
  ketoprofen batch in additional datasheets and lab ordering documents."""
119
119
  buprenorphine_volume_ml: float
120
120
  """Stores the volume of buprenorphine diluted with saline administered during surgery, in ml."""
121
- buprenorphine_code: int
121
+ buprenorphine_code: str
122
122
  """Stores the manufacturer code or internal reference code for buprenorphine. This code is used to identify the
123
123
  buprenorphine batch in additional datasheets and lab ordering documents."""
124
124
  dexamethasone_volume_ml: float
125
125
  """Stores the volume of dexamethasone diluted with saline administered during surgery, in ml."""
126
- dexamethasone_code: int
126
+ dexamethasone_code: str
127
127
  """Stores the manufacturer code or internal reference code for dexamethasone. This code is used to identify the
128
128
  dexamethasone batch in additional datasheets and lab ordering documents."""
129
129
 
@@ -30,14 +30,14 @@ class ProcedureData:
30
30
 
31
31
  @dataclass
32
32
  class ImplantData:
33
- """Stores the information about a single implantation performed during the surgical intervention.
33
+ """Stores the information about a single implantation procedure performed during the surgical intervention.
34
34
 
35
35
  Multiple ImplantData instances are used at the same time if the surgery involved multiple implants.
36
36
  """
37
37
 
38
38
  implant: str
39
39
  implant_target: str
40
- implant_code: int
40
+ implant_code: str
41
41
  implant_ap_coordinate_mm: float
42
42
  implant_ml_coordinate_mm: float
43
43
  implant_dv_coordinate_mm: float
@@ -52,7 +52,7 @@ class InjectionData:
52
52
  injection: str
53
53
  injection_target: str
54
54
  injection_volume_nl: float
55
- injection_code: int
55
+ injection_code: str
56
56
  injection_ap_coordinate_mm: float
57
57
  injection_ml_coordinate_mm: float
58
58
  injection_dv_coordinate_mm: float
@@ -64,13 +64,13 @@ class DrugData:
64
64
  """
65
65
 
66
66
  lactated_ringers_solution_volume_ml: float
67
- lactated_ringers_solution_code: int
67
+ lactated_ringers_solution_code: str
68
68
  ketoprofen_volume_ml: float
69
- ketoprofen_code: int
69
+ ketoprofen_code: str
70
70
  buprenorphine_volume_ml: float
71
- buprenorphine_code: int
71
+ buprenorphine_code: str
72
72
  dexamethasone_volume_ml: float
73
- dexamethasone_code: int
73
+ dexamethasone_code: str
74
74
 
75
75
  @dataclass
76
76
  class SurgeryData(YamlConfig):
@@ -393,8 +393,8 @@ class Server:
393
393
  # The file doesn't exist yet or job initialization failed
394
394
  if self.job_complete(job):
395
395
  message = (
396
- f"Remote jupyter server job {job.job_name} with id {job.job_id} encountered a startup and "
397
- f"was terminated prematurely."
396
+ f"Remote jupyter server job {job.job_name} with id {job.job_id} encountered a startup "
397
+ f"error and was terminated prematurely."
398
398
  )
399
399
  console.error(message, RuntimeError)
400
400
 
@@ -10,6 +10,8 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
10
10
  from tqdm import tqdm
11
11
  import xxhash
12
12
 
13
+ from ..data_classes import TrackerFileNames
14
+
13
15
  # Defines a 'blacklist' set of files. Primarily, this list contains the service files that may change after the session
14
16
  # data has been acquired. Therefore, it does not make sense to include them in the checksum, as they do not reflect the
15
17
  # data that should remain permanently unchanged. Note, make sure all service files are added to this set!
@@ -19,16 +21,13 @@ _excluded_files = {
19
21
  "telomere.bin",
20
22
  "p53.bin",
21
23
  "nk.bin",
22
- "suite2p_processing_tracker.yaml",
23
- "dataset_formation_tracker.yaml",
24
- "video_processing_tracker.yaml",
25
- "integrity_verification_tracker.yaml",
26
- "suite2p_processing_tracker.yaml.lock",
27
- "behavior_processing_tracker.yaml.lock",
28
- "video_processing_tracker.yaml.lock",
29
- "integrity_verification_tracker.yaml.lock",
30
24
  }
31
25
 
26
+ # Extends the exclusion set to include all tracker .yaml files and their concurrent access .lock files.
27
+ for name in tuple(TrackerFileNames):
28
+ _excluded_files.add(name)
29
+ _excluded_files.add(f"{name}.lock")
30
+
32
31
 
33
32
  def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str, bytes]:
34
33
  """Calculates xxHash3-128 checksum for a single file and its path relative to the base directory.
@@ -2,6 +2,8 @@ from pathlib import Path
2
2
 
3
3
  from _typeshed import Incomplete
4
4
 
5
+ from ..data_classes import TrackerFileNames as TrackerFileNames
6
+
5
7
  _excluded_files: Incomplete
6
8
 
7
9
  def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str, bytes]: