sl-shared-assets 4.0.1__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sl-shared-assets might be problematic.

Files changed (39)
  1. sl_shared_assets/__init__.py +45 -42
  2. sl_shared_assets/command_line_interfaces/__init__.py +3 -0
  3. sl_shared_assets/command_line_interfaces/configure.py +173 -0
  4. sl_shared_assets/command_line_interfaces/manage.py +226 -0
  5. sl_shared_assets/data_classes/__init__.py +33 -32
  6. sl_shared_assets/data_classes/configuration_data.py +267 -79
  7. sl_shared_assets/data_classes/session_data.py +226 -289
  8. sl_shared_assets/server/__init__.py +24 -4
  9. sl_shared_assets/server/job.py +6 -7
  10. sl_shared_assets/server/pipeline.py +570 -0
  11. sl_shared_assets/server/server.py +57 -25
  12. sl_shared_assets/tools/__init__.py +9 -8
  13. sl_shared_assets/tools/packaging_tools.py +14 -25
  14. sl_shared_assets/tools/project_management_tools.py +602 -523
  15. sl_shared_assets/tools/transfer_tools.py +88 -23
  16. {sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.0.dist-info}/METADATA +46 -203
  17. sl_shared_assets-5.0.0.dist-info/RECORD +23 -0
  18. sl_shared_assets-5.0.0.dist-info/entry_points.txt +3 -0
  19. sl_shared_assets/__init__.pyi +0 -91
  20. sl_shared_assets/cli.py +0 -501
  21. sl_shared_assets/cli.pyi +0 -106
  22. sl_shared_assets/data_classes/__init__.pyi +0 -75
  23. sl_shared_assets/data_classes/configuration_data.pyi +0 -235
  24. sl_shared_assets/data_classes/runtime_data.pyi +0 -157
  25. sl_shared_assets/data_classes/session_data.pyi +0 -379
  26. sl_shared_assets/data_classes/surgery_data.pyi +0 -89
  27. sl_shared_assets/server/__init__.pyi +0 -11
  28. sl_shared_assets/server/job.pyi +0 -205
  29. sl_shared_assets/server/server.pyi +0 -298
  30. sl_shared_assets/tools/__init__.pyi +0 -19
  31. sl_shared_assets/tools/ascension_tools.py +0 -265
  32. sl_shared_assets/tools/ascension_tools.pyi +0 -68
  33. sl_shared_assets/tools/packaging_tools.pyi +0 -58
  34. sl_shared_assets/tools/project_management_tools.pyi +0 -239
  35. sl_shared_assets/tools/transfer_tools.pyi +0 -53
  36. sl_shared_assets-4.0.1.dist-info/RECORD +0 -36
  37. sl_shared_assets-4.0.1.dist-info/entry_points.txt +0 -7
  38. {sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.0.dist-info}/WHEEL +0 -0
  39. {sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.0.dist-info}/licenses/LICENSE +0 -0
sl_shared_assets/server/__init__.py

@@ -1,8 +1,28 @@
-"""This package provides the classes and methods used by all Sun lab libraries to submit remote jobs to the BioHPC
-and other compute servers. This package is also used across all Sun lab members' private code to interface with the
-shared server."""
+"""This package provides the classes and methods used by all Sun lab libraries to work with the data stored on remote
+compute servers, such as the BioHPC server. It provides tools for submitting and monitoring jobs, running complex
+processing pipelines and interactively working with the data via a Jupyter lab server."""
 
 from .job import Job, JupyterJob
 from .server import Server, ServerCredentials, generate_server_credentials
+from .pipeline import (
+    ProcessingStatus,
+    TrackerFileNames,
+    ProcessingTracker,
+    ProcessingPipeline,
+    ProcessingPipelines,
+    generate_manager_id,
+)
 
-__all__ = ["Server", "ServerCredentials", "generate_server_credentials", "Job", "JupyterJob"]
+__all__ = [
+    "Job",
+    "JupyterJob",
+    "ProcessingPipeline",
+    "ProcessingPipelines",
+    "ProcessingStatus",
+    "ProcessingTracker",
+    "Server",
+    "ServerCredentials",
+    "TrackerFileNames",
+    "generate_manager_id",
+    "generate_server_credentials",
+]
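For reference, downstream code can now import the pipeline primitives directly from the server subpackage. A minimal consumption sketch, assuming the 5.0.0 wheel is installed and following the package layout shown above:

from sl_shared_assets.server import (
    ProcessingPipeline,
    ProcessingStatus,
    ProcessingTracker,
    generate_manager_id,
)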
sl_shared_assets/server/job.py

@@ -7,7 +7,6 @@ Since version 3.0.0, this module also provides the specialized JupyterJob class
 notebook servers.
 """
 
-# noinspection PyProtectedMember
 import re
 from pathlib import Path
 import datetime
@@ -49,7 +48,7 @@ class _JupyterConnectionInfo:
 
 
 class Job:
-    """Aggregates the data of a single SLURM-managed job to be executed on the Sun lab BioHPC cluster.
+    """Aggregates the data of a single SLURM-managed job to be executed on the Sun lab's remote compute server.
 
     This class provides the API for constructing any server-side job in the Sun lab. Internally, it wraps an instance
     of a Slurm class to package the job data into the format expected by the SLURM job manager. All jobs managed by this
@@ -222,7 +221,7 @@ class JupyterJob(Job):
         connection_info: Stores the JupyterConnectionInfo instance after the Jupyter server is instantiated.
         host: Stores the hostname of the remote server.
         user: Stores the username used to connect with the remote server.
-        connection_info_file: The absolute path to the file that stores connection information, relative to the remote
+        connection_info_file: The absolute path to the file that stores connection information relative to the remote
             server root.
         _command: Stores the shell command for launching the Jupyter server.
     """
@@ -273,12 +272,12 @@ class JupyterJob(Job):
         """Builds the command to launch the Jupyter notebook server on the remote Sun lab server."""
 
         # Gets the hostname of the compute node and caches it in the connection data file. Also caches the port name.
-        self.add_command('echo "COMPUTE_NODE: $(hostname)" > {}'.format(self.connection_info_file))
-        self.add_command('echo "PORT: {}" >> {}'.format(self.port, self.connection_info_file))
+        self.add_command(f'echo "COMPUTE_NODE: $(hostname)" > {self.connection_info_file}')
+        self.add_command(f'echo "PORT: {self.port}" >> {self.connection_info_file}')
 
         # Generates a random access token for security and caches it in the connection data file.
         self.add_command("TOKEN=$(openssl rand -hex 24)")
-        self.add_command('echo "TOKEN: $TOKEN" >> {}'.format(self.connection_info_file))
+        self.add_command(f'echo "TOKEN: $TOKEN" >> {self.connection_info_file}')
 
         # Builds Jupyter startup command.
         jupyter_cmd = [
@@ -312,7 +311,7 @@ class JupyterJob(Job):
             information to be parsed.
         """
 
-        with open(info_file, "r") as f:
+        with info_file.open() as f:
             content = f.read()
 
         # Extracts information using regex
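The connection file written by the echo commands above holds three 'KEY: value' lines (COMPUTE_NODE, PORT, and TOKEN). A regex-based parser in the spirit of this hunk might look like the following sketch; parse_connection_info is a hypothetical helper, not the library's actual implementation:

import re
from pathlib import Path


def parse_connection_info(info_file: Path) -> dict[str, str]:
    """Extracts the COMPUTE_NODE, PORT, and TOKEN fields from a Jupyter connection info file."""
    content = info_file.read_text()
    info = {}
    for key in ("COMPUTE_NODE", "PORT", "TOKEN"):
        # Each field occupies its own 'KEY: value' line inside the file.
        match = re.search(rf"^{key}: (\S+)$", content, flags=re.MULTILINE)
        if match is None:
            raise ValueError(f"{key} entry not found in {info_file}.")
        info[key] = match.group(1)
    return info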
sl_shared_assets/server/pipeline.py

@@ -0,0 +1,570 @@
+"""This module provides tools and classes for running complex data processing pipelines on remote compute servers.
+A Pipeline represents a higher unit of abstraction relative to the Job class, often leveraging multiple sequential or
+parallel processing jobs to conduct the required processing."""
+
+import copy
+from enum import IntEnum, StrEnum
+from random import randint
+import shutil as sh
+from pathlib import Path
+from dataclasses import field, dataclass
+
+from xxhash import xxh3_64
+from filelock import FileLock
+from ataraxis_base_utilities import console, ensure_directory_exists
+from ataraxis_data_structures import YamlConfig
+from ataraxis_time.time_helpers import get_timestamp
+
+from .job import Job
+from .server import Server
+
+
+class TrackerFileNames(StrEnum):
+    """Defines a set of processing tracker .yaml files used by the Sun lab data preprocessing, processing, and dataset
+    formation pipelines to track the progress of the remotely executed pipelines.
+
+    This enumeration standardizes the names for all processing tracker files used in the lab. It is designed to be used
+    via the get_processing_tracker() function to generate ProcessingTracker instances.
+
+    Notes:
+        The elements in this enumeration match the elements in the ProcessingPipelines enumeration, since each valid
+        ProcessingPipeline instance has an associated ProcessingTracker file instance.
+    """
+
+    MANIFEST = "manifest_generation_tracker.yaml"
+    """This file is used to track the state of the project manifest generation pipeline."""
+    CHECKSUM = "checksum_resolution_tracker.yaml"
+    """This file is used to track the state of the checksum resolution pipeline."""
+    PREPARATION = "processing_preparation_tracker.yaml"
+    """This file is used to track the state of the data processing preparation pipeline."""
+    BEHAVIOR = "behavior_processing_tracker.yaml"
+    """This file is used to track the state of the behavior log processing pipeline."""
+    SUITE2P = "suite2p_processing_tracker.yaml"
+    """This file is used to track the state of the single-day suite2p processing pipeline."""
+    VIDEO = "video_processing_tracker.yaml"
+    """This file is used to track the state of the video (DeepLabCut) processing pipeline."""
+    FORGING = "dataset_forging_tracker.yaml"
+    """This file is used to track the state of the dataset creation (forging) pipeline."""
+    MULTIDAY = "multiday_processing_tracker.yaml"
+    """This file is used to track the state of the multiday suite2p processing pipeline."""
+    ARCHIVING = "data_archiving_tracker.yaml"
+    """This file is used to track the state of the data archiving pipeline."""
+
+
+class ProcessingPipelines(StrEnum):
+    """Defines the set of processing pipelines currently supported in the Sun lab.
+
+    All processing pipelines currently supported by the lab codebase are defined in this enumeration. Primarily,
+    the elements from this enumeration are used in terminal messages and data logging entries to identify the pipelines
+    to the user.
+
+    Notes:
+        The elements in this enumeration match the elements in the ProcessingTracker enumeration, since each valid
+        ProcessingPipeline instance has an associated ProcessingTracker file instance.
+
+        The order of pipelines in this enumeration loosely follows the sequence in which they are executed during the
+        lifetime of the Sun lab data on the remote compute server.
+    """
+
+    MANIFEST = "manifest generation"
+    """Project manifest generation pipeline. This pipeline is generally not used in most runtime contexts. It allows
+    manually regenerating the project manifest .feather file, which is typically only used during testing. All other
+    pipelines automatically conduct the manifest (re)generation at the end of their runtime."""
+    CHECKSUM = "checksum resolution"
+    """Checksum resolution pipeline. Primarily, it is used to verify that the raw data has been transferred to the
+    remote storage server from the main acquisition system PC intact. This pipeline is sometimes also used to
+    regenerate (re-checksum) the data stored on the remote compute server."""
+    PREPARATION = "processing preparation"
+    """Data processing preparation pipeline. Since the compute server uses a two-volume design with a slow (HDD)
+    storage volume and a fast (NVME) working volume, to optimize data processing performance, the data needs to be
+    transferred to the working volume before processing. This pipeline copies the raw data for the target session from
+    the storage volume to the working volume."""
+    BEHAVIOR = "behavior processing"
+    """Behavior processing pipeline. This pipeline is used to process .npz log files to extract animal behavior data
+    acquired during a single session (day). The processed logs also contain the timestamps used to synchronize behavior
+    to video and mesoscope frame data, and experiment configuration and task information."""
+    SUITE2P = "single-day suite2p processing"
+    """Single-day suite2p pipeline. This pipeline is used to extract the cell activity data from 2-photon imaging data
+    acquired during a single session (day)."""
+    VIDEO = "video processing"
+    """DeepLabCut (Video) processing pipeline. This pipeline is used to extract animal pose estimation data from the
+    behavior video frames acquired during a single session (day)."""
+    MULTIDAY = "multi-day suite2p processing"
+    """Multi-day suite2p processing (cell tracking) pipeline. This pipeline is used to track cells processed with the
+    single-day suite2p pipelines across multiple days. It is executed for all sessions marked for integration into the
+    same dataset as the first step of dataset creation."""
+    FORGING = "dataset forging"
+    """Dataset creation (forging) pipeline. This pipeline typically runs after the multi-day pipeline. It extracts and
+    integrates the processed data from various sources, such as brain activity, behavior, videos, etc., into a unified
+    dataset."""
+    ARCHIVING = "data archiving"
+    """Data archiving pipeline. To conserve the (limited) space on the fast working volume, once the data has been
+    processed and integrated into a stable dataset, the processed data folder is moved to the storage volume and all
+    folders under the root session folder on the processed data volume are deleted."""
+
+
+class ProcessingStatus(IntEnum):
+    """Maps integer-based processing pipeline status (state) codes to human-readable names.
+
+    This enumeration is used to track and communicate the progress of Sun lab processing pipelines as they are executed
+    by the remote compute server. Specifically, the codes from this enumeration are used by the ProcessingPipeline
+    class to communicate the status of the managed pipelines to external processes.
+
+    Notes:
+        The status codes from this enumeration track the state of the pipeline as a whole, instead of tracking the
+        state of each job that comprises the pipeline.
+    """
+
+    RUNNING = 0
+    """The pipeline is currently running on the remote server. It may be executing (in progress) or waiting for
+    the required resources to become available (queued)."""
+    SUCCEEDED = 1
+    """The server has successfully completed the processing pipeline."""
+    FAILED = 2
+    """The server has failed to complete the pipeline due to a runtime error."""
+    ABORTED = 3
+    """The pipeline execution has been aborted prematurely, either by the manager process or due to an overriding
+    request from another user."""
+
+
+@dataclass()
+class ProcessingTracker(YamlConfig):
+    """Wraps the .yaml file that tracks the state of a data processing pipeline and provides tools for communicating
+    the state between multiple processes in a thread-safe manner.
+
+    This class is used by all data processing pipelines running on the remote compute server(s) to prevent race
+    conditions and ensure that pipelines have exclusive access to the processed data. It is also used to evaluate the
+    status (success / failure) of each pipeline as they are executed by the remote server.
+
+    Note:
+        In library version 4.0.0, the processing trackers were refactored to work similarly to 'lock' files. That is,
+        when a pipeline starts running on the remote server, its tracker is switched into the 'running' (locked) state
+        until the pipeline completes, aborts, or encounters an error. When the tracker is locked, all modifications to
+        the tracker or processed data have to originate from the same process that started the pipeline that locked the
+        tracker file. This feature supports running complex processing pipelines that use multiple concurrent and / or
+        sequential processing jobs on the remote server.
+
+        This instance frequently refers to a 'manager process' in method documentation. A 'manager process' is the
+        highest-level process that manages the tracked pipeline. When a pipeline runs on remote compute servers, the
+        manager process is typically the process running on the non-server machine (user PC) that submits the remote
+        processing jobs to the compute server (via SSH or a similar protocol). The worker process(es) that run the
+        processing job(s) on the remote compute servers are NOT considered manager processes.
+    """
+
+    file_path: Path
+    """Stores the path to the .yaml file used to cache the tracker data on disk. The class instance functions as a
+    wrapper around the data stored inside the specified .yaml file."""
+    _complete: bool = False
+    """Tracks whether the processing runtime managed by this tracker has finished successfully."""
+    _encountered_error: bool = False
+    """Tracks whether the processing runtime managed by this tracker has encountered an error and has finished
+    unsuccessfully."""
+    _running: bool = False
+    """Tracks whether the processing runtime managed by this tracker is currently running."""
+    _manager_id: int = -1
+    """Stores the xxHash3-64 hash value that represents the unique identifier of the manager process that started the
+    runtime. The manager process is typically running on a remote control machine (computer) and is used to
+    support processing runtimes that are distributed over multiple separate batch jobs on the compute server. This
+    ID should be generated using the 'generate_manager_id()' function exposed by this library."""
+    _lock_path: str = field(init=False)
+    """Stores the path to the .lock file used to ensure that only a single process can simultaneously access the data
+    stored inside the tracker file."""
+
+    def __post_init__(self) -> None:
+        # Generates the .lock file path for the target tracker .yaml file.
+        if self.file_path is not None:
+            self._lock_path = str(self.file_path.with_suffix(self.file_path.suffix + ".lock"))
+
+            # Ensures that the input processing tracker file name is supported.
+            if self.file_path.name not in tuple(TrackerFileNames):
+                message = (
+                    f"Unsupported processing tracker file encountered when instantiating a ProcessingTracker "
+                    f"instance: {self.file_path}. Currently, only the following tracker file names are "
+                    f"supported: {', '.join(tuple(TrackerFileNames))}."
+                )
+                console.error(message=message, error=ValueError)
+
+        else:
+            self._lock_path = ""
+
+    def _load_state(self) -> None:
+        """Reads the current processing state from the wrapped .yaml file."""
+        if self.file_path.exists():
+            # Loads the data for the state values but does not replace the file path or lock attributes.
+            instance: ProcessingTracker = self.from_yaml(self.file_path)  # type: ignore
+            self._complete = copy.copy(instance._complete)
+            self._encountered_error = copy.copy(instance._encountered_error)
+            self._running = copy.copy(instance._running)
+            self._manager_id = copy.copy(instance._manager_id)
+        else:
+            # Otherwise, if the tracker file does not exist, generates a new .yaml file using default instance values
+            # and saves it to disk using the specified tracker file path.
+            self._save_state()
+
+    def _save_state(self) -> None:
+        """Saves the current processing state stored inside instance attributes to the specified .yaml file."""
+        # Resets the _lock_path and file_path to None before dumping the data to .yaml to avoid issues with loading it
+        # back.
+        original = copy.deepcopy(self)
+        original.file_path = None  # type: ignore
+        original._lock_path = None  # type: ignore
+        original.to_yaml(file_path=self.file_path)
+
+    def start(self, manager_id: int) -> None:
+        """Configures the tracker file to indicate that a manager process is currently executing the tracked processing
+        runtime.
+
+        Calling this method effectively 'locks' the tracked session and processing runtime combination to only be
+        accessible from the manager process that calls this method. Calling this method for an already running runtime
+        managed by the same process does not have any effect, so it is safe to call this method at the beginning of
+        each processing job that makes up the runtime.
+
+        Args:
+            manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to start the runtime
+                tracked by this tracker file.
+
+        Raises:
+            TimeoutError: If the .lock file for the target .yaml file cannot be acquired within the timeout period.
+        """
+        # Acquires the lock
+        lock = FileLock(self._lock_path)
+        with lock.acquire(timeout=10.0):
+            # Loads tracker state from the .yaml file
+            self._load_state()
+
+            # If the runtime is already running from a different process, aborts with an error.
+            if self._running and manager_id != self._manager_id:
+                message = (
+                    f"Unable to start the processing runtime from the manager process with id {manager_id}. The "
+                    f"{self.file_path.name} tracker file indicates that the manager process with id {self._manager_id} "
+                    f"is currently executing the tracked runtime. Only a single manager process is allowed to execute "
+                    f"the runtime at the same time."
+                )
+                console.error(message=message, error=RuntimeError)
+                raise RuntimeError(message)  # Fallback to appease mypy, should not be reachable
+
+            # Otherwise, if the runtime is already running for the current manager process, returns without modifying
+            # the tracker data.
+            elif self._running and manager_id == self._manager_id:
+                return
+
+            # Otherwise, locks the runtime for the current manager process and updates the cached tracker data
+            self._running = True
+            self._manager_id = manager_id
+            self._complete = False
+            self._encountered_error = False
+            self._save_state()
+
+    def error(self, manager_id: int) -> None:
+        """Configures the tracker file to indicate that the tracked processing runtime encountered an error and failed
+        to complete.
+
+        This method fulfills two main purposes. First, it 'unlocks' the runtime, allowing other manager processes to
+        interface with the tracked runtime. Second, it updates the tracker file to reflect that the runtime was
+        interrupted due to an error, which is used by the manager processes to detect and handle processing failures.
+
+        Args:
+            manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
+                runtime tracked by this tracker file has encountered an error.
+
+        Raises:
+            TimeoutError: If the .lock file for the target .yaml file cannot be acquired within the timeout period.
+        """
+        lock = FileLock(self._lock_path)
+        with lock.acquire(timeout=10.0):
+            # Loads tracker state from the .yaml file
+            self._load_state()
+
+            # If the runtime is not running, returns without doing anything
+            if not self._running:
+                return
+
+            # Ensures that only the active manager process can report runtime errors using the tracker file
+            if manager_id != self._manager_id:
+                message = (
+                    f"Unable to report that the processing runtime has encountered an error from the manager process "
+                    f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
+                    f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
+                    f"with the runtime."
+                )
+                console.error(message=message, error=RuntimeError)
+                raise RuntimeError(message)  # Fallback to appease mypy, should not be reachable
+
+            # Indicates that the runtime aborted with an error
+            self._running = False
+            self._manager_id = -1
+            self._complete = False
+            self._encountered_error = True
+            self._save_state()
+
+    def stop(self, manager_id: int) -> None:
+        """Configures the tracker file to indicate that the tracked processing runtime has been completed successfully.
+
+        This method 'unlocks' the runtime, allowing other manager processes to interface with the tracked runtime. It
+        also configures the tracker file to indicate that the runtime has been completed successfully, which is used
+        by the manager processes to detect and handle processing completion.
+
+        Args:
+            manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
+                runtime tracked by this tracker file has been completed successfully.
+
+        Raises:
+            TimeoutError: If the .lock file for the target .yaml file cannot be acquired within the timeout period.
+        """
+        lock = FileLock(self._lock_path)
+        with lock.acquire(timeout=10.0):
+            # Loads tracker state from the .yaml file
+            self._load_state()
+
+            # If the runtime is not running, does not do anything
+            if not self._running:
+                return
+
+            # Ensures that only the active manager process can report runtime completion using the tracker file
+            if manager_id != self._manager_id:
+                message = (
+                    f"Unable to report that the processing runtime has completed successfully from the manager process "
+                    f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
+                    f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
+                    f"with the runtime."
+                )
+                console.error(message=message, error=RuntimeError)
+                raise RuntimeError(message)  # Fallback to appease mypy, should not be reachable
+
+            # Otherwise, marks the runtime as complete (stopped)
+            self._running = False
+            self._manager_id = -1
+            self._complete = True
+            self._encountered_error = False
+            self._save_state()
+
+    def abort(self) -> None:
+        """Resets the runtime tracker file to the default state.
+
+        This method can be used to reset the runtime tracker file, regardless of the current runtime state. Unlike
+        other instance methods, this method can be called from any manager process, even if the runtime is already
+        locked by another process. This method is only intended to be used in the case of an emergency to 'unlock' a
+        deadlocked runtime.
+        """
+        lock = FileLock(self._lock_path)
+        with lock.acquire(timeout=10.0):
+            # Loads tracker state from the .yaml file.
+            self._load_state()
+
+            # Resets the tracker file to the default state. Note, this does not indicate that the runtime completed,
+            # nor that it has encountered an error.
+            self._running = False
+            self._manager_id = -1
+            self._complete = False
+            self._encountered_error = False
+            self._save_state()
+
+    @property
+    def is_complete(self) -> bool:
+        """Returns True if the tracker wrapped by the instance indicates that the processing runtime has been completed
+        successfully and that the runtime is not currently ongoing."""
+        lock = FileLock(self._lock_path)
+        with lock.acquire(timeout=10.0):
+            # Loads tracker state from the .yaml file
+            self._load_state()
+            return self._complete
+
+    @property
+    def encountered_error(self) -> bool:
+        """Returns True if the tracker wrapped by the instance indicates that the processing runtime has aborted due
+        to encountering an error."""
+        lock = FileLock(self._lock_path)
+        with lock.acquire(timeout=10.0):
+            # Loads tracker state from the .yaml file
+            self._load_state()
+            return self._encountered_error
+
+    @property
+    def is_running(self) -> bool:
+        """Returns True if the tracker wrapped by the instance indicates that the processing runtime is currently
+        ongoing."""
+        lock = FileLock(self._lock_path)
+        with lock.acquire(timeout=10.0):
+            # Loads tracker state from the .yaml file
+            self._load_state()
+            return self._running
+
+
+@dataclass()
+class ProcessingPipeline:
+    """Encapsulates access to a processing pipeline running on the remote compute server.
+
+    This class functions as an interface for all data processing pipelines running on Sun lab compute servers. It is
+    pipeline-type-agnostic and works for all data processing pipelines supported by this library. After instantiation,
+    the class automatically handles all interactions with the server necessary to run the remote processing pipeline
+    and verify the runtime outcome via the runtime_cycle() method, which has to be called cyclically until the pipeline
+    is complete.
+
+    Notes:
+        Each pipeline may be executed in one or more stages, each stage using one or more parallel jobs. As such, each
+        pipeline can be seen as an execution graph that sequentially submits batches of jobs to the remote server. The
+        processing graph for each pipeline is fully resolved at the instantiation of this class instance, so each
+        instance contains the necessary data to run the entire processing pipeline.
+
+        The minimum self-contained unit of the processing pipeline is a single job. Since jobs can depend on the output
+        of other jobs, they are organized into stages based on the dependency graph between jobs. Combined with cluster
+        management software, such as SLURM, this class can efficiently execute processing pipelines on scalable compute
+        clusters.
+    """
+
+    pipeline_type: ProcessingPipelines
+    """Stores the name of the processing pipeline managed by this instance. Primarily, this is used to identify the
+    pipeline to the user in terminal messages and logs."""
+    server: Server
+    """Stores the reference to the Server object that maintains bidirectional communication with the remote server
+    running the pipeline."""
+    manager_id: int
+    """The unique identifier for the manager process that constructs and manages the runtime of the tracked pipeline.
+    This is used to ensure that only a single pipeline instance can work with each session's data at the same time on
+    the remote server."""
+    jobs: dict[int, tuple[tuple[Job, Path], ...]]
+    """Stores the dictionary that maps the pipeline processing stage integer-codes to two-element tuples. Each tuple
+    stores the Job objects and the paths to their remote working directories to be submitted to the server at each
+    stage."""
+    remote_tracker_path: Path
+    """The path to the pipeline's processing tracker .yaml file stored on the remote compute server."""
+    local_tracker_path: Path
+    """The path to the pipeline's processing tracker .yaml file on the local machine. The remote file is pulled to
+    this location when the instance verifies the outcome of each tracked pipeline's processing stage."""
+    session: str
+    """The ID of the session whose data is being processed by the tracked pipeline."""
+    animal: str
+    """The ID of the animal whose data is being processed by the tracked pipeline."""
+    project: str
+    """The name of the project whose data is being processed by the tracked pipeline."""
+    keep_job_logs: bool = False
+    """Determines whether to keep the logs for the jobs making up the pipeline execution graph or (default) to remove
+    them after the pipeline successfully ends its runtime. If the pipeline fails to complete its runtime, the logs are
+    kept regardless of this setting."""
+    pipeline_status: ProcessingStatus | int = ProcessingStatus.RUNNING
+    """Stores the current status of the tracked remote pipeline. This field is updated each time the runtime_cycle()
+    instance method is called."""
+    _pipeline_stage: int = 0
+    """Stores the current stage of the tracked pipeline. This field is monotonically incremented by the runtime_cycle()
+    method to sequentially submit batches of jobs to the server in a processing-stage-driven fashion."""
+
+    def __post_init__(self) -> None:
+        """Carries out the necessary filesystem setup tasks to support pipeline execution."""
+
+        # Ensures that the input processing pipeline type is supported.
+        if self.pipeline_type not in tuple(ProcessingPipelines):
+            message = (
+                f"Unsupported processing pipeline type encountered when instantiating a ProcessingPipeline "
+                f"instance: {self.pipeline_type}. Currently, only the following pipeline types are "
+                f"supported: {', '.join(tuple(ProcessingPipelines))}."
+            )
+            console.error(message=message, error=ValueError)
+
+        ensure_directory_exists(self.local_tracker_path)  # Ensures that the local temporary directory exists
+
+    def runtime_cycle(self) -> None:
+        """Checks the current status of the tracked pipeline and, if necessary, submits additional batches of jobs to
+        the remote server to progress the pipeline.
+
+        This method is the main entry point for all interactions with the processing pipeline managed by this instance.
+        It checks the current state of the pipeline, advances the pipeline's processing stage, and submits the necessary
+        jobs to the remote server. The runtime manager process should call this method repeatedly (cyclically) to run
+        the pipeline until the 'is_running' property of the instance returns False.
+
+        Notes:
+            While the 'is_running' property can be used to determine whether the pipeline is still running, to resolve
+            the final status of the pipeline (success or failure), the manager process should access the
+            'status' instance property.
+        """
+
+        # This clause is executed the first time the method is called for the newly initialized pipeline tracker
+        # instance. It submits the first batch of processing jobs (first stage) to the remote server. For one-stage
+        # pipelines, this is the only time when pipeline jobs are submitted to the server.
+        if self._pipeline_stage == 0:
+            self._pipeline_stage += 1
+            self._submit_jobs()
+
+        # Waits until all jobs submitted to the server as part of the current processing stage are completed before
+        # advancing further.
+        for job, _ in self.jobs[self._pipeline_stage]:  # Ignores working directories as part of this iteration.
+            if not self.server.job_complete(job=job):
+                return
+
+        # If all jobs for the current processing stage have completed, checks the pipeline's processing tracker file to
+        # determine if all jobs completed successfully.
+        self.server.pull_file(remote_file_path=self.remote_tracker_path, local_file_path=self.local_tracker_path)
+        tracker = ProcessingTracker(self.local_tracker_path)
+
+        # If the stage failed due to encountering an error, removes the local tracker copy and marks the pipeline
+        # as 'failed'. It is expected that the pipeline state is then handled by the manager process to notify the
+        # user about the runtime failure.
+        if tracker.encountered_error:
+            sh.rmtree(self.local_tracker_path.parent)  # Removes local temporary data
+            self.pipeline_status = ProcessingStatus.FAILED  # Updates the processing status to 'failed'
+
+        # If this was the last processing stage, the tracker indicates that the processing has been completed. In this
+        # case, initializes the shutdown sequence:
+        elif tracker.is_complete:
+            sh.rmtree(self.local_tracker_path.parent)  # Removes local temporary data
+            self.pipeline_status = ProcessingStatus.SUCCEEDED  # Updates the job status to 'succeeded'
+
+            # If the pipeline was configured to remove logs after completing successfully, removes the runtime log for
+            # each job submitted as part of this pipeline from the remote server.
+            if not self.keep_job_logs:
+                for stage_jobs in self.jobs.values():
+                    for _, directory in stage_jobs:  # Ignores job objects as part of this iteration.
+                        self.server.remove(remote_path=directory, recursive=True, is_dir=True)
+
+        # If the processing is not complete (according to the tracker), this indicates that the pipeline has more
+        # stages to execute. In this case, increments the processing stage tracker and submits the next batch of jobs
+        # to the server.
+        elif tracker.is_running:
+            self._pipeline_stage += 1
+            self._submit_jobs()
+
+        # The final and the rarest state: the pipeline was aborted before it finished the runtime. Generally, this
+        # state should not be encountered during most runtimes.
+        else:
+            self.pipeline_status = ProcessingStatus.ABORTED
+
+    def _submit_jobs(self) -> None:
+        """This worker method submits the processing jobs for the currently active processing stage to the remote
+        server.
+
+        It is used internally by the runtime_cycle() method to iteratively execute all stages of the managed processing
+        pipeline on the remote server.
+        """
+        for job, _ in self.jobs[self._pipeline_stage]:
+            self.server.submit_job(job=job, verbose=False)  # Silences terminal printouts
+
+    @property
+    def is_running(self) -> bool:
+        """Returns True if the pipeline is currently running, False otherwise."""
+        if self.pipeline_status == ProcessingStatus.RUNNING:
+            return True
+        return False
+
+    @property
+    def status(self) -> ProcessingStatus:
+        """Returns the current status of the pipeline packaged into a ProcessingStatus instance."""
+        return ProcessingStatus(self.pipeline_status)
+
+
+def generate_manager_id() -> int:
+    """Generates and returns a unique integer identifier that can be used to identify the manager process that calls
+    this function.
+
+    The identifier is generated based on the current timestamp, accurate to microseconds, and a random number between 1
+    and 9999999999999. This ensures that the identifier is unique for each function call. The generated identifier
+    string is converted to a unique integer value using the xxHash-64 algorithm before it is returned to the caller.
+
+    Notes:
+        This function should be used to generate manager process identifiers for working with ProcessingTracker
+        instances from sl-shared-assets version 4.0.0 and above.
+    """
+    timestamp = get_timestamp()
+    random_number = randint(1, 9999999999999)
+    manager_id = f"{timestamp}_{random_number}"
+    id_hash = xxh3_64()
+    id_hash.update(manager_id)
+    return id_hash.intdigest()
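Taken together, the docstrings above imply the following manager-side control flow. This is a minimal sketch under stated assumptions: make_pipeline is a hypothetical factory standing in for the construction of the jobs graph, Server connection, and tracker paths (which this diff does not show), and the 30-second polling cadence is an arbitrary choice:

import time

from sl_shared_assets.server import ProcessingStatus, generate_manager_id

manager_id = generate_manager_id()  # unique xxHash3-64-derived identifier for this manager process
pipeline = make_pipeline(manager_id=manager_id)  # hypothetical factory, not a library function

# Drives the pipeline until it leaves the RUNNING state, submitting each stage's
# job batch as the previous stage completes.
while pipeline.is_running:
    pipeline.runtime_cycle()
    time.sleep(30)

if pipeline.status is ProcessingStatus.FAILED:
    print(f"{pipeline.pipeline_type} pipeline failed; job logs are kept on the server for inspection.")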