sl-shared-assets 5.0.0__py3-none-any.whl → 5.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic.

@@ -1,6 +1,6 @@
- """This module provides tools and classes for running complex data processing pipelines on remote compute servers.
- A Pipeline represents a higher unit of abstraction relative to the Job class, often leveraging multiple sequential or
- parallel processing jobs to conduct the required processing."""
+ """This module provides tools used to run complex data processing pipelines on remote compute servers. A processing
+ pipeline represents a higher unit of abstraction relative to the Job class, often leveraging multiple sequential or
+ parallel jobs to process the data."""
 
  import copy
  from enum import IntEnum, StrEnum
@@ -20,11 +20,8 @@ from .server import Server
 
 
  class TrackerFileNames(StrEnum):
- """Defines a set of processing tracker .yaml files used by the Sun lab data preprocessing, processing, and dataset
- formation pipelines to track the progress of the remotely executed pipelines.
-
- This enumeration standardizes the names for all processing tracker files used in the lab. It is designed to be used
- via the get_processing_tracker() function to generate ProcessingTracker instances.
+ """Stores the names of the processing tracker .yaml files used by the Sun lab data preprocessing, processing, and
+ dataset formation pipelines to track the pipeline's progress.
 
  Notes:
      The elements in this enumeration match the elements in the ProcessingPipelines enumeration, since each valid
@@ -52,18 +49,14 @@ class TrackerFileNames(StrEnum):
 
 
  class ProcessingPipelines(StrEnum):
- """Defines the set of processing pipelines currently supported in the Sun lab.
-
- All processing pipelines currently supported by the lab codebase are defined in this enumeration. Primarily,
- the elements from this enumeration are used in terminal messages and data logging entries to identify the pipelines
- to the user.
+ """Stores the names of the data processing pipelines currently used in the lab.
 
  Notes:
- The elements in this enumeration match the elements in the ProcessingTracker enumeration, since each valid
+ The elements in this enumeration match the elements in the TrackerFileNames enumeration, since each valid
  ProcessingPipeline instance has an associated ProcessingTracker file instance.
 
  The order of pipelines in this enumeration loosely follows the sequence in which they are executed during the
- lifetime of the Sun lab data on the remote compute server.
+ Sun lab data workflow.
 
  """
 
  MANIFEST = "manifest generation"
@@ -72,8 +65,8 @@ class ProcessingPipelines(StrEnum):
  pipeline automatically conduct the manifest (re)generation at the end of their runtime."""
  CHECKSUM = "checksum resolution"
  """Checksum resolution pipeline. Primarily, it is used to verify that the raw data has been transferred to the
- remote storage server from the main acquisition system PC intact. This pipeline is sometimes also used to
- regenerate (re-checksum) the data stored on the remote compute server."""
+ remote storage server from the main acquisition system PC intact. This pipeline is also used to regenerate
+ (re-checksum) the data stored on the remote compute server."""
  PREPARATION = "processing preparation"
  """Data processing preparation pipeline. Since the compute server uses a two-volume design with a slow (HDD) storage
  volume and a fast (NVME) working volume, to optimize data processing performance, the data needs to be transferred
@@ -81,8 +74,7 @@ class ProcessingPipelines(StrEnum):
  volume to the working volume."""
  BEHAVIOR = "behavior processing"
  """Behavior processing pipeline. This pipeline is used to process .npz log files to extract animal behavior data
- acquired during a single session (day). The processed logs also contain the timestamps use to synchronize behavior
- to video and mesoscope frame data, and experiment configuration and task information."""
+ acquired during a single session (day)."""
  SUITE2P = "single-day suite2p processing"
  """Single-day suite2p pipeline. This pipeline is used to extract the cell activity data from 2-photon imaging data
  acquired during a single session (day)."""
@@ -91,24 +83,22 @@ class ProcessingPipelines(StrEnum):
  behavior video frames acquired during a single session (day)."""
  MULTIDAY = "multi-day suite2p processing"
  """Multi-day suite2p processing (cell tracking) pipeline. This pipeline is used to track cells processed with the
- single-day suite2p pipelines across multiple days. It is executed for all sessions marked for integration into the
- same dataset as the first step of dataset creation."""
+ single-day suite2p pipelines across multiple days."""
  FORGING = "dataset forging"
  """Dataset creation (forging) pipeline. This pipeline typically runs after the multi-day pipeline. It extracts and
- integrates the processed data from various sources such as brain activity, behavior, videos, etc., into a unified
- dataset."""
+ integrates the processed data from all sources into a unified dataset."""
  ARCHIVING = "data archiving"
- """Data archiving pipeline. To conserve the (limited) space on the fast working volume, once the data has been
- processed and integrated into a stable dataset, the processed data folder is moved to the storage volume and all
- folders under the root session folder on the processed data volume are deleted."""
+ """Data archiving pipeline. To conserve the (limited) space on the remote compute server's fast working volume,
+ once the data has been processed and integrated into a stable dataset, the processed data folder is moved to the
+ storage volume. After the data is moved, all folders under the root session folder on the processed data volume are
+ deleted to free up the processing volume space."""
 
 
  class ProcessingStatus(IntEnum):
  """Maps integer-based processing pipeline status (state) codes to human-readable names.
 
- This enumeration is used to track and communicate the progress of Sun lab processing pipelines as they are executed
- by the remote compute server. Specifically, the codes from this enumeration are used by the ProcessingPipeline
- class to communicate the status of the managed pipelines to external processes.
+ The codes from this enumeration are used by the ProcessingPipeline class to communicate the status of the managed
+ pipelines to manager processes that oversee the execution of each pipeline.
 
  Notes:
      The status codes from this enumeration track the state of the pipeline as a whole, instead of tracking the
@@ -129,46 +119,51 @@ class ProcessingStatus(IntEnum):
 
  @dataclass()
  class ProcessingTracker(YamlConfig):
- """Wraps the .yaml file that tracks the state of a data processing pipeline and provides tools for communicating the
- state between multiple processes in a thread-safe manner.
+ """Wraps the .yaml file that tracks the state of a data processing pipeline and provides tools for communicating
+ this state between multiple processes in a thread-safe manner.
 
  This class is used by all data processing pipelines running on the remote compute server(s) to prevent race
- conditions and ensure that pipelines have exclusive access to the processed data. It is also used to evaluate the
- status (success / failure) of each pipeline as they are executed by the remote server.
+ conditions. It is also used to evaluate the status (success / failure) of each pipeline as they are executed by the
+ remote server.
 
  Note:
- In library version 4.0.0 the processing trackers have been refactored to work similar to 'lock' files. That is,
- when a pipeline starts running on the remote server, its tracker is switched into the 'running' (locked) state
- until the pipeline completes, aborts, or encounters an error. When the tracker is locked, all modifications to
- the tracker or processed data have to originate from the same process that started the pipeline that locked the
- tracker file. This feature supports running complex processing pipelines that use multiple concurrent and / or
- sequential processing jobs on the remote server.
-
- This instance frequently refers to a 'manager process' in method documentation. A 'manager process' is the
+ This instance frequently refers to the 'manager process' in method documentation. A 'manager process' is the
  highest-level process that manages the tracked pipeline. When a pipeline runs on remote compute servers, the
  manager process is typically the process running on the non-server machine (user PC) that submits the remote
- processing jobs to the compute server (via SSH or similar protocol). The worker process(es) that run the
- processing job(s) on the remote compute servers are NOT considered manager processes.
+ processing jobs to the compute server. The worker process(es) that run the processing job(s) on the remote
+ compute servers are not considered manager processes.
+
+ The processing trackers work similar to 'lock' files. When a pipeline starts running on the remote server, its
+ tracker is switched into the 'running' (locked) state until the pipeline completes, aborts, or encounters an
+ error. When the tracker is locked, all modifications to the tracker have to originate from the same manager
+ process that started the pipeline. This feature supports running complex processing pipelines that use multiple
+ concurrent and / or sequential processing jobs on the remote server.
  """
 
  file_path: Path
  """Stores the path to the .yaml file used to cache the tracker data on disk. The class instance functions as a
  wrapper around the data stored inside the specified .yaml file."""
  _complete: bool = False
- """Tracks whether the processing runtime managed by this tracker has finished successfully."""
+ """Tracks whether the processing pipeline managed by this tracker has finished successfully."""
  _encountered_error: bool = False
- """Tracks whether the processing runtime managed by this tracker has encountered an error and has finished
+ """Tracks whether the processing pipeline managed by this tracker has encountered an error and has finished
  unsuccessfully."""
  _running: bool = False
- """Tracks whether the processing runtime managed by this tracker is currently running."""
+ """Tracks whether the processing pipeline managed by this tracker is currently running."""
  _manager_id: int = -1
  """Stores the xxHash3-64 hash value that represents the unique identifier of the manager process that started the
- runtime. The manager process is typically running on a remote control machine (computer) and is used to
+ pipeline. The manager process is typically running on a remote control machine (computer) and is used to
  support processing runtimes that are distributed over multiple separate batch jobs on the compute server. This
  ID should be generated using the 'generate_manager_id()' function exposed by this library."""
  _lock_path: str = field(init=False)
  """Stores the path to the .lock file used to ensure that only a single process can simultaneously access the data
  stored inside the tracker file."""
+ _job_count: int = 1
+ """Stores the total number of jobs to be executed as part of the tracked pipeline. This is used to
+ determine when the tracked pipeline is fully complete when tracking intermediate job outcomes."""
+ _completed_jobs: int = 0
+ """Stores the total number of jobs completed by the tracked pipeline. This is used together with the '_job_count'
+ field to determine when the tracked pipeline is fully complete."""
 
  def __post_init__(self) -> None:
  # Generates the .lock file path for the target tracker .yaml file.
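Taken together, the fields above give ProcessingTracker lock-file semantics for a whole pipeline. A minimal manager-side usage sketch, assuming a hypothetical tracker path and import location (start(), stop(), error(), is_complete, is_running, and generate_manager_id() are the members shown in this diff):

from pathlib import Path

from sl_shared_assets import ProcessingTracker, generate_manager_id  # import path is an assumption


def submit_remote_job() -> None:
    """Hypothetical stand-in for submitting the remote processing job and waiting for it to finish."""


manager_id = generate_manager_id()
tracker = ProcessingTracker(file_path=Path("session/behavior_processing_tracker.yaml"))  # hypothetical path

tracker.start(manager_id=manager_id)  # locks the pipeline for this manager; job_count defaults to 1
try:
    submit_remote_job()
except Exception:
    tracker.error(manager_id=manager_id)  # unlocks the tracker and records the failure
    raise
tracker.stop(manager_id=manager_id)  # the single job is done, so the pipeline is marked complete

assert tracker.is_complete and not tracker.is_running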
@@ -210,18 +205,19 @@ class ProcessingTracker(YamlConfig):
  original._lock_path = None # type: ignore
  original.to_yaml(file_path=self.file_path)
 
- def start(self, manager_id: int) -> None:
+ def start(self, manager_id: int, job_count: int = 1) -> None:
  """Configures the tracker file to indicate that a manager process is currently executing the tracked processing
- runtime.
+ pipeline.
 
- Calling this method effectively 'locks' the tracked session and processing runtime combination to only be
- accessible from the manager process that calls this method. Calling this method for an already running runtime
- managed by the same process does not have any effect, so it is safe to call this method at the beginning of
- each processing job that makes up the runtime.
+ Calling this method locks the tracked session and processing pipeline combination to only be accessible from the
+ manager process that calls this method. Calling this method for an already running pipeline managed by the same
+ process does not have any effect, so it is safe to call this method at the beginning of each processing job that
+ makes up the pipeline.
 
  Args:
- manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to start the runtime
- tracked by this tracker file.
+ manager_id: The unique identifier of the manager process which attempts to start the pipeline tracked by
+ this tracker file.
+ job_count: The total number of jobs to be executed as part of the tracked pipeline.
 
  Raises:
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
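As the start() docstring above notes, repeated calls are safe for the manager that holds the lock, while other managers are rejected. Continuing the previous sketch (tracker and manager_id as defined there; the second identifier is illustrative):

other_manager_id = generate_manager_id()

tracker.start(manager_id=manager_id)  # first call locks the pipeline for this manager
tracker.start(manager_id=manager_id)  # same manager: no effect, safe to repeat at the start of every job
try:
    tracker.start(manager_id=other_manager_id)  # different manager while the tracker is locked
except RuntimeError:
    print("the pipeline is already locked by another manager process")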
@@ -232,40 +228,41 @@ class ProcessingTracker(YamlConfig):
  # Loads tracker state from the .yaml file
  self._load_state()
 
- # If the runtime is already running from a different process, aborts with an error.
+ # If the pipeline is already running from a different process, aborts with an error.
  if self._running and manager_id != self._manager_id:
  message = (
- f"Unable to start the processing runtime from the manager process with id {manager_id}. The "
+ f"Unable to start the processing pipeline from the manager process with id {manager_id}. The "
  f"{self.file_path.name} tracker file indicates that the manager process with id {self._manager_id} "
- f"is currently executing the tracked runtime. Only a single manager process is allowed to execute "
- f"the runtime at the same time."
+ f"is currently executing the tracked pipeline. Only a single manager process is allowed to execute "
+ f"the pipeline at the same time."
  )
  console.error(message=message, error=RuntimeError)
  raise RuntimeError(message) # Fallback to appease mypy, should not be reachable
 
- # Otherwise, if the runtime is already running for the current manager process, returns without modifying
+ # Otherwise, if the pipeline is already running for the current manager process, returns without modifying
  # the tracker data.
  elif self._running and manager_id == self._manager_id:
  return
 
- # Otherwise, locks the runtime for the current manager process and updates the cached tracker data
+ # Otherwise, locks the pipeline for the current manager process and updates the cached tracker data
  self._running = True
  self._manager_id = manager_id
  self._complete = False
  self._encountered_error = False
+ self._job_count = job_count
  self._save_state()
 
  def error(self, manager_id: int) -> None:
- """Configures the tracker file to indicate that the tracked processing runtime encountered an error and failed
+ """Configures the tracker file to indicate that the tracked processing pipeline encountered an error and failed
  to complete.
 
- This method fulfills two main purposes. First, it 'unlocks' the runtime, allowing other manager processes to
- interface with the tracked runtime. Second, it updates the tracker file to reflect that the runtime was
- interrupted due to an error, which is used by the manager processes to detect and handle processing failures.
+ This method unlocks the pipeline, allowing other manager processes to interface with the tracked pipeline. It
+ also updates the tracker file to reflect that the pipeline was interrupted due to an error, which is used by the
+ manager processes to detect and handle processing failures.
 
  Args:
- manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
- runtime tracked by this tracker file has encountered an error.
+ manager_id: The unique identifier of the manager process which attempts to report that the pipeline tracked
+ by this tracker file has encountered an error.
 
  Raises:
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
@@ -275,22 +272,22 @@ class ProcessingTracker(YamlConfig):
  # Loads tracker state from the .yaml file
  self._load_state()
 
- # If the runtime is not running, returns without doing anything
+ # If the pipeline is not running, returns without doing anything
  if not self._running:
  return
 
- # Ensures that only the active manager process can report runtime errors using the tracker file
+ # Ensures that only the active manager process can report pipeline errors using the tracker file
  if manager_id != self._manager_id:
  message = (
- f"Unable to report that the processing runtime has encountered an error from the manager process "
- f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
+ f"Unable to report that the processing pipeline has encountered an error from the manager process "
+ f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the pipeline is "
  f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
- f"with the runtime."
+ f"with the pipeline."
  )
  console.error(message=message, error=RuntimeError)
  raise RuntimeError(message) # Fallback to appease mypy, should not be reachable
 
- # Indicates that the runtime aborted with an error
+ # Indicates that the pipeline aborted with an error
  self._running = False
  self._manager_id = -1
  self._complete = False
@@ -298,15 +295,19 @@ class ProcessingTracker(YamlConfig):
  self._save_state()
 
  def stop(self, manager_id: int) -> None:
- """Configures the tracker file to indicate that the tracked processing runtime has been completed successfully.
+ """Configures the tracker file to indicate that the tracked processing pipeline has been completed successfully.
 
- This method 'unlocks' the runtime, allowing other manager processes to interface with the tracked runtime. It
- also configures the tracker file to indicate that the runtime has been completed successfully, which is used
+ This method unlocks the pipeline, allowing other manager processes to interface with the tracked pipeline. It
+ also configures the tracker file to indicate that the pipeline has been completed successfully, which is used
  by the manager processes to detect and handle processing completion.
 
+ Notes:
+ This method tracks how many jobs executed as part of the tracked pipeline have been completed and only
+ marks the pipeline as complete if all its processing jobs have been completed.
+
  Args:
- manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
- runtime tracked by this tracker file has been completed successfully.
+ manager_id: The unique identifier of the manager process which attempts to report that the pipeline tracked
+ by this tracker file has been completed successfully.
 
  Raises:
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
@@ -316,53 +317,59 @@ class ProcessingTracker(YamlConfig):
  # Loads tracker state from the .yaml file
  self._load_state()
 
- # If the runtime is not running, does not do anything
+ # If the pipeline is not running, does not do anything
  if not self._running:
  return
 
- # Ensures that only the active manager process can report runtime completion using the tracker file
+ # Ensures that only the active manager process can report pipeline completion using the tracker file
  if manager_id != self._manager_id:
  message = (
- f"Unable to report that the processing runtime has completed successfully from the manager process "
- f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
- f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
- f"with the runtime."
+ f"Unable to report that the processing pipeline has completed successfully from the manager "
+ f"process with id {manager_id}. The {self.file_path.name} tracker file indicates that the pipeline "
+ f"is managed by the process with id {self._manager_id}, preventing other processes from "
+ f"interfacing with the pipeline."
  )
  console.error(message=message, error=RuntimeError)
  raise RuntimeError(message) # Fallback to appease mypy, should not be reachable
 
- # Otherwise, marks the runtime as complete (stopped)
- self._running = False
- self._manager_id = -1
- self._complete = True
- self._encountered_error = False
- self._save_state()
+ # Increments completed job tracker
+ self._completed_jobs += 1
+
+ # If the pipeline has completed all required jobs, marks the pipeline as complete (stopped)
+ if self._completed_jobs >= self._job_count:
+ self._running = False
+ self._manager_id = -1
+ self._complete = True
+ self._encountered_error = False
+ self._save_state()
 
  def abort(self) -> None:
- """Resets the runtime tracker file to the default state.
+ """Resets the pipeline tracker file to the default state.
 
- This method can be used to reset the runtime tracker file, regardless of the current runtime state. Unlike other
- instance methods, this method can be called from any manager process, even if the runtime is already locked by
- another process. This method is only intended to be used in the case of emergency to 'unlock' a deadlocked
- runtime.
+ This method can be used to reset the pipeline tracker file, regardless of the current pipeline state. Unlike
+ other instance methods, this method can be called from any manager process, even if the pipeline is already
+ locked by another process. This method is only intended to be used in the case of emergency to unlock a
+ deadlocked pipeline.
  """
  lock = FileLock(self._lock_path)
  with lock.acquire(timeout=10.0):
  # Loads tracker state from the .yaml file.
  self._load_state()
 
- # Resets the tracker file to the default state. Note, does not indicate that the runtime completed nor
+ # Resets the tracker file to the default state. Note, does not indicate that the pipeline completed nor
  # that it has encountered an error.
  self._running = False
  self._manager_id = -1
+ self._completed_jobs = 0
+ self._job_count = 1
  self._complete = False
  self._encountered_error = False
  self._save_state()
 
  @property
  def is_complete(self) -> bool:
- """Returns True if the tracker wrapped by the instance indicates that the processing runtime has been completed
- successfully and that the runtime is not currently ongoing."""
+ """Returns True if the tracker wrapped by the instance indicates that the processing pipeline has been completed
+ successfully and that the pipeline is not currently ongoing."""
  lock = FileLock(self._lock_path)
  with lock.acquire(timeout=10.0):
  # Loads tracker state from the .yaml file
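The reworked stop() above only releases the tracker once every job in the pipeline has reported completion. A short sketch of that counting behavior for a three-job pipeline (same assumed import location and a hypothetical tracker path as in the earlier sketches):

from pathlib import Path

from sl_shared_assets import ProcessingTracker, generate_manager_id  # import path is an assumption

manager_id = generate_manager_id()
tracker = ProcessingTracker(file_path=Path("session/suite2p_processing_tracker.yaml"))  # hypothetical path

tracker.start(manager_id=manager_id, job_count=3)  # the pipeline is split into three jobs
for _ in range(3):
    # ... wait for the next remote job to finish, then report it:
    tracker.stop(manager_id=manager_id)

# Only the third stop() call switches the tracker out of the 'running' (locked) state.
assert tracker.is_complete and not tracker.is_running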
@@ -371,7 +378,7 @@ class ProcessingTracker(YamlConfig):
 
  @property
  def encountered_error(self) -> bool:
- """Returns True if the tracker wrapped by the instance indicates that the processing runtime has aborted due
+ """Returns True if the tracker wrapped by the instance indicates that the processing pipeline has aborted due
  to encountering an error."""
  lock = FileLock(self._lock_path)
  with lock.acquire(timeout=10.0):
@@ -381,7 +388,7 @@ class ProcessingTracker(YamlConfig):
 
  @property
  def is_running(self) -> bool:
- """Returns True if the tracker wrapped by the instance indicates that the processing runtime is currently
+ """Returns True if the tracker wrapped by the instance indicates that the processing pipeline is currently
  ongoing."""
  lock = FileLock(self._lock_path)
  with lock.acquire(timeout=10.0):
@@ -392,19 +399,19 @@ class ProcessingTracker(YamlConfig):
 
 
  @dataclass()
  class ProcessingPipeline:
- """Encapsulates access to a processing pipeline running on the remote compute server.
+ """Provides an interface to construct and execute data processing pipelines on the target remote compute server.
 
  This class functions as an interface for all data processing pipelines running on Sun lab compute servers. It is
- pipeline-type-agnostic and works for all data processing pipelines supported by this library. After instantiation,
- the class automatically handles all interactions with the server necessary to run the remote processing pipeline and
+ pipeline-type-agnostic and works for all data processing pipelines used in the lab. After instantiation, the class
+ automatically handles all interactions with the server necessary to run the remote processing pipeline and
  verify the runtime outcome via the runtime_cycle() method that has to be called cyclically until the pipeline is
  complete.
 
  Notes:
- Each pipeline may be executed in one or more stages, each stage using one or more parallel jobs. As such, each
- pipeline can be seen as an execution graph that sequentially submits batches of jobs to the remote server. The
- processing graph for each pipeline is fully resolved at the instantiation of this class instance, so each
- instance contains the necessary data to run the entire processing pipeline.
+ Each pipeline is executed as a series of one or more stages with each stage using one or more parallel jobs.
+ Therefore, each pipeline can be seen as an execution graph that sequentially submits batches of jobs to the
+ remote server. The processing graph for each pipeline is fully resolved at the instantiation of this class, so
+ each instance contains the necessary data to run the entire processing pipeline.
 
  The minimum self-contained unit of the processing pipeline is a single job. Since jobs can depend on the output
  of other jobs, they are organized into stages based on the dependency graph between jobs. Combined with cluster
@@ -416,27 +423,24 @@ class ProcessingPipeline:
  """Stores the name of the processing pipeline managed by this instance. Primarily, this is used to identify the
  pipeline to the user in terminal messages and logs."""
  server: Server
- """Stores the reference to the Server object that maintains bidirectional communication with the remote server
- running the pipeline."""
+ """Stores the reference to the Server object used to interface with the remote server running the pipeline."""
  manager_id: int
- """The unique identifier for the manager process that constructs and manages the runtime of the tracked pipeline.
- This is used to ensure that only a single pipeline instance can work with each session's data at the same time on
- the remote server."""
+ """The unique identifier for the manager process that constructs and manages the runtime of the tracked pipeline."""
  jobs: dict[int, tuple[tuple[Job, Path], ...]]
  """Stores the dictionary that maps the pipeline processing stage integer-codes to two-element tuples. Each tuple
- stores the Job objects and the paths to their remote working directories to be submitted to the server at each
- stage."""
+ stores the Job object and the path to its remote working directory to be submitted to the server as part of
+ executing that stage."""
  remote_tracker_path: Path
- """The path to the pipeline's processing tracker .yaml file stored on the remote compute server."""
+ """Stores the path to the pipeline's processing tracker .yaml file stored on the remote compute server."""
  local_tracker_path: Path
- """The path to the pipeline's processing tracker .yaml file on the local machine. The remote file is pulled to
- this location when the instance verifies the outcome of each tracked pipeline's processing stage."""
+ """Stores the path to the pipeline's processing tracker .yaml file on the local machine. The remote file is
+ pulled to this location when the instance verifies the outcome of the tracked processing pipeline."""
  session: str
- """The ID of the session whose data is being processed by the tracked pipeline."""
+ """Stores the ID of the session whose data is being processed by the tracked pipeline."""
  animal: str
- """The ID of the animal whose data is being processed by the tracked pipeline."""
+ """Stores the ID of the animal whose data is being processed by the tracked pipeline."""
  project: str
- """The name of the project whose data is being processed by the tracked pipeline."""
+ """Stores the name of the project whose data is being processed by the tracked pipeline."""
  keep_job_logs: bool = False
  """Determines whether to keep the logs for the jobs making up the pipeline execution graph or (default) to remove
  them after pipeline successfully ends its runtime. If the pipeline fails to complete its runtime, the logs are kept
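Given the fields above, the resolved execution graph is a mapping from stage codes to batches of (Job, remote working directory) pairs, and the class docstring earlier in this diff says runtime_cycle() has to be called cyclically until the pipeline is complete. A manager-side sketch under those assumptions (the pipeline variable is assumed to be an already-constructed ProcessingPipeline instance, and checking completion through the pulled local tracker file is an assumption about how a caller might detect the end of the runtime):

import time

# Inspect the resolved execution graph: stage integer-codes map to batches of parallel jobs.
for stage, batch in sorted(pipeline.jobs.items()):
    print(f"stage {stage}: {len(batch)} parallel job(s)")  # each entry is a (Job, remote working directory) pair

# Drive the pipeline by calling runtime_cycle() cyclically, as the class docstring describes.
tracker = ProcessingTracker(file_path=pipeline.local_tracker_path)
while not tracker.is_complete:
    if tracker.encountered_error:
        raise RuntimeError("the remote pipeline reported an error")
    pipeline.runtime_cycle()
    time.sleep(30)  # polling interval is arbitrary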
@@ -551,7 +555,7 @@ class ProcessingPipeline:
 
 
  def generate_manager_id() -> int:
- """Generates and returns a unique integer identifier that can be used to identify the manager process that calls
+ """Generates and returns a unique integer value that can be used to identify the manager process that calls
  this function.
 
  The identifier is generated based on the current timestamp, accurate to microseconds, and a random number between 1