sl-shared-assets 5.0.1-py3-none-any.whl → 5.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -1,6 +1,6 @@
- """This module provides tools and classes for running complex data processing pipelines on remote compute servers.
- A Pipeline represents a higher unit of abstraction relative to the Job class, often leveraging multiple sequential or
- parallel processing jobs to conduct the required processing."""
+ """This module provides tools used to run complex data processing pipelines on remote compute servers. A processing
+ pipeline represents a higher unit of abstraction relative to the Job class, often leveraging multiple sequential or
+ parallel jobs to process the data."""

  import copy
  from enum import IntEnum, StrEnum
@@ -20,11 +20,8 @@ from .server import Server


  class TrackerFileNames(StrEnum):
- """Defines a set of processing tacker .yaml files used by the Sun lab data preprocessing, processing, and dataset
- formation pipelines to track the progress of the remotely executed pipelines.
-
- This enumeration standardizes the names for all processing tracker files used in the lab. It is designed to be used
- via the get_processing_tracker() function to generate ProcessingTracker instances.
+ """Stores the names of the processing tacker .yaml files used by the Sun lab data preprocessing, processing, and
+ dataset formation pipelines to track the pipeline's progress.

  Notes:
  The elements in this enumeration match the elements in the ProcessingPipelines enumeration, since each valid
@@ -52,18 +49,14 @@ class TrackerFileNames(StrEnum):


  class ProcessingPipelines(StrEnum):
- """Defines the set of processing pipelines currently supported in the Sun lab.
-
- All processing pipelines currently supported by the lab codebase are defined in this enumeration. Primarily,
- the elements from this enumeration are used in terminal messages and data logging entries to identify the pipelines
- to the user.
+ """Stores the names of the data processing pipelines currently used in the lab.

  Notes:
- The elements in this enumeration match the elements in the ProcessingTracker enumeration, since each valid
+ The elements in this enumeration match the elements in the TrackerFileNames enumeration, since each valid
  ProcessingPipeline instance has an associated ProcessingTracker file instance.

  The order of pipelines in this enumeration loosely follows the sequence in which they are executed during the
- lifetime of the Sun lab data on the remote compute server.
+ Sun lab data workflow.
  """

  MANIFEST = "manifest generation"
@@ -72,8 +65,8 @@ class ProcessingPipelines(StrEnum):
  pipeline automatically conduct the manifest (re)generation at the end of their runtime."""
  CHECKSUM = "checksum resolution"
  """Checksum resolution pipeline. Primarily, it is used to verify that the raw data has been transferred to the
- remote storage server from the main acquisition system PC intact. This pipeline is sometimes also used to
- regenerate (re-checksum) the data stored on the remote compute server."""
+ remote storage server from the main acquisition system PC intact. This pipeline is also used to regenerate
+ (re-checksum) the data stored on the remote compute server."""
  PREPARATION = "processing preparation"
  """Data processing preparation pipeline. Since the compute server uses a two-volume design with a slow (HDD) storage
  volume and a fast (NVME) working volume, to optimize data processing performance, the data needs to be transferred
@@ -81,8 +74,7 @@ class ProcessingPipelines(StrEnum):
  volume to the working volume."""
  BEHAVIOR = "behavior processing"
  """Behavior processing pipeline. This pipeline is used to process .npz log files to extract animal behavior data
- acquired during a single session (day). The processed logs also contain the timestamps use to synchronize behavior
- to video and mesoscope frame data, and experiment configuration and task information."""
+ acquired during a single session (day)."""
  SUITE2P = "single-day suite2p processing"
  """Single-day suite2p pipeline. This pipeline is used to extract the cell activity data from 2-photon imaging data
  acquired during a single session (day)."""
@@ -91,24 +83,22 @@ class ProcessingPipelines(StrEnum):
  behavior video frames acquired during a single session (day)."""
  MULTIDAY = "multi-day suite2p processing"
  """Multi-day suite2p processing (cell tracking) pipeline. This pipeline is used to track cells processed with the
- single-day suite2p pipelines across multiple days. It is executed for all sessions marked for integration into the
- same dataset as the first step of dataset creation."""
+ single-day suite2p pipelines across multiple days."""
  FORGING = "dataset forging"
  """Dataset creation (forging) pipeline. This pipeline typically runs after the multi-day pipeline. It extracts and
- integrates the processed data from various sources such as brain activity, behavior, videos, etc., into a unified
- dataset."""
+ integrates the processed data from all sources into a unified dataset."""
  ARCHIVING = "data archiving"
- """Data archiving pipeline. To conserve the (limited) space on the fast working volume, once the data has been
- processed and integrated into a stable dataset, the processed data folder is moved to the storage volume and all
- folders under the root session folder on the processed data volume are deleted."""
+ """Data archiving pipeline. To conserve the (limited) space on the remote compute server's fast working volume,
+ once the data has been processed and integrated into a stable dataset, the processed data folder is moved to the
+ storage volume. After the data is moved, all folders under the root session folder on the processed data volume are
+ deleted to free up the processing volume space."""


  class ProcessingStatus(IntEnum):
  """Maps integer-based processing pipeline status (state) codes to human-readable names.

- This enumeration is used to track and communicate the progress of Sun lab processing pipelines as they are executed
- by the remote compute server. Specifically, the codes from this enumeration are used by the ProcessingPipeline
- class to communicate the status of the managed pipelines to external processes.
+ The codes from this enumeration are used by the ProcessingPipeline class to communicate the status of the managed
+ pipelines to manager processes that oversee the execution of each pipeline.

  Notes:
  The status codes from this enumeration track the state of the pipeline as a whole, instead of tracking the
@@ -129,41 +119,40 @@ class ProcessingStatus(IntEnum):

  @dataclass()
  class ProcessingTracker(YamlConfig):
- """Wraps the .yaml file that tracks the state of a data processing pipeline and provides tools for communicating the
- state between multiple processes in a thread-safe manner.
+ """Wraps the .yaml file that tracks the state of a data processing pipeline and provides tools for communicating
+ this state between multiple processes in a thread-safe manner.

  This class is used by all data processing pipelines running on the remote compute server(s) to prevent race
- conditions and ensure that pipelines have exclusive access to the processed data. It is also used to evaluate the
- status (success / failure) of each pipeline as they are executed by the remote server.
+ conditions. It is also used to evaluate the status (success / failure) of each pipeline as they are executed by the
+ remote server.

  Note:
- In library version 4.0.0 the processing trackers have been refactored to work similar to 'lock' files. That is,
- when a pipeline starts running on the remote server, its tracker is switched into the 'running' (locked) state
- until the pipeline completes, aborts, or encounters an error. When the tracker is locked, all modifications to
- the tracker or processed data have to originate from the same process that started the pipeline that locked the
- tracker file. This feature supports running complex processing pipelines that use multiple concurrent and / or
- sequential processing jobs on the remote server.
-
- This instance frequently refers to a 'manager process' in method documentation. A 'manager process' is the
+ This instance frequently refers to the 'manager process' in method documentation. A 'manager process' is the
  highest-level process that manages the tracked pipeline. When a pipeline runs on remote compute servers, the
  manager process is typically the process running on the non-server machine (user PC) that submits the remote
- processing jobs to the compute server (via SSH or similar protocol). The worker process(es) that run the
- processing job(s) on the remote compute servers are NOT considered manager processes.
+ processing jobs to the compute server. The worker process(es) that run the processing job(s) on the remote
+ compute servers are not considered manager processes.
+
+ The processing trackers work similar to 'lock' files. When a pipeline starts running on the remote server, its
+ tracker is switched into the 'running' (locked) state until the pipeline completes, aborts, or encounters an
+ error. When the tracker is locked, all modifications to the tracker have to originate from the same manager
+ process that started the pipeline. This feature supports running complex processing pipelines that use multiple
+ concurrent and / or sequential processing jobs on the remote server.
  """

  file_path: Path
  """Stores the path to the .yaml file used to cache the tracker data on disk. The class instance functions as a
  wrapper around the data stored inside the specified .yaml file."""
  _complete: bool = False
- """Tracks whether the processing runtime managed by this tracker has finished successfully."""
+ """Tracks whether the processing pipeline managed by this tracker has finished successfully."""
  _encountered_error: bool = False
- """Tracks whether the processing runtime managed by this tracker has encountered an error and has finished
+ """Tracks whether the processing pipeline managed by this tracker has encountered an error and has finished
  unsuccessfully."""
  _running: bool = False
- """Tracks whether the processing runtime managed by this tracker is currently running."""
+ """Tracks whether the processing pipeline managed by this tracker is currently running."""
  _manager_id: int = -1
  """Stores the xxHash3-64 hash value that represents the unique identifier of the manager process that started the
- runtime. The manager process is typically running on a remote control machine (computer) and is used to
+ pipeline. The manager process is typically running on a remote control machine (computer) and is used to
  support processing runtimes that are distributed over multiple separate batch jobs on the compute server. This
  ID should be generated using the 'generate_manager_id()' function exposed by this library."""
  _lock_path: str = field(init=False)
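The lock-style contract described in the ProcessingTracker docstring can be sketched as follows. This is a minimal illustration that relies only on the names visible in this diff (ProcessingTracker, generate_manager_id(), start(), error(), stop()); the import path, the tracker file name, and the processing step are placeholder assumptions rather than part of the released package.

from pathlib import Path

# The import path below is an assumption; the class and function names are taken from this diff.
from sl_shared_assets import ProcessingTracker, generate_manager_id


def run_processing_job() -> None:
    """Placeholder for the processing work submitted by the manager process."""


# Placeholder tracker file path; real trackers use the names from the TrackerFileNames enumeration.
tracker = ProcessingTracker(file_path=Path("behavior_processing_tracker.yaml"))
manager_id = generate_manager_id()

tracker.start(manager_id=manager_id)  # Locks the pipeline to this manager process.
try:
    run_processing_job()
except Exception:
    tracker.error(manager_id=manager_id)  # Unlocks the pipeline and records the failure.
    raise
else:
    tracker.stop(manager_id=manager_id)  # Unlocks the pipeline and records successful completion.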
@@ -218,20 +207,17 @@ class ProcessingTracker(YamlConfig):

  def start(self, manager_id: int, job_count: int = 1) -> None:
  """Configures the tracker file to indicate that a manager process is currently executing the tracked processing
- runtime.
+ pipeline.

- Calling this method effectively 'locks' the tracked session and processing runtime combination to only be
- accessible from the manager process that calls this method. Calling this method for an already running runtime
- managed by the same process does not have any effect, so it is safe to call this method at the beginning of
- each processing job that makes up the runtime.
+ Calling this method locks the tracked session and processing pipeline combination to only be accessible from the
+ manager process that calls this method. Calling this method for an already running pipeline managed by the same
+ process does not have any effect, so it is safe to call this method at the beginning of each processing job that
+ makes up the pipeline.

  Args:
- manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to start the runtime
- tracked by this tracker file.
- job_count: The total number of jobs to be executed as part of the tracked pipeline. This is used to make
- the stop() method properly track the end of the pipeline as a whole, rather than the end of intermediate
- jobs. Primarily, this is used by multi-job pipelines where all jobs are submitted as part of a single
- phase and the job completion order cannot be known in-advance.
+ manager_id: The unique identifier of the manager process which attempts to start the pipeline tracked by
+ this tracker file.
+ job_count: The total number of jobs to be executed as part of the tracked pipeline.

  Raises:
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
@@ -242,23 +228,23 @@ class ProcessingTracker(YamlConfig):
  # Loads tracker state from the .yaml file
  self._load_state()

- # If the runtime is already running from a different process, aborts with an error.
+ # If the pipeline is already running from a different process, aborts with an error.
  if self._running and manager_id != self._manager_id:
  message = (
- f"Unable to start the processing runtime from the manager process with id {manager_id}. The "
+ f"Unable to start the processing pipeline from the manager process with id {manager_id}. The "
  f"{self.file_path.name} tracker file indicates that the manager process with id {self._manager_id} "
- f"is currently executing the tracked runtime. Only a single manager process is allowed to execute "
- f"the runtime at the same time."
+ f"is currently executing the tracked pipeline. Only a single manager process is allowed to execute "
+ f"the pipeline at the same time."
  )
  console.error(message=message, error=RuntimeError)
  raise RuntimeError(message) # Fallback to appease mypy, should not be reachable

- # Otherwise, if the runtime is already running for the current manager process, returns without modifying
+ # Otherwise, if the pipeline is already running for the current manager process, returns without modifying
  # the tracker data.
  elif self._running and manager_id == self._manager_id:
  return

- # Otherwise, locks the runtime for the current manager process and updates the cached tracker data
+ # Otherwise, locks the pipeline for the current manager process and updates the cached tracker data
  self._running = True
  self._manager_id = manager_id
  self._complete = False
@@ -267,16 +253,16 @@ class ProcessingTracker(YamlConfig):
  self._save_state()

  def error(self, manager_id: int) -> None:
- """Configures the tracker file to indicate that the tracked processing runtime encountered an error and failed
+ """Configures the tracker file to indicate that the tracked processing pipeline encountered an error and failed
  to complete.

- This method fulfills two main purposes. First, it 'unlocks' the runtime, allowing other manager processes to
- interface with the tracked runtime. Second, it updates the tracker file to reflect that the runtime was
- interrupted due to an error, which is used by the manager processes to detect and handle processing failures.
+ This method unlocks the pipeline, allowing other manager processes to interface with the tracked pipeline. It
+ also updates the tracker file to reflect that the pipeline was interrupted due to an error, which is used by the
+ manager processes to detect and handle processing failures.

  Args:
- manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
- runtime tracked by this tracker file has encountered an error.
+ manager_id: The unique identifier of the manager process which attempts to report that the pipeline tracked
+ by this tracker file has encountered an error.

  Raises:
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
@@ -286,22 +272,22 @@ class ProcessingTracker(YamlConfig):
  # Loads tracker state from the .yaml file
  self._load_state()

- # If the runtime is not running, returns without doing anything
+ # If the pipeline is not running, returns without doing anything
  if not self._running:
  return

- # Ensures that only the active manager process can report runtime errors using the tracker file
+ # Ensures that only the active manager process can report pipeline errors using the tracker file
  if manager_id != self._manager_id:
  message = (
- f"Unable to report that the processing runtime has encountered an error from the manager process "
- f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
+ f"Unable to report that the processing pipeline has encountered an error from the manager process "
+ f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the pipeline is "
  f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
- f"with the runtime."
+ f"with the pipeline."
  )
  console.error(message=message, error=RuntimeError)
  raise RuntimeError(message) # Fallback to appease mypy, should not be reachable

- # Indicates that the runtime aborted with an error
+ # Indicates that the pipeline aborted with an error
  self._running = False
  self._manager_id = -1
  self._complete = False
@@ -309,15 +295,19 @@ class ProcessingTracker(YamlConfig):
  self._save_state()

  def stop(self, manager_id: int) -> None:
- """Configures the tracker file to indicate that the tracked processing runtime has been completed successfully.
+ """Configures the tracker file to indicate that the tracked processing pipeline has been completed successfully.

- This method 'unlocks' the runtime, allowing other manager processes to interface with the tracked runtime. It
- also configures the tracker file to indicate that the runtime has been completed successfully, which is used
+ This method unlocks the pipeline, allowing other manager processes to interface with the tracked pipeline. It
+ also configures the tracker file to indicate that the pipeline has been completed successfully, which is used
  by the manager processes to detect and handle processing completion.

+ Notes:
+ This method tracks how many jobs executed as part of the tracked pipeline have been completed and only
+ marks the pipeline as complete if all it's processing jobs have been completed.
+
  Args:
- manager_id: The unique xxHash-64 hash identifier of the manager process which attempts to report that the
- runtime tracked by this tracker file has been completed successfully.
+ manager_id: The unique identifier of the manager process which attempts to report that the pipeline tracked
+ by this tracker file has been completed successfully.

  Raises:
  TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
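The job_count bookkeeping described in the new Notes block means stop() only finalizes the tracker after it has been called once per job. Continuing the sketch above, with a three-job pipeline assumed purely for illustration:

tracker.start(manager_id=manager_id, job_count=3)  # The tracked pipeline consists of three jobs.

for _ in range(3):
    tracker.stop(manager_id=manager_id)  # The first two calls only increment the completed-job counter.

assert tracker.is_complete  # Becomes True only once all three jobs have reported completion.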
@@ -327,17 +317,17 @@ class ProcessingTracker(YamlConfig):
  # Loads tracker state from the .yaml file
  self._load_state()

- # If the runtime is not running, does not do anything
+ # If the pipeline is not running, does not do anything
  if not self._running:
  return

- # Ensures that only the active manager process can report runtime completion using the tracker file
+ # Ensures that only the active manager process can report pipeline completion using the tracker file
  if manager_id != self._manager_id:
  message = (
- f"Unable to report that the processing runtime has completed successfully from the manager process "
- f"with id {manager_id}. The {self.file_path.name} tracker file indicates that the runtime is "
- f"managed by the process with id {self._manager_id}, preventing other processes from interfacing "
- f"with the runtime."
+ f"Unable to report that the processing pipeline has completed successfully from the manager "
+ f"process with id {manager_id}. The {self.file_path.name} tracker file indicates that the pipeline "
+ f"is managed by the process with id {self._manager_id}, preventing other processes from "
+ f"interfacing with the pipeline."
  )
  console.error(message=message, error=RuntimeError)
  raise RuntimeError(message) # Fallback to appease mypy, should not be reachable
@@ -345,7 +335,7 @@ class ProcessingTracker(YamlConfig):
  # Increments completed job tracker
  self._completed_jobs += 1

- # If the pipeline has completed all required jobs, marks the runtime as complete (stopped)
+ # If the pipeline has completed all required jobs, marks the pipeline as complete (stopped)
  if self._completed_jobs >= self._job_count:
  self._running = False
  self._manager_id = -1
@@ -354,30 +344,32 @@ class ProcessingTracker(YamlConfig):
  self._save_state()

  def abort(self) -> None:
- """Resets the runtime tracker file to the default state.
+ """Resets the pipeline tracker file to the default state.

- This method can be used to reset the runtime tracker file, regardless of the current runtime state. Unlike other
- instance methods, this method can be called from any manager process, even if the runtime is already locked by
- another process. This method is only intended to be used in the case of emergency to 'unlock' a deadlocked
- runtime.
+ This method can be used to reset the pipeline tracker file, regardless of the current pipeline state. Unlike
+ other instance methods, this method can be called from any manager process, even if the pipeline is already
+ locked by another process. This method is only intended to be used in the case of emergency to unlock a
+ deadlocked pipeline.
  """
  lock = FileLock(self._lock_path)
  with lock.acquire(timeout=10.0):
  # Loads tracker state from the .yaml file.
  self._load_state()

- # Resets the tracker file to the default state. Note, does not indicate that the runtime completed nor
+ # Resets the tracker file to the default state. Note, does not indicate that the pipeline completed nor
  # that it has encountered an error.
  self._running = False
  self._manager_id = -1
+ self._completed_jobs = 0
+ self._job_count = 1
  self._complete = False
  self._encountered_error = False
  self._save_state()

  @property
  def is_complete(self) -> bool:
- """Returns True if the tracker wrapped by the instance indicates that the processing runtime has been completed
- successfully and that the runtime is not currently ongoing."""
+ """Returns True if the tracker wrapped by the instance indicates that the processing pipeline has been completed
+ successfully and that the pipeline is not currently ongoing."""
  lock = FileLock(self._lock_path)
  with lock.acquire(timeout=10.0):
  # Loads tracker state from the .yaml file
@@ -386,7 +378,7 @@ class ProcessingTracker(YamlConfig):

  @property
  def encountered_error(self) -> bool:
- """Returns True if the tracker wrapped by the instance indicates that the processing runtime has aborted due
+ """Returns True if the tracker wrapped by the instance indicates that the processing pipeline has aborted due
  to encountering an error."""
  lock = FileLock(self._lock_path)
  with lock.acquire(timeout=10.0):
@@ -396,7 +388,7 @@ class ProcessingTracker(YamlConfig):

  @property
  def is_running(self) -> bool:
- """Returns True if the tracker wrapped by the instance indicates that the processing runtime is currently
+ """Returns True if the tracker wrapped by the instance indicates that the processing pipeline is currently
  ongoing."""
  lock = FileLock(self._lock_path)
  with lock.acquire(timeout=10.0):
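Because abort() bypasses the manager lock, it can clear a tracker left in the 'running' state by a crashed manager process, and the status properties then confirm the reset. A short sketch under the same assumptions as above (the tracker file name is again a placeholder):

# Force-unlock a tracker left in the 'running' state by a crashed manager process.
stale_tracker = ProcessingTracker(file_path=Path("suite2p_processing_tracker.yaml"))  # Placeholder path.
stale_tracker.abort()

# After the reset, the tracker reports neither an ongoing, a completed, nor a failed pipeline.
assert not stale_tracker.is_running
assert not stale_tracker.is_complete
assert not stale_tracker.encountered_error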
@@ -407,19 +399,19 @@ class ProcessingTracker(YamlConfig):

  @dataclass()
  class ProcessingPipeline:
- """Encapsulates access to a processing pipeline running on the remote compute server.
+ """Provides an interface to construct and execute data processing pipelines on the target remote compute server.

  This class functions as an interface for all data processing pipelines running on Sun lab compute servers. It is
- pipeline-type-agnostic and works for all data processing pipelines supported by this library. After instantiation,
- the class automatically handles all interactions with the server necessary to run the remote processing pipeline and
+ pipeline-type-agnostic and works for all data processing pipelines used in the lab. After instantiation, the class
+ automatically handles all interactions with the server necessary to run the remote processing pipeline and
  verify the runtime outcome via the runtime_cycle() method that has to be called cyclically until the pipeline is
  complete.

  Notes:
- Each pipeline may be executed in one or more stages, each stage using one or more parallel jobs. As such, each
- pipeline can be seen as an execution graph that sequentially submits batches of jobs to the remote server. The
- processing graph for each pipeline is fully resolved at the instantiation of this class instance, so each
- instance contains the necessary data to run the entire processing pipeline.
+ Each pipeline is executed as a series of one or more stages with each stage using one or more parallel jobs.
+ Therefore, each pipeline can be seen as an execution graph that sequentially submits batches of jobs to the
+ remote server. The processing graph for each pipeline is fully resolved at the instantiation of this class, so
+ each instance contains the necessary data to run the entire processing pipeline.

  The minimum self-contained unit of the processing pipeline is a single job. Since jobs can depend on the output
  of other jobs, they are organized into stages based on the dependency graph between jobs. Combined with cluster
@@ -431,27 +423,24 @@ class ProcessingPipeline:
  """Stores the name of the processing pipeline managed by this instance. Primarily, this is used to identify the
  pipeline to the user in terminal messages and logs."""
  server: Server
- """Stores the reference to the Server object that maintains bidirectional communication with the remote server
- running the pipeline."""
+ """Store the reference to the Server object used to interface with the remote server running the pipeline."""
  manager_id: int
- """The unique identifier for the manager process that constructs and manages the runtime of the tracked pipeline.
- This is used to ensure that only a single pipeline instance can work with each session's data at the same time on
- the remote server."""
+ """The unique identifier for the manager process that constructs and manages the runtime of the tracked pipeline."""
  jobs: dict[int, tuple[tuple[Job, Path], ...]]
  """Stores the dictionary that maps the pipeline processing stage integer-codes to two-element tuples. Each tuple
- stores the Job objects and the paths to their remote working directories to be submitted to the server at each
- stage."""
+ stores the Job object and the path to its remote working directory to be submitted to the server as part of that
+ executing that stage."""
  remote_tracker_path: Path
- """The path to the pipeline's processing tracker .yaml file stored on the remote compute server."""
+ """Stores the path to the pipeline's processing tracker .yaml file stored on the remote compute server."""
  local_tracker_path: Path
- """The path to the pipeline's processing tracker .yaml file on the local machine. The remote file is pulled to
- this location when the instance verifies the outcome of each tracked pipeline's processing stage."""
+ """Stores the path to the pipeline's processing tracker .yaml file on the local machine. The remote file is
+ pulled to this location when the instance verifies the outcome of the tracked processing pipeline."""
  session: str
- """The ID of the session whose data is being processed by the tracked pipeline."""
+ """Stores the ID of the session whose data is being processed by the tracked pipeline."""
  animal: str
- """The ID of the animal whose data is being processed by the tracked pipeline."""
+ """Stores the ID of the animal whose data is being processed by the tracked pipeline."""
  project: str
- """The name of the project whose data is being processed by the tracked pipeline."""
+ """Stores the name of the project whose data is being processed by the tracked pipeline."""
  keep_job_logs: bool = False
  """Determines whether to keep the logs for the jobs making up the pipeline execution graph or (default) to remove
  them after pipeline successfully ends its runtime. If the pipeline fails to complete its runtime, the logs are kept
@@ -566,7 +555,7 @@ class ProcessingPipeline:


  def generate_manager_id() -> int:
- """Generates and returns a unique integer identifier that can be used to identify the manager process that calls
+ """Generates and returns a unique integer value that can be used to identify the manager process that calls
  this function.

  The identifier is generated based on the current timestamp, accurate to microseconds, and a random number between 1