sl-shared-assets 2.0.0__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic.

@@ -1,34 +1,42 @@
1
1
  from pathlib import Path
2
- from dataclasses import dataclass
2
+ from dataclasses import field, dataclass
3
3
 
4
+ from _typeshed import Incomplete
4
5
  from simple_slurm import Slurm as Slurm
5
6
  from paramiko.client import SSHClient as SSHClient
6
7
  from ataraxis_data_structures import YamlConfig
7
8
 
8
- from .job import Job as Job
9
+ from .job import (
10
+ Job as Job,
11
+ JupyterJob as JupyterJob,
12
+ )
9
13
 
10
14
  def generate_server_credentials(
11
15
  output_directory: Path,
12
16
  username: str,
13
17
  password: str,
14
18
  host: str = "cbsuwsun.biohpc.cornell.edu",
15
- raw_data_root: str = "/workdir/sun_data",
16
- processed_data_root: str = "/storage/sun_data",
19
+ storage_root: str = "/local/workdir",
20
+ working_root: str = "/local/storage",
21
+ shared_directory_name: str = "sun_data",
17
22
  ) -> None:
18
23
  """Generates a new server_credentials.yaml file under the specified directory, using input information.
19
24
 
20
25
  This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
21
- only used when setting up new host-computers in the lab.
26
+ only used when setting up new host-computers or users in the lab.
22
27
 
23
28
  Args:
24
29
  output_directory: The directory where to save the generated server_credentials.yaml file.
25
30
  username: The username to use for server authentication.
26
31
  password: The password to use for server authentication.
27
32
  host: The hostname or IP address of the server to connect to.
28
- raw_data_root: The path to the root directory used to store the raw data from all Sun lab projects on the
29
- server.
30
- processed_data_root: The path to the root directory used to store the processed data from all Sun lab projects
31
- on the server.
33
+ storage_root: The path to the root storage (slow) server directory. Typically, this is the path to the
34
+ top-level (root) directory of the HDD RAID volume.
35
+ working_root: The path to the root working (fast) server directory. Typically, this is the path to the
36
+ top-level (root) directory of the NVME RAID volume. If the server uses the same volume for both storage and
37
+ working directories, enter the same path under both 'storage_root' and 'working_root'.
38
+ shared_directory_name: The name of the shared directory used to store all Sun lab project data on the storage
39
+ and working server volumes.
32
40
  """
33
41
  @dataclass()
34
42
  class ServerCredentials(YamlConfig):
@@ -43,8 +51,15 @@ class ServerCredentials(YamlConfig):
43
51
  username: str = ...
44
52
  password: str = ...
45
53
  host: str = ...
46
- raw_data_root: str = ...
47
- processed_data_root: str = ...
54
+ storage_root: str = ...
55
+ working_root: str = ...
56
+ shared_directory_name: str = ...
57
+ raw_data_root: str = field(init=False, default_factory=Incomplete)
58
+ processed_data_root: str = field(init=False, default_factory=Incomplete)
59
+ user_data_root: str = field(init=False, default_factory=Incomplete)
60
+ user_working_root: str = field(init=False, default_factory=Incomplete)
61
+ def __post_init__(self) -> None:
62
+ """Statically resolves the paths to end-point directories using provided root directories."""
48
63
 
49
64
  class Server:
50
65
  """Encapsulates access to the Sun lab BioHPC processing server.
@@ -75,7 +90,79 @@ class Server:
75
90
  def __init__(self, credentials_path: Path) -> None: ...
76
91
  def __del__(self) -> None:
77
92
  """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
78
- def submit_job(self, job: Job) -> Job:
93
+ def create_job(
94
+ self, job_name: str, conda_environment: str, cpus_to_use: int = 10, ram_gb: int = 10, time_limit: int = 60
95
+ ) -> Job:
96
+ """Creates and returns a new Job instance.
97
+
98
+ Use this method to generate Job objects for all headless jobs that need to be run on the remote server. The
99
+ generated Job is a precursor that requires further configuration by the user before it can be submitted to the
100
+ server for execution.
101
+
102
+ Args:
103
+ job_name: The descriptive name of the SLURM job to be created. Primarily, this name is used in terminal
104
+ printouts to identify the job to human operators.
105
+ conda_environment: The name of the conda environment to activate on the server before running the job logic.
106
+ The environment should contain the necessary Python packages and CLIs to support running the job's
107
+ logic.
108
+ cpus_to_use: The number of CPUs to use for the job.
109
+ ram_gb: The amount of RAM to allocate for the job, in Gigabytes.
110
+ time_limit: The maximum time limit for the job, in minutes. If the job is still running at the end of this
111
+ time period, it will be forcibly terminated. It is highly advised to always set adequate maximum runtime
112
+ limits to prevent jobs from hogging the server in case of runtime or algorithm errors.
113
+
114
+ Returns:
115
+ The initialized Job instance pre-filled with SLURM configuration data and conda activation commands. Modify
116
+ the returned instance with any additional commands as necessary for the job to fulfill its intended
117
+ purpose. Note, the Job requires submission via submit_job() to be executed by the server.
118
+ """
119
+ def launch_jupyter_server(
120
+ self,
121
+ job_name: str,
122
+ conda_environment: str,
123
+ notebook_directory: Path,
124
+ cpus_to_use: int = 2,
125
+ ram_gb: int = 32,
126
+ time_limit: int = 240,
127
+ port: int = 0,
128
+ jupyter_args: str = "",
129
+ ) -> JupyterJob:
130
+ """Launches a Jupyter notebook server on the target remote Sun lab server.
131
+
132
+ Use this method to run interactive Jupyter sessions on the remote server under SLURM control. Unlike
133
+ create_job(), this method automatically submits the job for execution as part of its runtime. Therefore, the
134
+ returned JupyterJob instance should only be used to query information about how to connect to the remote
135
+ Jupyter server.
136
+
137
+ Args:
138
+ job_name: The descriptive name of the Jupyter SLURM job to be created. Primarily, this name is used in
139
+ terminal printouts to identify the job to human operators.
140
+ conda_environment: The name of the conda environment to activate on the server before running the job logic.
141
+ The environment should contain the necessary Python packages and CLIs to support running the job's
142
+ logic. For Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
143
+ port: The connection port number for the Jupyter server. If set to 0 (default), a random port number between
144
+ 8888 and 9999 will be assigned to this connection to reduce the possibility of colliding with other
145
+ user sessions.
146
+ notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have GUI
147
+ access to items stored in or under this directory. For most runtimes, this should be set to the user's
148
+ root data or working directory.
149
+ cpus_to_use: The number of CPUs to allocate to the Jupyter server. Keep this value as small as possible to
150
+ avoid interfering with headless data processing jobs.
151
+ ram_gb: The amount of RAM, in GB, to allocate to the Jupyter server. Keep this value as small as possible to
152
+ avoid interfering with headless data processing jobs.
153
+ time_limit: The maximum Jupyter server uptime, in minutes. Set this to the expected duration of your Jupyter
154
+ session.
155
+ jupyter_args: Additional arguments to pass to the Jupyter notebook initialization command.
156
+
157
+ Returns:
158
+ The initialized JupyterJob instance that stores information on how to connect to the created Jupyter server.
159
+ Do NOT re-submit the job to the server, as this is done as part of this method's runtime.
160
+
161
+ Raises:
162
+ TimeoutError: If the target Jupyter server doesn't start within 120 minutes from this method being called.
163
+ RuntimeError: If job submission fails for any reason.
164
+ """
165
+ def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
79
166
  """Submits the input job to the managed BioHPC server via SLURM job manager.
80
167
 
81
168
  This method submits various jobs for execution via SLURM-managed BioHPC cluster. As part of its runtime, the
@@ -92,7 +179,7 @@ class Server:
92
179
  Raises:
93
180
  RuntimeError: If job submission to the server fails.
94
181
  """
95
- def job_complete(self, job: Job) -> bool:
182
+ def job_complete(self, job: Job | JupyterJob) -> bool:
96
183
  """Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
97
184
  to an error.
98
185
 
@@ -105,6 +192,16 @@ class Server:
105
192
  ValueError: If the input Job object does not contain a valid job_id, suggesting that it has not been
106
193
  submitted to the server.
107
194
  """
195
+ def abort_job(self, job: Job | JupyterJob) -> None:
196
+ """Aborts the target job if it is currently running on the server.
197
+
198
+ Use this method to immediately abort running or queued jobs, without waiting for the timeout guard. If the job
199
+ is queued, this method will remove it from the SLURM queue. If the job is already terminated, this method will
200
+ do nothing.
201
+
202
+ Args:
203
+ job: The Job object that needs to be aborted.
204
+ """
108
205
  def pull_file(self, local_file_path: Path, remote_file_path: Path) -> None:
109
206
  """Moves the specified file from the remote server to the local machine.
110
207
 
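
A sketch (editorial, not part of the diff) of the headless job lifecycle documented above: create_job() builds the precursor, submit_job() dispatches it, job_complete() polls its state, and abort_job() cancels it. The job name, conda environment, polling interval, and local deadline are illustrative; the commands a Job carries are configured through the Job class API, which this diff does not show.

    import time

    # 'server' is a connected Server instance (see the credentials sketch above).
    job = server.create_job(
        job_name="suite2p_single_day",   # hypothetical job name
        conda_environment="suite2p",     # hypothetical conda environment
        cpus_to_use=16,
        ram_gb=64,
        time_limit=120,                  # minutes; the SLURM-side guard against runaway jobs
    )

    # ... configure the returned Job with the commands it should run (Job's API is outside this diff) ...

    job = server.submit_job(job)  # submission should populate the SLURM job_id used by job_complete()

    # Poll until the job finishes, cancelling it early if it exceeds a local deadline.
    deadline = time.monotonic() + 2 * 60 * 60
    while not server.job_complete(job):
        if time.monotonic() > deadline:
            server.abort_job(job)
            break
        time.sleep(30)
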
@@ -126,18 +223,50 @@ class Server:
126
223
  remote_path: The path to the file or directory on the remote server to be removed.
127
224
  is_dir: Determines whether the input path represents a directory or a file.
128
225
  """
226
+ def create_directory(self, remote_path: Path, parents: bool = True) -> None:
227
+ """Creates the specified directory tree on the managed remote server via SFTP.
228
+
229
+ This method creates directories on the remote server, with options to create parent directories and handle
230
+ existing directories gracefully.
231
+
232
+ Args:
233
+ remote_path: The absolute path to the directory to create on the remote server, specified from the server's
234
+ filesystem root.
235
+ parents: Determines whether to create parent directories, if they are missing. Otherwise, if parents do not
236
+ exist, raises a FileNotFoundError.
237
+
238
+ Notes:
239
+ If the target directory already exists, this method treats it as a successful runtime end-point and does
240
+ not raise an error.
241
+ """
242
+ def exists(self, remote_path: Path) -> bool:
243
+ """Returns True if the target file or directory exists on the remote server."""
129
244
  def close(self) -> None:
130
245
  """Closes the SSH connection to the server.
131
246
 
132
247
  This method has to be called before destroying the class instance to ensure proper resource cleanup.
133
248
  """
134
249
  @property
135
- def raw_data_root(self) -> str:
250
+ def raw_data_root(self) -> Path:
136
251
  """Returns the absolute path to the directory used to store the raw data for all Sun lab projects on the server
137
252
  accessible through this class.
138
253
  """
139
254
  @property
140
- def processed_data_root(self) -> str:
255
+ def processed_data_root(self) -> Path:
141
256
  """Returns the absolute path to the directory used to store the processed data for all Sun lab projects on the
142
257
  server accessible through this class.
143
258
  """
259
+ @property
260
+ def user_data_root(self) -> Path:
261
+ """Returns the absolute path to the directory used to store user-specific data on the server accessible through
262
+ this class."""
263
+ @property
264
+ def user_working_root(self) -> Path:
265
+ """Returns the absolute path to the user-specific working (fast) directory on the server accessible through
266
+ this class."""
267
+ @property
268
+ def host(self) -> str:
269
+ """Returns the hostname or IP address of the server accessible through this class."""
270
+ @property
271
+ def user(self) -> str:
272
+ """Returns the username used to authenticate with the server."""
@@ -4,7 +4,7 @@ integrity of the data. The tools from this package are used by most other data p
4
4
  from .transfer_tools import transfer_directory
5
5
  from .ascension_tools import ascend_tyche_data
6
6
  from .packaging_tools import calculate_directory_checksum
7
- from .project_management_tools import verify_session_checksum, generate_project_manifest
7
+ from .project_management_tools import resolve_p53_marker, verify_session_checksum, generate_project_manifest
8
8
 
9
9
  __all__ = [
10
10
  "transfer_directory",
@@ -12,4 +12,5 @@ __all__ = [
12
12
  "ascend_tyche_data",
13
13
  "verify_session_checksum",
14
14
  "generate_project_manifest",
15
+ "resolve_p53_marker",
15
16
  ]
@@ -2,6 +2,7 @@ from .transfer_tools import transfer_directory as transfer_directory
2
2
  from .ascension_tools import ascend_tyche_data as ascend_tyche_data
3
3
  from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
4
4
  from .project_management_tools import (
5
+ resolve_p53_marker as resolve_p53_marker,
5
6
  verify_session_checksum as verify_session_checksum,
6
7
  generate_project_manifest as generate_project_manifest,
7
8
  )
@@ -12,4 +13,5 @@ __all__ = [
12
13
  "ascend_tyche_data",
13
14
  "verify_session_checksum",
14
15
  "generate_project_manifest",
16
+ "resolve_p53_marker",
15
17
  ]
@@ -17,13 +17,12 @@ _excluded_files = {
17
17
  "ax_checksum.txt",
18
18
  "ubiquitin.bin",
19
19
  "telomere.bin",
20
+ "p53.bin",
20
21
  "suite2p_processing_tracker.yaml",
21
22
  "dataset_formation_tracker.yaml",
22
- "behavior_processing_tracker.yaml",
23
23
  "video_processing_tracker.yaml",
24
24
  "integrity_verification_tracker.yaml",
25
25
  "suite2p_processing_tracker.yaml.lock",
26
- "dataset_formation_tracker.yaml.lock",
27
26
  "behavior_processing_tracker.yaml.lock",
28
27
  "video_processing_tracker.yaml.lock",
29
28
  "integrity_verification_tracker.yaml.lock",
@@ -76,11 +76,11 @@ class ProjectManifest:
76
76
  "session",
77
77
  "type",
78
78
  "complete",
79
- "integrity_verification",
80
- "suite2p_processing",
81
- "behavior_processing",
82
- "video_processing",
83
- "dataset_formation",
79
+ "integrity",
80
+ "suite2p",
81
+ "behavior",
82
+ "video",
83
+ "dataset",
84
84
  ]
85
85
 
86
86
  # Retrieves the data
@@ -93,7 +93,7 @@ class ProjectManifest:
93
93
  animal = str(animal)
94
94
  else:
95
95
  animal = int(animal)
96
- df = df.filter(pl.col("animal") == animal)
96
+ df = df.filter(pl.col("animal") == animal)
97
97
 
98
98
  # Ensures the data displays properly
99
99
  with pl.Config(
@@ -157,7 +157,13 @@ class ProjectManifest:
157
157
  """
158
158
  return tuple(self._data.select("session").sort("session").to_series().to_list())
159
159
 
160
- def get_sessions_for_animal(self, animal: str | int, exclude_incomplete: bool = True) -> tuple[str, ...]:
160
+ def get_sessions_for_animal(
161
+ self,
162
+ animal: str | int,
163
+ exclude_incomplete: bool = True,
164
+ dataset_ready_only: bool = False,
165
+ not_dataset_ready_only: bool = False,
166
+ ) -> tuple[str, ...]:
161
167
  """Returns all session IDs for the target animal.
162
168
 
163
169
  This provides a tuple of all sessions performed by the target animal as part of the target project.
@@ -166,6 +172,11 @@ class ProjectManifest:
166
172
  animal: The ID of the animal for which to get the session data.
167
173
  exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
168
174
  list.
175
+ dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
176
+ the output list. Enabling this option only shows sessions that can be integrated into a dataset.
177
+ not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
178
+ as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
179
+ enabled, the 'dataset_ready_only' option takes precedence.
169
180
 
170
181
  Raises:
171
182
  ValueError: If the specified animal is not found in the manifest file.
@@ -188,6 +199,12 @@ class ProjectManifest:
188
199
  if exclude_incomplete:
189
200
  data = data.filter(pl.col("complete") == 1)
190
201
 
202
+ # Optionally filters sessions based on their readiness for dataset integration.
203
+ if dataset_ready_only: # Dataset-ready option always takes precedence
204
+ data = data.filter(pl.col("dataset") == 1)
205
+ elif not_dataset_ready_only:
206
+ data = data.filter(pl.col("dataset") == 0)
207
+
191
208
  # Formats and returns session IDs to the caller
192
209
  sessions = data.select("session").sort("session").to_series().to_list()
193
210
  return tuple(sessions)
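
A sketch (editorial, not part of the diff) of the new readiness filters on get_sessions_for_animal(). The import path is an assumption; the precedence matches the docstring, so when both flags are set, dataset_ready_only wins.

    from sl_shared_assets import ProjectManifest  # import path assumed


    def split_sessions_by_readiness(
        manifest: ProjectManifest, animal: str | int
    ) -> tuple[tuple[str, ...], tuple[str, ...]]:
        """Returns (dataset-ready, not-yet-ready) complete sessions for one animal."""
        ready = manifest.get_sessions_for_animal(animal, dataset_ready_only=True)
        pending = manifest.get_sessions_for_animal(animal, not_dataset_ready_only=True)
        return ready, pending
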
@@ -203,8 +220,8 @@ class ProjectManifest:
203
220
 
204
221
  Returns:
205
222
  A Polars DataFrame with the following columns: 'animal', 'date', 'notes', 'session', 'type', 'complete',
206
- 'intensity_verification', 'suite2p_processing', 'behavior_processing', 'video_processing',
207
- 'dataset_formation'.
223
+ 'integrity', 'suite2p', 'behavior', 'video',
224
+ 'dataset'.
208
225
  """
209
226
 
210
227
  df = self._data
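
A sketch (editorial, not part of the diff) of filtering the manifest DataFrame under the shortened 3.0.0 column names ('integrity', 'suite2p', 'behavior', 'video', 'dataset' instead of the *_verification / *_processing / *_formation variants). How the DataFrame is obtained from ProjectManifest is outside this hunk, so it is passed in as a parameter.

    import polars as pl


    def summarize_processing(df: pl.DataFrame) -> pl.DataFrame:
        """Counts fully processed and dataset-ready sessions per animal."""
        processed = df.filter(
            (pl.col("complete") == 1)
            & (pl.col("integrity") == 1)
            & (pl.col("suite2p") == 1)
            & (pl.col("behavior") == 1)
            & (pl.col("video") == 1)
        )
        return processed.group_by("animal").agg(
            pl.len().alias("processed_sessions"),
            pl.col("dataset").sum().alias("dataset_ready_sessions"),
        )
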
@@ -264,12 +281,12 @@ def generate_project_manifest(
264
281
  # Determines whether the session data is complete (ran for the intended duration and has all expected data).
265
282
  "complete": [],
266
283
  # Determines whether the session data integrity has been verified upon transfer to a storage machine.
267
- "integrity_verification": [],
268
- "suite2p_processing": [], # Determines whether the session has been processed with the single-day s2p pipeline.
284
+ "integrity": [],
285
+ "suite2p": [], # Determines whether the session has been processed with the single-day s2p pipeline.
269
286
  # Determines whether the session has been processed with the behavior extraction pipeline.
270
- "behavior_processing": [],
271
- "video_processing": [], # Determines whether the session has been processed with the DeepLabCut pipeline.
272
- "dataset_formation": [], # Determines whether the session's data has been integrated into a dataset.
287
+ "behavior": [],
288
+ "video": [], # Determines whether the session has been processed with the DeepLabCut pipeline.
289
+ "dataset": [], # Determines whether the session's data is ready to be integrated into a dataset.
273
290
  }
274
291
 
275
292
  # Loops over each session of every animal in the project and extracts session ID information and information
@@ -336,33 +353,34 @@ def generate_project_manifest(
336
353
 
337
354
  # Data verification status
338
355
  tracker = ProcessingTracker(file_path=session_data.raw_data.integrity_verification_tracker_path)
339
- manifest["integrity_verification"].append(tracker.is_complete)
356
+ manifest["integrity"].append(tracker.is_complete)
340
357
 
341
358
  # If the session is incomplete or unverified, marks all processing steps as FALSE, as automatic processing is
342
359
  # disabled for incomplete sessions. If the session is unverified, the case is even more severe, as its data may
343
360
  # be corrupted.
344
- if not manifest["complete"][-1] or not manifest["integrity_verification"][-1]:
345
- manifest["suite2p_processing"].append(False)
346
- manifest["dataset_formation"].append(False)
347
- manifest["behavior_processing"].append(False)
348
- manifest["video_processing"].append(False)
361
+ if not manifest["complete"][-1] or not manifest["integrity"][-1]:
362
+ manifest["suite2p"].append(False)
363
+ manifest["dataset"].append(False)
364
+ manifest["behavior"].append(False)
365
+ manifest["video"].append(False)
349
366
  continue # Cycles to the next session
350
367
 
351
- # Suite2p (single-day) status
368
+ # Suite2p (single-day) processing status.
352
369
  tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
353
- manifest["suite2p_processing"].append(tracker.is_complete)
370
+ manifest["suite2p"].append(tracker.is_complete)
354
371
 
355
- # Dataset formation (integration) status. Tracks whether the session has been added to any dataset(s).
356
- tracker = ProcessingTracker(file_path=session_data.processed_data.dataset_formation_tracker_path)
357
- manifest["dataset_formation"].append(tracker.is_complete)
358
-
359
- # Dataset formation (integration) status. Tracks whether the session has been added to any dataset(s).
372
+ # Behavior data processing status.
360
373
  tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
361
- manifest["behavior_processing"].append(tracker.is_complete)
374
+ manifest["behavior"].append(tracker.is_complete)
362
375
 
363
376
  # DeepLabCut (video) processing status.
364
377
  tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
365
- manifest["video_processing"].append(tracker.is_complete)
378
+ manifest["video"].append(tracker.is_complete)
379
+
380
+ # Tracks whether the session's data is ready for dataset integration. To be considered ready, the data must be
381
+ # successfully processed with all relevant pipelines. Any session currently being processed with any processing
382
+ # pipeline is considered NOT ready.
383
+ manifest["dataset"].append(session_data.processed_data.p53_path.exists())
366
384
 
367
385
  # If all animal IDs are integer-convertible, stores them as numbers to promote proper sorting. Otherwise, stores
368
386
  # them as strings. The latter options are primarily kept for compatibility with Tyche data
@@ -382,11 +400,11 @@ def generate_project_manifest(
382
400
  "type": pl.String,
383
401
  "notes": pl.String,
384
402
  "complete": pl.UInt8,
385
- "integrity_verification": pl.UInt8,
386
- "suite2p_processing": pl.UInt8,
387
- "dataset_formation": pl.UInt8,
388
- "behavior_processing": pl.UInt8,
389
- "video_processing": pl.UInt8,
403
+ "integrity": pl.UInt8,
404
+ "suite2p": pl.UInt8,
405
+ "dataset": pl.UInt8,
406
+ "behavior": pl.UInt8,
407
+ "video": pl.UInt8,
390
408
  }
391
409
  df = pl.DataFrame(manifest, schema=schema, strict=False)
392
410
 
@@ -468,3 +486,101 @@ def verify_session_checksum(
468
486
  # runtime finished with an error to prevent deadlocking the runtime.
469
487
  if tracker.is_running:
470
488
  tracker.error()
489
+
490
+
491
+ def resolve_p53_marker(
492
+ session_path: Path,
493
+ create_processed_data_directory: bool = True,
494
+ processed_data_root: None | Path = None,
495
+ remove: bool = False,
496
+ ) -> None:
497
+ """Depending on configuration, either creates or removes the p53.bin marker file for the target session.
498
+
499
+ The marker file statically determines whether the session can be targeted by data processing or dataset formation
500
+ pipelines.
501
+
502
+ Notes:
503
+ Since dataset integration relies on data processing outputs, it is essential to prevent processing pipelines
504
+ from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
505
+ that only one type of runtime (processing or dataset integration) is allowed to work with the session.
506
+
507
+ For the p53.bin marker to be created, the session must not currently be undergoing any processing and must be
508
+ successfully processed with the minimal set of pipelines for its session type. Removing the p53.bin marker does
509
+ not have any dependencies and will be executed even if the session is currently undergoing dataset integration.
510
+ Due to this limitation, it is only possible to call this function with the 'remove' flag manually (via the
511
+ dedicated CLI).
512
+
513
+ Args:
514
+ session_path: The path to the session directory for which the p53.bin marker needs to be resolved. Note, the
515
+ input session directory must contain the 'raw_data' subdirectory.
516
+ create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
517
+ processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
518
+ the root directory where to store the processed data from all projects, and it will be automatically
519
+ modified to include the project name, the animal name, and the session ID.
520
+ remove: Determines whether this function is called to create or remove the p53.bin marker.
521
+ """
522
+
523
+ # Loads session data layout. If configured to do so, also creates the processed data hierarchy
524
+ session_data = SessionData.load(
525
+ session_path=session_path,
526
+ processed_data_root=processed_data_root,
527
+ make_processed_data_directory=create_processed_data_directory,
528
+ )
529
+
530
+ # If the p53.bin marker exists and the runtime is configured to remove it, removes the marker file. If the runtime
531
+ # is configured to create the marker, aborts the runtime (as the marker already exists).
532
+ if session_data.processed_data.p53_path.exists():
533
+ if remove:
534
+ session_data.processed_data.p53_path.unlink()
535
+ return # Ends remove runtime
536
+
537
+ return # Ends create runtime
538
+
539
+ # If the marker does not exist and the function is called in 'remove' mode, aborts the runtime
540
+ elif remove:
541
+ return # Ends remove runtime
542
+
543
+ # The rest of the runtime deals with determining whether it is safe to create the marker file.
544
+ # Queries the type of the processed session
545
+ session_type = session_data.session_type
546
+
547
+ # If the session type is not supported, aborts with an error
548
+ if session_type not in _valid_session_types:
549
+ message = (
550
+ f"Unable to determine the mandatory processing pipelines for session {session_data.session_name} of animal "
551
+ f"{session_data.animal_id} and project {session_data.processed_data}. The type of the session "
552
+ f"{session_type} is not one of the supported session types: {', '.join(_valid_session_types)}."
553
+ )
554
+ console.error(message=message, error=ValueError)
555
+
556
+ # Window checking sessions are not designed to be integrated into datasets, so they cannot be marked with p53.bin
557
+ # file. Similarly, any incomplete session is automatically excluded from dataset formation.
558
+ if session_type == "window checking" or not session_data.raw_data.telomere_path.exists():
559
+ return
560
+
561
+ # Training sessions collect similar data and share processing pipeline requirements
562
+ if session_type == "lick training" or session_type == "run training":
563
+ # If the session has not been successfully processed with the behavior processing pipeline, aborts without
564
+ # creating the marker file. Also ensures that the video tracking pipeline is not actively running, although it
565
+ # is not required
566
+ behavior_tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
567
+ video_tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
568
+ if not behavior_tracker.is_complete or video_tracker.is_running:
569
+ # Note, training runtimes do not require suite2p processing.
570
+ return
571
+
572
+ # Mesoscope experiment sessions require additional processing with suite2p
573
+ if session_type == "mesoscope experiment":
574
+ behavior_tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
575
+ suite2p_tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
576
+ video_tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
577
+
578
+ # Similar to above, if the session is not processed with the behavior pipeline or the suite2p pipeline, aborts
579
+ # without creating the marker file. Video tracker is not required for p53 marker creation, but the video
580
+ # tracking pipeline must not be actively running.
581
+ if not behavior_tracker.is_complete or not suite2p_tracker.is_complete or video_tracker.is_running:
582
+ return
583
+
584
+ # If the runtime reached this point, the session is eligible for dataset integration. Creates the p53.bin marker
585
+ # file, preventing the session from being processed again as long as the marker exists.
586
+ session_data.processed_data.p53_path.touch()
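
A usage sketch (editorial, not part of the diff) for resolve_p53_marker(); the import path and the session path are assumptions based on the defaults and re-exports shown above.

    from pathlib import Path

    from sl_shared_assets import resolve_p53_marker  # exact import path assumed

    session = Path("/local/storage/sun_data/example_project/example_animal/2024-01-01-12-00-00")

    # Creates p53.bin only if the session's mandatory pipelines have completed and nothing is
    # currently processing it; otherwise the call returns without touching the session.
    resolve_p53_marker(session_path=session)

    # Removal performs no safety checks and is intended to be triggered manually via the dedicated CLI.
    resolve_p53_marker(session_path=session, remove=True)
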
@@ -69,7 +69,13 @@ class ProjectManifest:
69
69
  This provides a tuple of all sessions, independent of the participating animal, that were recorded as part
70
70
  of the target project.
71
71
  """
72
- def get_sessions_for_animal(self, animal: str | int, exclude_incomplete: bool = True) -> tuple[str, ...]:
72
+ def get_sessions_for_animal(
73
+ self,
74
+ animal: str | int,
75
+ exclude_incomplete: bool = True,
76
+ dataset_ready_only: bool = False,
77
+ not_dataset_ready_only: bool = False,
78
+ ) -> tuple[str, ...]:
73
79
  """Returns all session IDs for the target animal.
74
80
 
75
81
  This provides a tuple of all sessions performed by the target animal as part of the target project.
@@ -78,6 +84,11 @@ class ProjectManifest:
78
84
  animal: The ID of the animal for which to get the session data.
79
85
  exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
80
86
  list.
87
+ dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
88
+ the output list. Enabling this option only shows sessions that can be integrated into a dataset.
89
+ not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
90
+ as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
91
+ enabled, the 'dataset_ready_only' option takes precedence.
81
92
 
82
93
  Raises:
83
94
  ValueError: If the specified animal is not found in the manifest file.
@@ -93,8 +104,8 @@ class ProjectManifest:
93
104
 
94
105
  Returns:
95
106
  A Polars DataFrame with the following columns: 'animal', 'date', 'notes', 'session', 'type', 'complete',
96
- 'intensity_verification', 'suite2p_processing', 'behavior_processing', 'video_processing',
97
- 'dataset_formation'.
107
+ 'integrity', 'suite2p', 'behavior', 'video',
108
+ 'dataset'.
98
109
  """
99
110
 
100
111
  def generate_project_manifest(
@@ -146,3 +157,35 @@ def verify_session_checksum(
146
157
  the root directory where to store the processed data from all projects, and it will be automatically
147
158
  modified to include the project name, the animal name, and the session ID.
148
159
  """
160
+
161
+ def resolve_p53_marker(
162
+ session_path: Path,
163
+ create_processed_data_directory: bool = True,
164
+ processed_data_root: None | Path = None,
165
+ remove: bool = False,
166
+ ) -> None:
167
+ """Depending on configuration, either creates or removes the p53.bin marker file for the target session.
168
+
169
+ The marker file statically determines whether the session can be targeted by data processing or dataset formation
170
+ pipelines.
171
+
172
+ Notes:
173
+ Since dataset integration relies on data processing outputs, it is essential to prevent processing pipelines
174
+ from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
175
+ that only one type of runtime (processing or dataset integration) is allowed to work with the session.
176
+
177
+ For the p53.bin marker to be created, the session must not currently be undergoing any processing and must be
178
+ successfully processed with the minimal set of pipelines for its session type. Removing the p53.bin marker does
179
+ not have any dependencies and will be executed even if the session is currently undergoing dataset integration.
180
+ Due to this limitation, it is only possible to call this function with the 'remove' flag manually (via the
181
+ dedicated CLI).
182
+
183
+ Args:
184
+ session_path: The path to the session directory for which the p53.bin marker needs to be resolved. Note, the
185
+ input session directory must contain the 'raw_data' subdirectory.
186
+ create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
187
+ processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
188
+ the root directory where to store the processed data from all projects, and it will be automatically
189
+ modified to include the project name, the animal name, and the session ID.
190
+ remove: Determines whether this function is called to create or remove the p53.bin marker.
191
+ """
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sl-shared-assets
3
- Version: 2.0.0
4
- Summary: Stores assets shared between multiple Sun (NeuroAI) lab data pipelines.
3
+ Version: 3.0.0rc1
4
+ Summary: Provides data acquisition and processing assets shared between Sun (NeuroAI) lab libraries.
5
5
  Project-URL: Homepage, https://github.com/Sun-Lab-NBB/sl-shared-assets
6
6
  Project-URL: Documentation, https://sl-shared-assets-api-docs.netlify.app/
7
7
  Author: Ivan Kondratyev, Kushaan Gupta, Natalie Yeung
@@ -681,7 +681,7 @@ License: GNU GENERAL PUBLIC LICENSE
681
681
  Public License instead of this License. But first, please read
682
682
  <https://www.gnu.org/licenses/why-not-lgpl.html>.
683
683
  License-File: LICENSE
684
- Keywords: acquisition,assets,data,processing,sunlab
684
+ Keywords: acquisition,assets,data,processing,server,sunlab
685
685
  Classifier: Development Status :: 5 - Production/Stable
686
686
  Classifier: Intended Audience :: Developers
687
687
  Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
@@ -697,7 +697,7 @@ Requires-Dist: ataraxis-time==3.0.0
697
697
  Requires-Dist: click==8.2.1
698
698
  Requires-Dist: filelock==3.18.0
699
699
  Requires-Dist: natsort==8.4.0
700
- Requires-Dist: numpy<2.3.0,>=2.0.2
700
+ Requires-Dist: numpy==2.2.6
701
701
  Requires-Dist: paramiko==3.5.1
702
702
  Requires-Dist: polars==1.31.0
703
703
  Requires-Dist: pyarrow==20.0.0
@@ -725,7 +725,7 @@ Requires-Dist: appdirs==1.4.4; extra == 'condarun'
725
725
  Requires-Dist: click==8.2.1; extra == 'condarun'
726
726
  Requires-Dist: filelock==3.18.0; extra == 'condarun'
727
727
  Requires-Dist: natsort==8.4.0; extra == 'condarun'
728
- Requires-Dist: numpy<2.3.0,>=2.0.2; extra == 'condarun'
728
+ Requires-Dist: numpy==2.2.6; extra == 'condarun'
729
729
  Requires-Dist: paramiko==3.5.1; extra == 'condarun'
730
730
  Requires-Dist: polars==1.31.0; extra == 'condarun'
731
731
  Requires-Dist: pyarrow==20.0.0; extra == 'condarun'
@@ -858,7 +858,6 @@ We use [semantic versioning](https://semver.org/) for this project. For the vers
858
858
 
859
859
  - Ivan Kondratyev ([Inkaros](https://github.com/Inkaros))
860
860
  - Kushaan Gupta ([kushaangupta](https://github.com/kushaangupta))
861
- - Yuantao Deng ([YuantaoDeng](https://github.com/YuantaoDeng))
862
861
  - Natalie Yeung
863
862
 
864
863
  ___