sl-shared-assets 2.0.0-py3-none-any.whl → 3.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic.

Files changed (32)
  1. sl_shared_assets/__init__.py +17 -9
  2. sl_shared_assets/__init__.pyi +12 -8
  3. sl_shared_assets/cli.py +266 -20
  4. sl_shared_assets/cli.pyi +46 -5
  5. sl_shared_assets/data_classes/__init__.py +8 -3
  6. sl_shared_assets/data_classes/__init__.pyi +8 -4
  7. sl_shared_assets/data_classes/configuration_data.py +149 -30
  8. sl_shared_assets/data_classes/configuration_data.pyi +49 -11
  9. sl_shared_assets/data_classes/runtime_data.py +70 -49
  10. sl_shared_assets/data_classes/runtime_data.pyi +41 -33
  11. sl_shared_assets/data_classes/session_data.py +193 -253
  12. sl_shared_assets/data_classes/session_data.pyi +99 -116
  13. sl_shared_assets/data_classes/surgery_data.py +1 -1
  14. sl_shared_assets/server/__init__.py +2 -2
  15. sl_shared_assets/server/__init__.pyi +5 -2
  16. sl_shared_assets/server/job.py +229 -1
  17. sl_shared_assets/server/job.pyi +111 -0
  18. sl_shared_assets/server/server.py +431 -31
  19. sl_shared_assets/server/server.pyi +158 -15
  20. sl_shared_assets/tools/__init__.py +2 -1
  21. sl_shared_assets/tools/__init__.pyi +2 -0
  22. sl_shared_assets/tools/ascension_tools.py +9 -21
  23. sl_shared_assets/tools/ascension_tools.pyi +1 -1
  24. sl_shared_assets/tools/packaging_tools.py +2 -2
  25. sl_shared_assets/tools/project_management_tools.py +147 -41
  26. sl_shared_assets/tools/project_management_tools.pyi +45 -6
  27. {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/METADATA +127 -13
  28. sl_shared_assets-3.0.0.dist-info/RECORD +36 -0
  29. {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/entry_points.txt +2 -0
  30. sl_shared_assets-2.0.0.dist-info/RECORD +0 -36
  31. {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/WHEEL +0 -0
  32. {sl_shared_assets-2.0.0.dist-info → sl_shared_assets-3.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,34 +1,42 @@
  from pathlib import Path
- from dataclasses import dataclass
+ from dataclasses import field, dataclass
 
+ from _typeshed import Incomplete
  from simple_slurm import Slurm as Slurm
  from paramiko.client import SSHClient as SSHClient
  from ataraxis_data_structures import YamlConfig
 
- from .job import Job as Job
+ from .job import (
+     Job as Job,
+     JupyterJob as JupyterJob,
+ )
 
  def generate_server_credentials(
      output_directory: Path,
      username: str,
      password: str,
      host: str = "cbsuwsun.biohpc.cornell.edu",
-     raw_data_root: str = "/workdir/sun_data",
-     processed_data_root: str = "/storage/sun_data",
+     storage_root: str = "/local/workdir",
+     working_root: str = "/local/storage",
+     shared_directory_name: str = "sun_data",
  ) -> None:
      """Generates a new server_credentials.yaml file under the specified directory, using input information.
 
      This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
-     only used when setting up new host-computers in the lab.
+     only used when setting up new host-computers or users in the lab.
 
      Args:
          output_directory: The directory where to save the generated server_credentials.yaml file.
          username: The username to use for server authentication.
          password: The password to use for server authentication.
          host: The hostname or IP address of the server to connect to.
-         raw_data_root: The path to the root directory used to store the raw data from all Sun lab projects on the
-             server.
-         processed_data_root: The path to the root directory used to store the processed data from all Sun lab projects
-             on the server.
+         storage_root: The path to the root storage (slow) server directory. Typically, this is the path to the
+             top-level (root) directory of the HDD RAID volume.
+         working_root: The path to the root working (fast) server directory. Typically, this is the path to the
+             top-level (root) directory of the NVME RAID volume. If the server uses the same volume for both storage and
+             working directories, enter the same path under both 'storage_root' and 'working_root'.
+         shared_directory_name: The name of the shared directory used to store all Sun lab project data on the storage
+             and working server volumes.
      """
  @dataclass()
  class ServerCredentials(YamlConfig):
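For orientation, a minimal call against the updated 3.0.0 signature might look as follows. The output path and account values are placeholders, and the top-level import location is an assumption, not something this diff confirms:

    from pathlib import Path

    # Import location assumed; the function may also live in a submodule.
    from sl_shared_assets import generate_server_credentials

    # All account values below are placeholders; the root arguments mirror the
    # new 3.0.0 defaults shown in the hunk above.
    generate_server_credentials(
        output_directory=Path("/home/me/.sl_assets"),
        username="my_netid",
        password="my_password",
        host="cbsuwsun.biohpc.cornell.edu",
        storage_root="/local/workdir",
        working_root="/local/storage",
        shared_directory_name="sun_data",
    )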
@@ -43,8 +51,15 @@ class ServerCredentials(YamlConfig):
      username: str = ...
      password: str = ...
      host: str = ...
-     raw_data_root: str = ...
-     processed_data_root: str = ...
+     storage_root: str = ...
+     working_root: str = ...
+     shared_directory_name: str = ...
+     raw_data_root: str = field(init=False, default_factory=Incomplete)
+     processed_data_root: str = field(init=False, default_factory=Incomplete)
+     user_data_root: str = field(init=False, default_factory=Incomplete)
+     user_working_root: str = field(init=False, default_factory=Incomplete)
+     def __post_init__(self) -> None:
+         """Statically resolves the paths to end-point directories using provided root directories."""
 
  class Server:
      """Encapsulates access to the Sun lab BioHPC processing server.
@@ -75,7 +90,79 @@ class Server:
      def __init__(self, credentials_path: Path) -> None: ...
      def __del__(self) -> None:
          """If the instance is connected to the server, terminates the connection before the instance is destroyed."""
-     def submit_job(self, job: Job) -> Job:
+     def create_job(
+         self, job_name: str, conda_environment: str, cpus_to_use: int = 10, ram_gb: int = 10, time_limit: int = 60
+     ) -> Job:
+         """Creates and returns a new Job instance.
+
+         Use this method to generate Job objects for all headless jobs that need to be run on the remote server. The
+         generated Job is a precursor that requires further configuration by the user before it can be submitted to the
+         server for execution.
+
+         Args:
+             job_name: The descriptive name of the SLURM job to be created. Primarily, this name is used in terminal
+                 printouts to identify the job to human operators.
+             conda_environment: The name of the conda environment to activate on the server before running the job
+                 logic. The environment should contain the necessary Python packages and CLIs to support running the
+                 job's logic.
+             cpus_to_use: The number of CPUs to use for the job.
+             ram_gb: The amount of RAM to allocate for the job, in Gigabytes.
+             time_limit: The maximum time limit for the job, in minutes. If the job is still running at the end of this
+                 time period, it will be forcibly terminated. It is highly advised to always set adequate maximum
+                 runtime limits to prevent jobs from hogging the server in case of runtime or algorithm errors.
+
+         Returns:
+             The initialized Job instance pre-filled with SLURM configuration data and conda activation commands.
+             Modify the returned instance with any additional commands as necessary for the job to fulfill its intended
+             purpose. Note that the Job requires submission via submit_job() to be executed by the server.
+         """
+     def launch_jupyter_server(
+         self,
+         job_name: str,
+         conda_environment: str,
+         notebook_directory: Path,
+         cpus_to_use: int = 2,
+         ram_gb: int = 32,
+         time_limit: int = 240,
+         port: int = 0,
+         jupyter_args: str = "",
+     ) -> JupyterJob:
+         """Launches a Jupyter notebook server on the target remote Sun lab server.
+
+         Use this method to run interactive Jupyter sessions on the remote server under SLURM control. Unlike
+         create_job(), this method automatically submits the job for execution as part of its runtime. Therefore, the
+         returned JupyterJob instance should only be used to query information about how to connect to the remote
+         Jupyter server.
+
+         Args:
+             job_name: The descriptive name of the Jupyter SLURM job to be created. Primarily, this name is used in
+                 terminal printouts to identify the job to human operators.
+             conda_environment: The name of the conda environment to activate on the server before running the job
+                 logic. The environment should contain the necessary Python packages and CLIs to support running the
+                 job's logic. For Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
+             port: The connection port number for the Jupyter server. If set to 0 (default), a random port number
+                 between 8888 and 9999 will be assigned to this connection to reduce the possibility of colliding with
+                 other user sessions.
+             notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have GUI
+                 access to items stored in or under this directory. For most runtimes, this should be set to the user's
+                 root data or working directory.
+             cpus_to_use: The number of CPUs to allocate to the Jupyter server. Keep this value as small as possible to
+                 avoid interfering with headless data processing jobs.
+             ram_gb: The amount of RAM, in GB, to allocate to the Jupyter server. Keep this value as small as possible
+                 to avoid interfering with headless data processing jobs.
+             time_limit: The maximum Jupyter server uptime, in minutes. Set this to the expected duration of your
+                 Jupyter session.
+             jupyter_args: Additional arguments to pass to the Jupyter notebook initialization command.
+
+         Returns:
+             The initialized JupyterJob instance that stores information on how to connect to the created Jupyter
+             server. Do NOT re-submit the job to the server, as this is done as part of this method's runtime.
+
+         Raises:
+             TimeoutError: If the target Jupyter server doesn't start within 120 minutes of this method being called.
+             RuntimeError: If job submission fails for any reason.
+         """
+     def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
          """Submits the input job to the managed BioHPC server via SLURM job manager.
 
          This method submits various jobs for execution via SLURM-managed BioHPC cluster. As part of its runtime, the
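Read together, these stubs imply a create → configure → submit → poll lifecycle for headless work. A sketch under stated assumptions: the Server constructor and the create_job()/submit_job()/job_complete() signatures come from this diff, while the top-level import and the command-appending call on Job are hypothetical, since Job's own API is not part of this diff:

    import time
    from pathlib import Path

    from sl_shared_assets import Server  # top-level export assumed

    server = Server(credentials_path=Path("server_credentials.yaml"))

    # Returned pre-filled with SLURM configuration and conda activation commands.
    job = server.create_job(
        job_name="suite2p_run",
        conda_environment="processing",
        cpus_to_use=10,
        ram_gb=10,
        time_limit=60,  # minutes; the docstring advises always setting a realistic cap
    )

    # Hypothetical: attach the actual work. Job's mutation API lives in
    # server/job.py, which this diff does not include.
    # job.add_command("sl-process-session ...")

    job = server.submit_job(job)  # returns the Job updated with its SLURM job id

    while not server.job_complete(job):  # True once the job finished or errored
        time.sleep(30)

    server.close()  # required before the instance is destroyed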
@@ -92,7 +179,7 @@ class Server:
          Raises:
              RuntimeError: If job submission to the server fails.
          """
-     def job_complete(self, job: Job) -> bool:
+     def job_complete(self, job: Job | JupyterJob) -> bool:
          """Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
          to an error.
 
@@ -105,6 +192,16 @@
              ValueError: If the input Job object does not contain a valid job_id, suggesting that it has not been
                  submitted to the server.
          """
+     def abort_job(self, job: Job | JupyterJob) -> None:
+         """Aborts the target job if it is currently running on the server.
+
+         Use this method to immediately abort running or queued jobs, without waiting for the timeout guard. If the job
+         is queued, this method will remove it from the SLURM queue. If the job is already terminated, this method will
+         do nothing.
+
+         Args:
+             job: The Job object that needs to be aborted.
+         """
      def pull_file(self, local_file_path: Path, remote_file_path: Path) -> None:
          """Moves the specified file from the remote server to the local machine.
 
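Because launch_jupyter_server() submits its job internally, the returned JupyterJob is for connection bookkeeping only, and abort_job() gives a clean early shutdown. A sketch; the notebook path is a placeholder, and the JupyterJob attributes that hold the connection details are not named anywhere in this diff:

    from pathlib import Path

    from sl_shared_assets import Server  # top-level export assumed

    server = Server(credentials_path=Path("server_credentials.yaml"))

    jupyter = server.launch_jupyter_server(
        job_name="analysis_notebook",
        conda_environment="analysis",  # must include the notebook and jupyterlab packages
        notebook_directory=Path("/local/storage/sun_data/my_netid"),  # placeholder
        time_limit=240,  # minutes of expected interactive use
        port=0,          # 0 picks a random port between 8888 and 9999
    )

    # Already submitted: do NOT pass the returned job to submit_job() again.
    # Inspect the JupyterJob for connection details (attribute names not shown here).
    print(jupyter)

    # Tear the session down early instead of waiting for the time limit.
    server.abort_job(jupyter)
    server.close()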
@@ -119,6 +216,20 @@
              local_file_path: The path to the file that needs to be copied to the remote server.
              remote_file_path: The path to the file on the remote server (where to copy the file).
          """
+     def pull_directory(self, local_directory_path: Path, remote_directory_path: Path) -> None:
+         """Recursively downloads the entire target directory from the remote server to the local machine.
+
+         Args:
+             local_directory_path: The path to the local directory where the remote directory will be copied.
+             remote_directory_path: The path to the directory on the remote server to be downloaded.
+         """
+     def push_directory(self, local_directory_path: Path, remote_directory_path: Path) -> None:
+         """Recursively uploads the entire target directory from the local machine to the remote server.
+
+         Args:
+             local_directory_path: The path to the local directory to be uploaded.
+             remote_directory_path: The path on the remote server where the directory will be copied.
+         """
      def remove(self, remote_path: Path, is_dir: bool) -> None:
          """Removes the specified file or directory from the remote server.
 
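The new directory-level helpers mirror the single-file transfer methods above and make round-tripping whole session folders a two-call affair. A sketch with placeholder paths and an assumed top-level import:

    from pathlib import Path

    from sl_shared_assets import Server  # top-level export assumed

    server = Server(credentials_path=Path("server_credentials.yaml"))

    # Upload an acquired session for processing (both paths are placeholders).
    server.push_directory(
        local_directory_path=Path("D:/sessions/2024-07-01-12-00-00"),
        remote_directory_path=Path("/local/workdir/sun_data/Tyche/mouse_011/2024-07-01-12-00-00"),
    )

    # Later, download the processed output back to the local machine.
    server.pull_directory(
        local_directory_path=Path("D:/sessions/2024-07-01-12-00-00/processed"),
        remote_directory_path=Path("/local/storage/sun_data/Tyche/mouse_011/2024-07-01-12-00-00"),
    )
    server.close()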
@@ -126,18 +237,50 @@
          remote_path: The path to the file or directory on the remote server to be removed.
          is_dir: Determines whether the input path represents a directory or a file.
          """
+     def create_directory(self, remote_path: Path, parents: bool = True) -> None:
+         """Creates the specified directory tree on the managed remote server via SFTP.
+
+         This method creates directories on the remote server, with options to create parent directories and handle
+         existing directories gracefully.
+
+         Args:
+             remote_path: The absolute path to the directory to create on the remote server, relative to the server
+                 root.
+             parents: Determines whether to create parent directories, if they are missing. Otherwise, if parents do not
+                 exist, raises a FileNotFoundError.
+
+         Notes:
+             This method silently assumes that it is fine if the directory already exists and treats it as a successful
+             runtime end-point.
+         """
+     def exists(self, remote_path: Path) -> bool:
+         """Returns True if the target file or directory exists on the remote server."""
      def close(self) -> None:
          """Closes the SSH connection to the server.
 
          This method has to be called before destroying the class instance to ensure proper resource cleanup.
          """
      @property
-     def raw_data_root(self) -> str:
+     def raw_data_root(self) -> Path:
          """Returns the absolute path to the directory used to store the raw data for all Sun lab projects on the server
          accessible through this class.
          """
      @property
-     def processed_data_root(self) -> str:
+     def processed_data_root(self) -> Path:
          """Returns the absolute path to the directory used to store the processed data for all Sun lab projects on the
          server accessible through this class.
          """
+     @property
+     def user_data_root(self) -> Path:
+         """Returns the absolute path to the directory used to store user-specific data on the server accessible through
+         this class."""
+     @property
+     def user_working_root(self) -> Path:
+         """Returns the absolute path to the user-specific working (fast) directory on the server accessible through
+         this class."""
+     @property
+     def host(self) -> str:
+         """Returns the hostname or IP address of the server accessible through this class."""
+     @property
+     def user(self) -> str:
+         """Returns the username used to authenticate with the server."""
@@ -4,7 +4,7 @@ integrity of the data. The tools from this package are used by most other data p
  from .transfer_tools import transfer_directory
  from .ascension_tools import ascend_tyche_data
  from .packaging_tools import calculate_directory_checksum
- from .project_management_tools import verify_session_checksum, generate_project_manifest
+ from .project_management_tools import resolve_p53_marker, verify_session_checksum, generate_project_manifest
 
  __all__ = [
      "transfer_directory",
@@ -12,4 +12,5 @@ __all__ = [
      "ascend_tyche_data",
      "verify_session_checksum",
      "generate_project_manifest",
+     "resolve_p53_marker",
  ]
@@ -2,6 +2,7 @@ from .transfer_tools import transfer_directory as transfer_directory
  from .ascension_tools import ascend_tyche_data as ascend_tyche_data
  from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
  from .project_management_tools import (
+     resolve_p53_marker as resolve_p53_marker,
      verify_session_checksum as verify_session_checksum,
      generate_project_manifest as generate_project_manifest,
  )
@@ -12,4 +13,5 @@ __all__ = [
      "ascend_tyche_data",
      "verify_session_checksum",
      "generate_project_manifest",
+     "resolve_p53_marker",
  ]
@@ -7,10 +7,10 @@ from pathlib import Path
  import datetime
 
  import numpy as np
- from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
+ from ataraxis_base_utilities import LogLevel, console
  from ataraxis_time.time_helpers import extract_timestamp_from_bytes
 
- from ..data_classes import SessionData, ProjectConfiguration, get_system_configuration_data
+ from ..data_classes import SessionData, SessionTypes, get_system_configuration_data
  from .transfer_tools import transfer_directory
  from .packaging_tools import calculate_directory_checksum
 
@@ -194,26 +194,12 @@ def ascend_tyche_data(root_directory: Path) -> None:
          root_directory: The directory that stores one or more Tyche animal folders. This can be conceptualized as the
              root directory for the Tyche project.
      """
-     # Generates a (shared) project configuration file.
-     project_configuration = ProjectConfiguration()
-
      # The acquisition system config resolves most paths and filesystem configuration arguments
      acquisition_system = get_system_configuration_data()
-     output_root_directory = acquisition_system.paths.root_directory
      server_root_directory = acquisition_system.paths.server_storage_directory
 
      # Statically defines project name and local root paths
      project_name = "Tyche"
-     project_configuration.project_name = project_name
-
-     # Uses nonsensical google sheet IDs. Tyche project did not use Google Sheet processing like our modern projects do.
-     project_configuration.water_log_sheet_id = "1xFh9Q2zT7pL3mVkJdR8bN6yXoE4wS5aG0cHu2Kf7D3v"
-     project_configuration.surgery_sheet_id = "1xFh9Q2zT7pL3mVkJdR8bN6yXoE4wS5aG0cHu2Kf7D3v"
-
-     # Dumps project configuration into the 'configuration' subfolder of the Tyche project.
-     configuration_path = output_root_directory.joinpath("Tyche", "configuration", "project_configuration.yaml")
-     ensure_directory_exists(configuration_path)
-     project_configuration.save(path=configuration_path)
 
      # Assumes that root directory stores all animal folders to be processed
      for animal_folder in root_directory.iterdir():
@@ -230,17 +216,19 @@ def ascend_tyche_data(root_directory: Path) -> None:
          # This procedure generates timestamp-based session names, analogous to how our modern pipeline does it.
          session_name = _generate_session_name(acquisition_path=acquisition_folder)
 
-         # Uses derived session name and the statically created project configuration file to create the
-         # session data hierarchy using the output root. This generates a 'standard' Sun lab directory structure
-         # for the Tyche data.
+         # Uses derived session name and the derived project name to create the session data hierarchy using the
+         # output root. This generates a 'standard' Sun lab directory structure for the Tyche data.
          session_data = SessionData.create(
-             project_name=project_configuration.project_name,
+             project_name=project_name,
              session_name=session_name,
              animal_id=animal_name,
-             session_type="mesoscope experiment",
+             session_type=SessionTypes.MESOSCOPE_EXPERIMENT,
              experiment_name=None,
          )
 
+         # Since this runtime reprocesses already acquired data, marks the session as fully initialized.
+         session_data.runtime_initialized()
+
          # Moves the data from the old hierarchy to the new hierarchy. If the process runs as expected, and
          # fully empties the source acquisition folder, destroys the folder. Otherwise, notifies the user that
          # the runtime did not fully process the session data and requests intervention.
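The migration now passes a SessionTypes member where 2.0.0 used the raw "mesoscope experiment" string. The enum itself lives in data_classes and is not shown in this diff; a plausible minimal shape, with the one member value inferred from the removed literal:

    from enum import Enum

    class SessionTypes(str, Enum):
        # Value inferred from the string literal this diff replaces; any other
        # members are hypothetical and omitted here.
        MESOSCOPE_EXPERIMENT = "mesoscope experiment"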
@@ -2,7 +2,7 @@ from pathlib import Path
  from ..data_classes import (
      SessionData as SessionData,
-     ProjectConfiguration as ProjectConfiguration,
+     SessionTypes as SessionTypes,
      get_system_configuration_data as get_system_configuration_data,
  )
  from .transfer_tools import transfer_directory as transfer_directory
@@ -17,13 +17,13 @@ _excluded_files = {
      "ax_checksum.txt",
      "ubiquitin.bin",
      "telomere.bin",
+     "p53.bin",
+     "nk.bin",
      "suite2p_processing_tracker.yaml",
      "dataset_formation_tracker.yaml",
-     "behavior_processing_tracker.yaml",
      "video_processing_tracker.yaml",
      "integrity_verification_tracker.yaml",
      "suite2p_processing_tracker.yaml.lock",
-     "dataset_formation_tracker.yaml.lock",
      "behavior_processing_tracker.yaml.lock",
      "video_processing_tracker.yaml.lock",
      "integrity_verification_tracker.yaml.lock",