sl-shared-assets 3.0.0rc14__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- sl_shared_assets/__init__.py +2 -0
- sl_shared_assets/__init__.pyi +2 -0
- sl_shared_assets/cli.py +38 -14
- sl_shared_assets/cli.pyi +11 -11
- sl_shared_assets/data_classes/__init__.py +2 -2
- sl_shared_assets/data_classes/configuration_data.py +11 -8
- sl_shared_assets/data_classes/configuration_data.pyi +8 -7
- sl_shared_assets/data_classes/runtime_data.py +2 -2
- sl_shared_assets/data_classes/runtime_data.pyi +2 -2
- sl_shared_assets/data_classes/session_data.py +43 -29
- sl_shared_assets/data_classes/session_data.pyi +11 -11
- sl_shared_assets/server/__init__.py +1 -1
- sl_shared_assets/server/job.py +10 -10
- sl_shared_assets/server/job.pyi +5 -5
- sl_shared_assets/server/server.py +15 -15
- sl_shared_assets/server/server.pyi +7 -7
- sl_shared_assets/tools/__init__.py +7 -1
- sl_shared_assets/tools/__init__.pyi +2 -0
- sl_shared_assets/tools/ascension_tools.py +8 -8
- sl_shared_assets/tools/packaging_tools.py +2 -1
- sl_shared_assets/tools/project_management_tools.py +87 -41
- sl_shared_assets/tools/project_management_tools.pyi +23 -11
- sl_shared_assets/tools/transfer_tools.py +1 -1
- sl_shared_assets/tools/transfer_tools.pyi +1 -1
- {sl_shared_assets-3.0.0rc14.dist-info → sl_shared_assets-3.1.0.dist-info}/METADATA +122 -5
- sl_shared_assets-3.1.0.dist-info/RECORD +36 -0
- sl_shared_assets-3.0.0rc14.dist-info/RECORD +0 -36
- {sl_shared_assets-3.0.0rc14.dist-info → sl_shared_assets-3.1.0.dist-info}/WHEEL +0 -0
- {sl_shared_assets-3.0.0rc14.dist-info → sl_shared_assets-3.1.0.dist-info}/entry_points.txt +0 -0
- {sl_shared_assets-3.0.0rc14.dist-info → sl_shared_assets-3.1.0.dist-info}/licenses/LICENSE +0 -0
@@ -20,7 +20,7 @@ class SessionTypes(StrEnum):
 
     Notes:
         This enumeration does not differentiate between different acquisition systems. Different acquisition systems
-        support different session types
+        support different session types and may not be suited for acquiring some of the session types listed in this
         enumeration.
     """
 

@@ -206,8 +206,8 @@ class SessionData(YamlConfig):
                 provide the path to the root project directory (directory that stores all Sun lab projects) on that
                 drive. The method will automatically resolve the project/animal/session/processed_data hierarchy using
                 this root path. If raw and processed data are kept on the same drive, keep this set to None.
-            make_processed_data_directory: Determines whether this method should create processed_data directory if
-                does not exist.
+            make_processed_data_directory: Determines whether this method should create the processed_data directory if
+                it does not exist.
 
         Returns:
             An initialized SessionData instance for the session whose data is stored at the provided path.
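The docstring above describes how the loader resolves the project/animal/session hierarchy from an optional processed-data root. Below is a minimal sketch of that resolution logic; the helper name and the exact directory layout are assumptions based on the docstring, not the package's actual implementation.

from pathlib import Path


def resolve_processed_data_path(
    session_path: Path,
    processed_data_root: Path | None,
    make_processed_data_directory: bool = False,
) -> Path:
    """Hypothetical helper: mirrors the project/animal/session hierarchy under a processed-data root."""
    if processed_data_root is None:
        # Raw and processed data share the same drive: keep processed_data next to raw_data.
        processed = session_path / "processed_data"
    else:
        # session_path is assumed to look like <projects_root>/<project>/<animal>/<session>.
        session = session_path.name
        animal = session_path.parent.name
        project = session_path.parent.parent.name
        processed = processed_data_root / project / animal / session / "processed_data"

    if make_processed_data_directory:
        processed.mkdir(parents=True, exist_ok=True)
    return processed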
@@ -226,7 +226,7 @@ class SessionData(YamlConfig):
     def _save(self) -> None:
         """Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.
 
-        This is used to save the data stored in the instance to disk
+        This is used to save the data stored in the instance to disk so that it can be reused during further stages of
         data processing. The method is intended to only be used by the SessionData instance itself during its
         create() method runtime.
         """

@@ -245,13 +245,13 @@ class ProcessingTracker(YamlConfig):
     _encountered_error: bool = ...
     _is_running: bool = ...
     _lock_path: str = field(init=False)
+    _started_runtime: bool = ...
     def __post_init__(self) -> None: ...
     def __del__(self) -> None:
-        """If the instance
-
+        """If the instance as used to start a runtime, ensures that the instance properly marks the runtime as completed
+        or erred before being garbage-collected.
 
-
-        data.
+        This is a security mechanism to prevent deadlocking the processed session and pipeline for future runtimes.
         """
     def _load_state(self) -> None:
         """Reads the current processing state from the wrapped .YAML file."""

@@ -264,7 +264,7 @@ class ProcessingTracker(YamlConfig):
         with an error.
 
         Raises:
-            TimeoutError: If the file
+            TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
         """
     def error(self) -> None:
         """Configures the tracker file to indicate that the tracked processing runtime encountered an error and failed

@@ -276,7 +276,7 @@ class ProcessingTracker(YamlConfig):
         from the process that calls this method.
 
         Raises:
-            TimeoutError: If the file
+            TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
         """
     def stop(self) -> None:
         """Configures the tracker file to indicate that the tracked processing runtime has been completed successfully.

@@ -286,7 +286,7 @@ class ProcessingTracker(YamlConfig):
         at the end of the runtime.
 
         Raises:
-            TimeoutError: If the file
+            TimeoutError: If the .lock file for the target .YAML file cannot be acquired within the timeout period.
         """
     @property
     def is_complete(self) -> bool:
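The ProcessingTracker stubs above describe a tracker that persists runtime state to a .YAML file and guards it with a companion .lock file, raising TimeoutError when the lock cannot be acquired. Here is a minimal sketch of that locking pattern, assuming the third-party filelock package; it is an illustration, not the sl-shared-assets implementation.

from dataclasses import dataclass, field
from pathlib import Path

import yaml
from filelock import FileLock, Timeout  # filelock's Timeout subclasses TimeoutError


@dataclass
class MinimalTracker:
    """Hypothetical tracker: mirrors the locking pattern, not the package's actual class."""

    file_path: Path
    _complete: bool = False
    _encountered_error: bool = False
    _lock_path: str = field(init=False)

    def __post_init__(self) -> None:
        self._lock_path = str(self.file_path) + ".lock"

    def _update(self, complete: bool, error: bool) -> None:
        try:
            # Raises Timeout (a TimeoutError subclass) if another process holds the lock for too long.
            with FileLock(self._lock_path, timeout=10):
                self._complete, self._encountered_error = complete, error
                self.file_path.write_text(
                    yaml.safe_dump({"complete": complete, "encountered_error": error})
                )
        except Timeout as error_:
            raise TimeoutError(f"Could not acquire {self._lock_path}") from error_

    def stop(self) -> None:
        self._update(complete=True, error=False)

    def error(self) -> None:
        self._update(complete=False, error=True)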
@@ -1,5 +1,5 @@
 """This package provides the classes and methods used by all Sun lab libraries to submit remote jobs to the BioHPC
-and other compute servers. This package is also used across all Sun lab members private code to interface with the
+and other compute servers. This package is also used across all Sun lab members' private code to interface with the
 shared server."""
 
 from .job import Job, JupyterJob

sl_shared_assets/server/job.py CHANGED

@@ -1,6 +1,6 @@
 """This module provides the core Job class, used as the starting point for all SLURM-managed job executed on lab compute
 server(s). Specifically, the Job class acts as a wrapper around the SLURM configuration and specific logic of each
-job. During runtime, Server class interacts with input job objects to manage their transfer and execution on the
+job. During runtime, the Server class interacts with input job objects to manage their transfer and execution on the
 remote servers.
 
 Since version 3.0.0, this module also provides the specialized JupyterJob class used to launch remote Jupyter

@@ -97,8 +97,8 @@ class Job:
     Attributes:
         remote_script_path: Stores the path to the script file relative to the root of the remote server that runs the
             command.
-        job_id: Stores the unique job identifier assigned by the SLURM manager to this job
-            execution. This field initialized to None and is overwritten by the Server class that submits the job.
+        job_id: Stores the unique job identifier assigned by the SLURM manager to this job when it is accepted for
+            execution. This field is initialized to None and is overwritten by the Server class that submits the job.
         job_name: Stores the descriptive name of the SLURM job.
         _command: Stores the managed SLURM command object.
     """

@@ -174,7 +174,7 @@ class Job:
         # initialization would not work as expected.
         fixed_script_content = script_content.replace("\\$", "$")
 
-        # Returns the script content to caller as a string
+        # Returns the script content to the caller as a string
         return fixed_script_content
 
 

@@ -202,8 +202,8 @@ class JupyterJob(Job):
         conda_environment: The name of the conda environment to activate on the server before running the job logic. The
             environment should contain the necessary Python packages and CLIs to support running the job's logic. For
             Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
-        port: The connection port number for Jupyter server. Do not change the default value unless you know what
-            are doing, as the server has most common communication ports closed for security reasons.
+        port: The connection port number for the Jupyter server. Do not change the default value unless you know what
+            you are doing, as the server has most common communication ports closed for security reasons.
         notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have access to
             items stored in or under this directory. For most runtimes, this should be set to the user's root data or
             working directory.

@@ -270,7 +270,7 @@ class JupyterJob(Job):
         self._build_jupyter_command(jupyter_args)
 
     def _build_jupyter_command(self, jupyter_args: str) -> None:
-        """Builds the command to launch Jupyter notebook server on the remote Sun lab server."""
+        """Builds the command to launch the Jupyter notebook server on the remote Sun lab server."""
 
         # Gets the hostname of the compute node and caches it in the connection data file. Also caches the port name.
         self.add_command('echo "COMPUTE_NODE: $(hostname)" > {}'.format(self.connection_info_file))

@@ -297,7 +297,7 @@ class JupyterJob(Job):
         if jupyter_args:
             jupyter_cmd.append(jupyter_args)
 
-        # Adds resolved jupyter command to the list of job commands.
+        # Adds the resolved jupyter command to the list of job commands.
         jupyter_cmd_str = " ".join(jupyter_cmd)
         self.add_command(jupyter_cmd_str)
 

@@ -324,7 +324,7 @@ class JupyterJob(Job):
             message = f"Could not parse connection information file for the Jupyter server job with id {self.job_id}."
             console.error(message, ValueError)
 
-        # Stores extracted data inside connection_info attribute as a JupyterConnectionInfo instance.
+        # Stores extracted data inside the connection_info attribute as a JupyterConnectionInfo instance.
         self.connection_info = _JupyterConnectionInfo(
             compute_node=compute_node_match.group(1).strip(),  # type: ignore
             port=int(port_match.group(1)),  # type: ignore
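The hunks above show the JupyterJob caching connection details on the compute node (via echo "COMPUTE_NODE: $(hostname)" > file) and later parsing them back into compute node, port, and token fields. A hedged sketch of that parsing step follows; the key names other than COMPUTE_NODE and the regular expressions are assumptions rather than the package's actual code.

import re
from dataclasses import dataclass
from pathlib import Path


@dataclass
class ConnectionInfo:
    compute_node: str
    port: int
    token: str


def parse_connection_info(info_file: Path) -> ConnectionInfo:
    """Hypothetical parser for a Jupyter connection information file."""
    text = info_file.read_text()

    # COMPUTE_NODE is written by the job script shown in the diff; PORT and TOKEN keys are assumed here.
    node_match = re.search(r"COMPUTE_NODE:\s*(\S+)", text)
    port_match = re.search(r"PORT:\s*(\d+)", text)
    token_match = re.search(r"TOKEN:\s*(\S+)", text)

    if node_match is None or port_match is None or token_match is None:
        raise ValueError(f"Could not parse connection information file {info_file}.")

    return ConnectionInfo(
        compute_node=node_match.group(1).strip(),
        port=int(port_match.group(1)),
        token=token_match.group(1).strip(),
    )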
@@ -352,7 +352,7 @@ class JupyterJob(Job):
             )
             return  # No connection information available, so does not proceed with printing.
 
-        # Prints generic connection details to terminal
+        # Prints generic connection details to the terminal
         console.echo(f"Jupyter is running on: {self.connection_info.compute_node}")
         console.echo(f"Port: {self.connection_info.port}")
         console.echo(f"Token: {self.connection_info.token}")

sl_shared_assets/server/job.pyi CHANGED

@@ -73,8 +73,8 @@ class Job:
     Attributes:
         remote_script_path: Stores the path to the script file relative to the root of the remote server that runs the
             command.
-        job_id: Stores the unique job identifier assigned by the SLURM manager to this job
-            execution. This field initialized to None and is overwritten by the Server class that submits the job.
+        job_id: Stores the unique job identifier assigned by the SLURM manager to this job when it is accepted for
+            execution. This field is initialized to None and is overwritten by the Server class that submits the job.
         job_name: Stores the descriptive name of the SLURM job.
         _command: Stores the managed SLURM command object.
     """

@@ -138,8 +138,8 @@ class JupyterJob(Job):
         conda_environment: The name of the conda environment to activate on the server before running the job logic. The
             environment should contain the necessary Python packages and CLIs to support running the job's logic. For
             Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
-        port: The connection port number for Jupyter server. Do not change the default value unless you know what
-            are doing, as the server has most common communication ports closed for security reasons.
+        port: The connection port number for the Jupyter server. Do not change the default value unless you know what
+            you are doing, as the server has most common communication ports closed for security reasons.
         notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have access to
             items stored in or under this directory. For most runtimes, this should be set to the user's root data or
             working directory.

@@ -184,7 +184,7 @@ class JupyterJob(Job):
         jupyter_args: str = "",
     ) -> None: ...
     def _build_jupyter_command(self, jupyter_args: str) -> None:
-        """Builds the command to launch Jupyter notebook server on the remote Sun lab server."""
+        """Builds the command to launch the Jupyter notebook server on the remote Sun lab server."""
     def parse_connection_info(self, info_file: Path) -> None:
         """Parses the connection information file created by the Jupyter job on the server.
 

@@ -27,7 +27,7 @@ def generate_server_credentials(
     output_directory: Path,
     username: str,
     password: str,
-    host: str = "cbsuwsun.
+    host: str = "cbsuwsun.biopic.cornell.edu",
     storage_root: str = "/local/workdir",
     working_root: str = "/local/storage",
     shared_directory_name: str = "sun_data",
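The signature above shows the credential-generation helper and its defaults. A hedged usage sketch follows; the argument values are placeholders, the top-level import path is assumed, and the output file format is not shown in this diff.

from pathlib import Path

from sl_shared_assets import generate_server_credentials  # import path assumed

# Writes a server credentials file into the chosen directory; the defaults shown in the diff
# (host, storage_root, working_root, shared_directory_name) are used unless overridden.
generate_server_credentials(
    output_directory=Path("~/sun_lab_credentials").expanduser(),
    username="my_user",      # placeholder
    password="my_password",  # placeholder
)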
@@ -255,7 +255,7 @@ class Server:
             conda_environment: The name of the conda environment to activate on the server before running the job logic.
                 The environment should contain the necessary Python packages and CLIs to support running the job's
                 logic. For Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
-            port: The connection port number for Jupyter server. If set to 0 (default), a random port number between
+            port: The connection port number for the Jupyter server. If set to 0 (default), a random port number between
                 8888 and 9999 will be assigned to this connection to reduce the possibility of colliding with other
                 user sessions.
             notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have GUI

@@ -274,8 +274,8 @@ class Server:
             Do NOT re-submit the job to the server, as this is done as part of this method's runtime.
 
         Raises:
-            TimeoutError: If the target Jupyter server doesn't start within 120 minutes
-            RuntimeError: If job submission fails for any reason.
+            TimeoutError: If the target Jupyter server doesn't start within 120 minutes of this method being called.
+            RuntimeError: If the job submission fails for any reason.
         """
 
         # Statically configures the working directory to be stored under:

@@ -309,7 +309,7 @@ class Server:
     def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
         """Submits the input job to the managed BioHPC server via SLURM job manager.
 
-        This method submits various jobs for execution via SLURM-managed BioHPC cluster. As part of its runtime, the
+        This method submits various jobs for execution via the SLURM-managed BioHPC cluster. As part of its runtime, the
         method translates the Job object into the shell script, moves the script to the target working directory on
         the server, and instructs the server to execute the shell script (via SLURM).
 
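The submit_job docstring above describes translating a Job into a shell script, copying it to the server, and launching it through SLURM. Below is a minimal sketch of that flow using paramiko and sbatch; it illustrates the general pattern and is not the Server class's actual implementation.

import re

import paramiko


def submit_script(client: paramiko.SSHClient, local_script: str, remote_script: str) -> int:
    """Hypothetical submission helper: copies a job script to the server and runs it via sbatch."""
    # Transfers the rendered shell script to the remote working directory.
    sftp = client.open_sftp()
    try:
        sftp.put(localpath=local_script, remotepath=remote_script)
    finally:
        sftp.close()

    # Asks SLURM to queue the script; sbatch replies with "Submitted batch job <id>".
    _, stdout, stderr = client.exec_command(f"sbatch {remote_script}")
    output = stdout.read().decode()
    match = re.search(r"Submitted batch job (\d+)", output)
    if match is None:
        raise RuntimeError(f"Job submission failed: {stderr.read().decode()}")
    return int(match.group(1))  # The SLURM job id, stored on the Job object by the real Server class.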
@@ -400,7 +400,7 @@ class Server:
 
                 timer.delay_noblock(delay=5, allow_sleep=True)  # Waits for 5 seconds before checking again
             else:
-                # Only raises timeout error if the while loop is not broken in 120 seconds
+                # Only raises the timeout error if the while loop is not broken in 120 seconds
                 message = (
                     f"Remote jupyter server job {job.job_name} with id {job.job_id} did not start within 120 seconds "
                     f"from being submitted. Since all jupyter jobs are intended to be interactive and the server is "
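The hunk above is part of a poll-and-wait loop: the job status is checked every five seconds and a timeout error is raised if the Jupyter server does not start within the allotted window. A generic sketch of that pattern using only the standard library (the real code uses a lab timer utility):

import time


def wait_until(condition, timeout_s: float = 120.0, poll_s: float = 5.0) -> None:
    """Hypothetical poll-with-timeout helper mirroring the loop shown in the diff."""
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        if condition():
            return  # The job has started (or produced its connection file); stop waiting.
        time.sleep(poll_s)  # Waits before checking again, like timer.delay_noblock(delay=5).
    raise TimeoutError(f"Condition was not met within {timeout_s} seconds.")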
@@ -418,7 +418,7 @@ class Server:
         """Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
         to an error.
 
-        If the job is still running or is waiting inside the execution queue, returns False.
+        If the job is still running or is waiting inside the execution queue, the method returns False.
 
         Args:
             job: The Job object whose status needs to be checked.

@@ -446,7 +446,7 @@ class Server:
     def abort_job(self, job: Job | JupyterJob) -> None:
         """Aborts the target job if it is currently running on the server.
 
-        Use this method to immediately abort running or queued jobs
+        Use this method to immediately abort running or queued jobs without waiting for the timeout guard. If the job
         is queued, this method will remove it from the SLURM queue. If the job is already terminated, this method will
         do nothing.
 

@@ -507,12 +507,12 @@ class Server:
                 remote_item_path = remote_directory_path.joinpath(item.filename)
                 local_item_path = local_directory_path.joinpath(item.filename)
 
-                # Checks if item is a directory
+                # Checks if the item is a directory
                 if stat.S_ISDIR(item.st_mode):  # type: ignore
                     # Recursively pulls the subdirectory
                     self.pull_directory(local_item_path, remote_item_path)
                 else:
-                    # Pulls the individual file using existing method
+                    # Pulls the individual file using the existing method
                     sftp.get(localpath=str(local_item_path), remotepath=str(remote_item_path))
 
         finally:
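The pull_directory hunk above recurses over a remote directory, descending into subdirectories and fetching individual files over SFTP. A self-contained sketch of the same pattern with paramiko, assuming an already-connected SSHClient:

import stat
from pathlib import Path

import paramiko


def pull_directory(client: paramiko.SSHClient, remote_dir: str, local_dir: Path) -> None:
    """Hypothetical recursive SFTP pull, mirroring the pattern shown in the diff."""
    sftp = client.open_sftp()
    try:
        local_dir.mkdir(parents=True, exist_ok=True)
        for item in sftp.listdir_attr(remote_dir):
            remote_item = f"{remote_dir}/{item.filename}"
            local_item = local_dir / item.filename
            if item.st_mode is not None and stat.S_ISDIR(item.st_mode):
                # Recursively pulls the subdirectory.
                pull_directory(client, remote_item, local_item)
            else:
                # Pulls the individual file.
                sftp.get(remotepath=remote_item, localpath=str(local_item))
    finally:
        sftp.close()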
@@ -535,7 +535,7 @@ class Server:
         sftp = self._client.open_sftp()
 
         try:
-            # Creates the remote directory using existing method
+            # Creates the remote directory using the existing method
             self.create_directory(remote_directory_path, parents=True)
 
             # Iterates through all items in the local directory

@@ -546,7 +546,7 @@ class Server:
                     # Recursively pushes subdirectory
                     self.push_directory(local_item_path, remote_item_path)
                 else:
-                    # Pushes the individual file using existing method
+                    # Pushes the individual file using the existing method
                     sftp.put(localpath=str(local_item_path), remotepath=str(remote_item_path))
 
         finally:

@@ -609,7 +609,7 @@ class Server:
                 current_path = part
 
                 try:
-                    # Checks if directory exists by trying to stat it
+                    # Checks if the directory exists by trying to 'stat' it
                     sftp.stat(current_path)
                 except FileNotFoundError:
                     # If the directory does not exist, creates it

@@ -617,7 +617,7 @@ class Server:
         else:
             # Otherwise, only creates the final directory
             try:
-                # Checks if directory already exists
+                # Checks if the directory already exists
                 sftp.stat(remote_path_str)
             except FileNotFoundError:
                 # Creates the directory if it does not exist
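The two hunks above belong to a create_directory helper that walks the remote path part by part, using sftp.stat to test for existence and creating missing segments. A compact sketch of that idea, again assuming a connected paramiko client (not the package's actual method):

import paramiko


def ensure_remote_directory(client: paramiko.SSHClient, remote_path: str, parents: bool = True) -> None:
    """Hypothetical helper: creates a remote directory (and optionally its parents) over SFTP."""
    sftp = client.open_sftp()
    try:
        parts = [part for part in remote_path.split("/") if part]
        targets = (
            ["/" + "/".join(parts[: i + 1]) for i in range(len(parts))] if parents else [remote_path]
        )
        for current in targets:
            try:
                # Checks if the directory exists by trying to 'stat' it.
                sftp.stat(current)
            except FileNotFoundError:
                # Creates the directory if it does not exist.
                sftp.mkdir(current)
    finally:
        sftp.close()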
@@ -632,7 +632,7 @@ class Server:
 
         sftp = self._client.open_sftp()
         try:
-            # Checks if the target file or directory exists by trying to stat it
+            # Checks if the target file or directory exists by trying to 'stat' it
             sftp.stat(str(remote_path))
 
             # If the request does not err, returns True (file or directory exists)

@@ -15,7 +15,7 @@ def generate_server_credentials(
     output_directory: Path,
     username: str,
     password: str,
-    host: str = "cbsuwsun.
+    host: str = "cbsuwsun.biopic.cornell.edu",
     storage_root: str = "/local/workdir",
     working_root: str = "/local/storage",
     shared_directory_name: str = "sun_data",

@@ -140,7 +140,7 @@ class Server:
             conda_environment: The name of the conda environment to activate on the server before running the job logic.
                 The environment should contain the necessary Python packages and CLIs to support running the job's
                 logic. For Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
-            port: The connection port number for Jupyter server. If set to 0 (default), a random port number between
+            port: The connection port number for the Jupyter server. If set to 0 (default), a random port number between
                 8888 and 9999 will be assigned to this connection to reduce the possibility of colliding with other
                 user sessions.
             notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have GUI

@@ -159,13 +159,13 @@ class Server:
             Do NOT re-submit the job to the server, as this is done as part of this method's runtime.
 
         Raises:
-            TimeoutError: If the target Jupyter server doesn't start within 120 minutes
-            RuntimeError: If job submission fails for any reason.
+            TimeoutError: If the target Jupyter server doesn't start within 120 minutes of this method being called.
+            RuntimeError: If the job submission fails for any reason.
         """
     def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
         """Submits the input job to the managed BioHPC server via SLURM job manager.
 
-        This method submits various jobs for execution via SLURM-managed BioHPC cluster. As part of its runtime, the
+        This method submits various jobs for execution via the SLURM-managed BioHPC cluster. As part of its runtime, the
         method translates the Job object into the shell script, moves the script to the target working directory on
         the server, and instructs the server to execute the shell script (via SLURM).
 

@@ -183,7 +183,7 @@ class Server:
         """Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
         to an error.
 
-        If the job is still running or is waiting inside the execution queue, returns False.
+        If the job is still running or is waiting inside the execution queue, the method returns False.
 
         Args:
             job: The Job object whose status needs to be checked.

@@ -195,7 +195,7 @@ class Server:
     def abort_job(self, job: Job | JupyterJob) -> None:
         """Aborts the target job if it is currently running on the server.
 
-        Use this method to immediately abort running or queued jobs
+        Use this method to immediately abort running or queued jobs without waiting for the timeout guard. If the job
         is queued, this method will remove it from the SLURM queue. If the job is already terminated, this method will
         do nothing.
 

@@ -4,9 +4,15 @@ integrity of the data. The tools from this package are used by most other data p
 from .transfer_tools import transfer_directory
 from .ascension_tools import ascend_tyche_data
 from .packaging_tools import calculate_directory_checksum
-from .project_management_tools import
+from .project_management_tools import (
+    ProjectManifest,
+    resolve_p53_marker,
+    verify_session_checksum,
+    generate_project_manifest,
+)
 
 __all__ = [
+    "ProjectManifest",
     "transfer_directory",
     "calculate_directory_checksum",
     "ascend_tyche_data",

@@ -2,12 +2,14 @@ from .transfer_tools import transfer_directory as transfer_directory
 from .ascension_tools import ascend_tyche_data as ascend_tyche_data
 from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
 from .project_management_tools import (
+    ProjectManifest as ProjectManifest,
     resolve_p53_marker as resolve_p53_marker,
     verify_session_checksum as verify_session_checksum,
     generate_project_manifest as generate_project_manifest,
 )
 
 __all__ = [
+    "ProjectManifest",
     "transfer_directory",
     "calculate_directory_checksum",
     "ascend_tyche_data",

@@ -47,7 +47,7 @@ def _generate_session_name(acquisition_path: Path) -> str:
         console.error(message=message, error=FileNotFoundError)
         raise FileNotFoundError(message)  # Fall-back to appease mypy
 
-    # Gets last modified time (available on all platforms) and converts it to a UTC timestamp object.
+    # Gets the last modified time (available on all platforms) and converts it to a UTC timestamp object.
     mod_time = source.stat().st_mtime
     mod_datetime = datetime.datetime.fromtimestamp(mod_time)
 

@@ -57,7 +57,7 @@ def _generate_session_name(acquisition_path: Path) -> str:
     timestamp_bytes = np.array([(timestamp_microseconds >> (8 * i)) & 0xFF for i in range(8)], dtype=np.uint8)
     stamp = extract_timestamp_from_bytes(timestamp_bytes=timestamp_bytes)
 
-    # Returns the generated session name to caller.
+    # Returns the generated session name to the caller.
     return stamp
 
 
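The two _generate_session_name hunks above derive a session name from a folder's modification time: the mtime is converted to microseconds, packed into eight little-endian bytes, and decoded into a timestamp string by a lab helper (extract_timestamp_from_bytes). Below is a hedged sketch of the same idea that formats the name directly with the standard library; the output format is a guess and the real helper may differ.

import datetime
from pathlib import Path


def generate_session_name(acquisition_path: Path) -> str:
    """Hypothetical session-name generator based on the folder's last-modified time."""
    # Gets the last modified time and converts it to a timezone-aware UTC datetime.
    mod_time = acquisition_path.stat().st_mtime
    mod_datetime = datetime.datetime.fromtimestamp(mod_time, tz=datetime.timezone.utc)

    # Microsecond precision matches the byte-packing step shown in the diff.
    timestamp_microseconds = int(mod_datetime.timestamp() * 1_000_000)
    little_endian_bytes = timestamp_microseconds.to_bytes(8, byteorder="little")
    _ = little_endian_bytes  # kept only to mirror the original packing step

    # The lab helper decodes the bytes into a name; here the datetime is formatted directly.
    return mod_datetime.strftime("%Y-%m-%d-%H-%M-%S-%f")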
@@ -89,8 +89,8 @@ def _reorganize_data(session_data: SessionData, source_root: Path) -> bool:
     mesoscope_frames_path = source_root.joinpath("mesoscope_frames")
     ax_checksum_path = source_root.joinpath("ax_checksum.txt")
 
-    # These two file types are present for some, but not all folders. They are not as important as the
-    # above though, as, currently, the data stored in these files is not used during processing.
+    # These two file types are present for some, but not all folders. They are not as important as the files mentioned
+    # above, though, as, currently, the data stored in these files is not used during processing.
     frame_metadata_path = source_root.joinpath("frame_metadata.npz")
     metadata_path = source_root.joinpath("metadata.json")
 

@@ -201,10 +201,10 @@ def ascend_tyche_data(root_directory: Path) -> None:
     # Statically defines project name and local root paths
     project_name = "Tyche"
 
-    # Assumes that root directory stores all animal folders to be processed
+    # Assumes that the root directory stores all animal folders to be processed
     for animal_folder in root_directory.iterdir():
-        # Each animal folder is named to include project name and a static animal ID, e.g.: Tyche-A7. This extracts
-        # animal ID.
+        # Each animal folder is named to include a project name and a static animal ID, e.g.: Tyche-A7. This extracts
+        # each animal ID.
         animal_name = animal_folder.stem.split(sep="-")[1]
 
         # Under each animal root folder, there are day folders that use YYYY-MM-DD timestamps

@@ -230,7 +230,7 @@ def ascend_tyche_data(root_directory: Path) -> None:
             session_data.runtime_initialized()
 
             # Moves the data from the old hierarchy to the new hierarchy. If the process runs as expected, and
-            # fully empties the source acquisition folder, destroys the folder. Otherwise, notifies the user that
+            # fully empties the source acquisition folder, it destroys the folder. Otherwise, notifies the user that
             # the runtime did not fully process the session data and requests intervention.
             success = _reorganize_data(session_data, acquisition_folder)
             if not success:

@@ -10,7 +10,7 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
 from tqdm import tqdm
 import xxhash
 
-# Defines a 'blacklist' set of files. Primarily, this
+# Defines a 'blacklist' set of files. Primarily, this list contains the service files that may change after the session
 # data has been acquired. Therefore, it does not make sense to include them in the checksum, as they do not reflect the
 # data that should remain permanently unchanged. Note, make sure all service files are added to this set!
 _excluded_files = {

@@ -18,6 +18,7 @@ _excluded_files = {
     "ubiquitin.bin",
     "telomere.bin",
     "p53.bin",
+    "nk.bin",
     "suite2p_processing_tracker.yaml",
     "dataset_formation_tracker.yaml",
     "video_processing_tracker.yaml",