sl-shared-assets 3.0.0rc13__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sl-shared-assets might be problematic.
- sl_shared_assets/cli.py +7 -11
- sl_shared_assets/cli.pyi +5 -9
- sl_shared_assets/data_classes/__init__.py +2 -2
- sl_shared_assets/data_classes/configuration_data.py +11 -8
- sl_shared_assets/data_classes/configuration_data.pyi +8 -7
- sl_shared_assets/data_classes/runtime_data.py +2 -2
- sl_shared_assets/data_classes/runtime_data.pyi +2 -2
- sl_shared_assets/data_classes/session_data.py +44 -30
- sl_shared_assets/data_classes/session_data.pyi +11 -11
- sl_shared_assets/server/__init__.py +1 -1
- sl_shared_assets/server/job.py +10 -10
- sl_shared_assets/server/job.pyi +5 -5
- sl_shared_assets/server/server.py +15 -15
- sl_shared_assets/server/server.pyi +7 -7
- sl_shared_assets/tools/ascension_tools.py +8 -8
- sl_shared_assets/tools/packaging_tools.py +2 -1
- sl_shared_assets/tools/project_management_tools.py +30 -40
- sl_shared_assets/tools/project_management_tools.pyi +6 -10
- sl_shared_assets/tools/transfer_tools.py +1 -1
- sl_shared_assets/tools/transfer_tools.pyi +1 -1
- {sl_shared_assets-3.0.0rc13.dist-info → sl_shared_assets-3.0.1.dist-info}/METADATA +122 -5
- sl_shared_assets-3.0.1.dist-info/RECORD +36 -0
- sl_shared_assets-3.0.0rc13.dist-info/RECORD +0 -36
- {sl_shared_assets-3.0.0rc13.dist-info → sl_shared_assets-3.0.1.dist-info}/WHEEL +0 -0
- {sl_shared_assets-3.0.0rc13.dist-info → sl_shared_assets-3.0.1.dist-info}/entry_points.txt +0 -0
- {sl_shared_assets-3.0.0rc13.dist-info → sl_shared_assets-3.0.1.dist-info}/licenses/LICENSE +0 -0
sl_shared_assets/server/job.py
CHANGED
@@ -1,6 +1,6 @@
  """This module provides the core Job class, used as the starting point for all SLURM-managed job executed on lab compute
  server(s). Specifically, the Job class acts as a wrapper around the SLURM configuration and specific logic of each
- job. During runtime, Server class interacts with input job objects to manage their transfer and execution on the
+ job. During runtime, the Server class interacts with input job objects to manage their transfer and execution on the
  remote servers.

  Since version 3.0.0, this module also provides the specialized JupyterJob class used to launch remote Jupyter

@@ -97,8 +97,8 @@ class Job:
  Attributes:
  remote_script_path: Stores the path to the script file relative to the root of the remote server that runs the
  command.
- job_id: Stores the unique job identifier assigned by the SLURM manager to this job
- execution. This field initialized to None and is overwritten by the Server class that submits the job.
+ job_id: Stores the unique job identifier assigned by the SLURM manager to this job when it is accepted for
+ execution. This field is initialized to None and is overwritten by the Server class that submits the job.
  job_name: Stores the descriptive name of the SLURM job.
  _command: Stores the managed SLURM command object.
  """

@@ -174,7 +174,7 @@ class Job:
  # initialization would not work as expected.
  fixed_script_content = script_content.replace("\\$", "$")

- # Returns the script content to caller as a string
+ # Returns the script content to the caller as a string
  return fixed_script_content

@@ -202,8 +202,8 @@ class JupyterJob(Job):
  conda_environment: The name of the conda environment to activate on the server before running the job logic. The
  environment should contain the necessary Python packages and CLIs to support running the job's logic. For
  Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
- port: The connection port number for Jupyter server. Do not change the default value unless you know what
- are doing, as the server has most common communication ports closed for security reasons.
+ port: The connection port number for the Jupyter server. Do not change the default value unless you know what
+ you are doing, as the server has most common communication ports closed for security reasons.
  notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have access to
  items stored in or under this directory. For most runtimes, this should be set to the user's root data or
  working directory.

@@ -270,7 +270,7 @@ class JupyterJob(Job):
  self._build_jupyter_command(jupyter_args)

  def _build_jupyter_command(self, jupyter_args: str) -> None:
- """Builds the command to launch Jupyter notebook server on the remote Sun lab server."""
+ """Builds the command to launch the Jupyter notebook server on the remote Sun lab server."""

  # Gets the hostname of the compute node and caches it in the connection data file. Also caches the port name.
  self.add_command('echo "COMPUTE_NODE: $(hostname)" > {}'.format(self.connection_info_file))

@@ -297,7 +297,7 @@ class JupyterJob(Job):
  if jupyter_args:
  jupyter_cmd.append(jupyter_args)

- # Adds resolved jupyter command to the list of job commands.
+ # Adds the resolved jupyter command to the list of job commands.
  jupyter_cmd_str = " ".join(jupyter_cmd)
  self.add_command(jupyter_cmd_str)

@@ -324,7 +324,7 @@ class JupyterJob(Job):
  message = f"Could not parse connection information file for the Jupyter server job with id {self.job_id}."
  console.error(message, ValueError)

- # Stores extracted data inside connection_info attribute as a JupyterConnectionInfo instance.
+ # Stores extracted data inside the connection_info attribute as a JupyterConnectionInfo instance.
  self.connection_info = _JupyterConnectionInfo(
  compute_node=compute_node_match.group(1).strip(),  # type: ignore
  port=int(port_match.group(1)),  # type: ignore

@@ -352,7 +352,7 @@ class JupyterJob(Job):
  )
  return  # No connection information available, so does not proceed with printing.

- # Prints generic connection details to terminal
+ # Prints generic connection details to the terminal
  console.echo(f"Jupyter is running on: {self.connection_info.compute_node}")
  console.echo(f"Port: {self.connection_info.port}")
  console.echo(f"Token: {self.connection_info.token}")
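The hunks above touch the JupyterJob helpers that write the connection-information file on the compute node (via the echoed COMPUTE_NODE line) and later parse it back into a _JupyterConnectionInfo instance. Below is a minimal, standalone sketch of that parsing pattern; only the "COMPUTE_NODE: ..." line and the group(1) regex parsing appear in the diff, so the PORT/TOKEN field layout and the helper name are assumptions, not the package's verbatim code.

# Hypothetical sketch of the connection-info parsing pattern shown in the hunks above.
import re
from pathlib import Path

def parse_connection_info(info_file: Path) -> tuple[str, int, str]:
    text = info_file.read_text()
    # Field names beyond COMPUTE_NODE are assumed for illustration.
    compute_node_match = re.search(r"COMPUTE_NODE:\s*(\S+)", text)
    port_match = re.search(r"PORT:\s*(\d+)", text)
    token_match = re.search(r"TOKEN:\s*(\S+)", text)
    if compute_node_match is None or port_match is None or token_match is None:
        raise ValueError("Could not parse the connection information file.")
    return compute_node_match.group(1).strip(), int(port_match.group(1)), token_match.group(1)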
sl_shared_assets/server/job.pyi
CHANGED
@@ -73,8 +73,8 @@ class Job:
  Attributes:
  remote_script_path: Stores the path to the script file relative to the root of the remote server that runs the
  command.
- job_id: Stores the unique job identifier assigned by the SLURM manager to this job
- execution. This field initialized to None and is overwritten by the Server class that submits the job.
+ job_id: Stores the unique job identifier assigned by the SLURM manager to this job when it is accepted for
+ execution. This field is initialized to None and is overwritten by the Server class that submits the job.
  job_name: Stores the descriptive name of the SLURM job.
  _command: Stores the managed SLURM command object.
  """

@@ -138,8 +138,8 @@ class JupyterJob(Job):
  conda_environment: The name of the conda environment to activate on the server before running the job logic. The
  environment should contain the necessary Python packages and CLIs to support running the job's logic. For
  Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
- port: The connection port number for Jupyter server. Do not change the default value unless you know what
- are doing, as the server has most common communication ports closed for security reasons.
+ port: The connection port number for the Jupyter server. Do not change the default value unless you know what
+ you are doing, as the server has most common communication ports closed for security reasons.
  notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have access to
  items stored in or under this directory. For most runtimes, this should be set to the user's root data or
  working directory.

@@ -184,7 +184,7 @@ class JupyterJob(Job):
  jupyter_args: str = "",
  ) -> None: ...
  def _build_jupyter_command(self, jupyter_args: str) -> None:
- """Builds the command to launch Jupyter notebook server on the remote Sun lab server."""
+ """Builds the command to launch the Jupyter notebook server on the remote Sun lab server."""
  def parse_connection_info(self, info_file: Path) -> None:
  """Parses the connection information file created by the Jupyter job on the server.
sl_shared_assets/server/server.py
CHANGED
@@ -27,7 +27,7 @@ def generate_server_credentials(
  output_directory: Path,
  username: str,
  password: str,
- host: str = "cbsuwsun.
+ host: str = "cbsuwsun.biopic.cornell.edu",
  storage_root: str = "/local/workdir",
  working_root: str = "/local/storage",
  shared_directory_name: str = "sun_data",

@@ -255,7 +255,7 @@ class Server:
  conda_environment: The name of the conda environment to activate on the server before running the job logic.
  The environment should contain the necessary Python packages and CLIs to support running the job's
  logic. For Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
- port: The connection port number for Jupyter server. If set to 0 (default), a random port number between
+ port: The connection port number for the Jupyter server. If set to 0 (default), a random port number between
  8888 and 9999 will be assigned to this connection to reduce the possibility of colliding with other
  user sessions.
  notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have GUI

@@ -274,8 +274,8 @@ class Server:
  Do NOT re-submit the job to the server, as this is done as part of this method's runtime.

  Raises:
- TimeoutError: If the target Jupyter server doesn't start within 120 minutes
- RuntimeError: If job submission fails for any reason.
+ TimeoutError: If the target Jupyter server doesn't start within 120 minutes of this method being called.
+ RuntimeError: If the job submission fails for any reason.
  """

  # Statically configures the working directory to be stored under:

@@ -309,7 +309,7 @@ class Server:
  def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
  """Submits the input job to the managed BioHPC server via SLURM job manager.

- This method submits various jobs for execution via SLURM-managed BioHPC cluster. As part of its runtime, the
+ This method submits various jobs for execution via the SLURM-managed BioHPC cluster. As part of its runtime, the
  method translates the Job object into the shell script, moves the script to the target working directory on
  the server, and instructs the server to execute the shell script (via SLURM).

@@ -400,7 +400,7 @@ class Server:

  timer.delay_noblock(delay=5, allow_sleep=True)  # Waits for 5 seconds before checking again
  else:
- # Only raises timeout error if the while loop is not broken in 120 seconds
+ # Only raises the timeout error if the while loop is not broken in 120 seconds
  message = (
  f"Remote jupyter server job {job.job_name} with id {job.job_id} did not start within 120 seconds "
  f"from being submitted. Since all jupyter jobs are intended to be interactive and the server is "

@@ -418,7 +418,7 @@ class Server:
  """Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
  to an error.

- If the job is still running or is waiting inside the execution queue, returns False.
+ If the job is still running or is waiting inside the execution queue, the method returns False.

  Args:
  job: The Job object whose status needs to be checked.

@@ -446,7 +446,7 @@ class Server:
  def abort_job(self, job: Job | JupyterJob) -> None:
  """Aborts the target job if it is currently running on the server.

- Use this method to immediately abort running or queued jobs
+ Use this method to immediately abort running or queued jobs without waiting for the timeout guard. If the job
  is queued, this method will remove it from the SLURM queue. If the job is already terminated, this method will
  do nothing.

@@ -507,12 +507,12 @@ class Server:
  remote_item_path = remote_directory_path.joinpath(item.filename)
  local_item_path = local_directory_path.joinpath(item.filename)

- # Checks if item is a directory
+ # Checks if the item is a directory
  if stat.S_ISDIR(item.st_mode):  # type: ignore
  # Recursively pulls the subdirectory
  self.pull_directory(local_item_path, remote_item_path)
  else:
- # Pulls the individual file using existing method
+ # Pulls the individual file using the existing method
  sftp.get(localpath=str(local_item_path), remotepath=str(remote_item_path))

  finally:

@@ -535,7 +535,7 @@ class Server:
  sftp = self._client.open_sftp()

  try:
- # Creates the remote directory using existing method
+ # Creates the remote directory using the existing method
  self.create_directory(remote_directory_path, parents=True)

  # Iterates through all items in the local directory

@@ -546,7 +546,7 @@ class Server:
  # Recursively pushes subdirectory
  self.push_directory(local_item_path, remote_item_path)
  else:
- # Pushes the individual file using existing method
+ # Pushes the individual file using the existing method
  sftp.put(localpath=str(local_item_path), remotepath=str(remote_item_path))

  finally:

@@ -609,7 +609,7 @@ class Server:
  current_path = part

  try:
- # Checks if directory exists by trying to stat it
+ # Checks if the directory exists by trying to 'stat' it
  sftp.stat(current_path)
  except FileNotFoundError:
  # If the directory does not exist, creates it

@@ -617,7 +617,7 @@ class Server:
  else:
  # Otherwise, only creates the final directory
  try:
- # Checks if directory already exists
+ # Checks if the directory already exists
  sftp.stat(remote_path_str)
  except FileNotFoundError:
  # Creates the directory if it does not exist

@@ -632,7 +632,7 @@ class Server:

  sftp = self._client.open_sftp()
  try:
- # Checks if the target file or directory exists by trying to stat it
+ # Checks if the target file or directory exists by trying to 'stat' it
  sftp.stat(str(remote_path))

  # If the request does not err, returns True (file or directory exists)
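The pull_directory, push_directory, create_directory, and existence-check hunks above all follow the same paramiko SFTP pattern: stat an item, branch on S_ISDIR, and recurse into subdirectories. A minimal sketch of the pull side of that pattern follows, assuming an already-connected paramiko.SFTPClient; only the listdir_attr/S_ISDIR/get structure mirrors the diff, the helper name and path handling are illustrative rather than the package's implementation.

# Illustrative sketch of the recursive SFTP pull pattern visible in the hunks above.
import stat
from pathlib import Path

import paramiko

def pull_directory(sftp: paramiko.SFTPClient, remote_dir: Path, local_dir: Path) -> None:
    local_dir.mkdir(parents=True, exist_ok=True)
    for item in sftp.listdir_attr(str(remote_dir)):
        remote_item = remote_dir / item.filename
        local_item = local_dir / item.filename
        if stat.S_ISDIR(item.st_mode or 0):
            # Recursively pulls the subdirectory.
            pull_directory(sftp, remote_item, local_item)
        else:
            # Pulls the individual file (keyword arguments avoid mixing up the path order).
            sftp.get(remotepath=str(remote_item), localpath=str(local_item))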
sl_shared_assets/server/server.pyi
CHANGED
@@ -15,7 +15,7 @@ def generate_server_credentials(
  output_directory: Path,
  username: str,
  password: str,
- host: str = "cbsuwsun.
+ host: str = "cbsuwsun.biopic.cornell.edu",
  storage_root: str = "/local/workdir",
  working_root: str = "/local/storage",
  shared_directory_name: str = "sun_data",

@@ -140,7 +140,7 @@ class Server:
  conda_environment: The name of the conda environment to activate on the server before running the job logic.
  The environment should contain the necessary Python packages and CLIs to support running the job's
  logic. For Jupyter jobs, this necessarily includes the Jupyter notebook and jupyterlab packages.
- port: The connection port number for Jupyter server. If set to 0 (default), a random port number between
+ port: The connection port number for the Jupyter server. If set to 0 (default), a random port number between
  8888 and 9999 will be assigned to this connection to reduce the possibility of colliding with other
  user sessions.
  notebook_directory: The directory to use as Jupyter's root. During runtime, Jupyter will only have GUI

@@ -159,13 +159,13 @@ class Server:
  Do NOT re-submit the job to the server, as this is done as part of this method's runtime.

  Raises:
- TimeoutError: If the target Jupyter server doesn't start within 120 minutes
- RuntimeError: If job submission fails for any reason.
+ TimeoutError: If the target Jupyter server doesn't start within 120 minutes of this method being called.
+ RuntimeError: If the job submission fails for any reason.
  """
  def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
  """Submits the input job to the managed BioHPC server via SLURM job manager.

- This method submits various jobs for execution via SLURM-managed BioHPC cluster. As part of its runtime, the
+ This method submits various jobs for execution via the SLURM-managed BioHPC cluster. As part of its runtime, the
  method translates the Job object into the shell script, moves the script to the target working directory on
  the server, and instructs the server to execute the shell script (via SLURM).

@@ -183,7 +183,7 @@ class Server:
  """Returns True if the job managed by the input Job instance has been completed or terminated its runtime due
  to an error.

- If the job is still running or is waiting inside the execution queue, returns False.
+ If the job is still running or is waiting inside the execution queue, the method returns False.

  Args:
  job: The Job object whose status needs to be checked.

@@ -195,7 +195,7 @@ class Server:
  def abort_job(self, job: Job | JupyterJob) -> None:
  """Aborts the target job if it is currently running on the server.

- Use this method to immediately abort running or queued jobs
+ Use this method to immediately abort running or queued jobs without waiting for the timeout guard. If the job
  is queued, this method will remove it from the SLURM queue. If the job is already terminated, this method will
  do nothing.
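The stubs above describe the Server job life cycle: submit_job() translates a Job into a shell script and submits it via SLURM, job_complete() reports False while the job is queued or running, and abort_job() cancels a queued or running job. The following is a hedged usage sketch of that cycle; the import path, the helper function, and the polling interval are assumptions, only the three method names come from the stubs shown here.

# Hypothetical usage sketch of the submit/poll/abort cycle documented above.
import time

from sl_shared_assets import Job, Server  # import path is an assumption

def run_and_wait(server: Server, job: Job, timeout_s: float = 3600.0) -> None:
    job = server.submit_job(job)  # translates the Job into a shell script and submits it via SLURM
    deadline = time.monotonic() + timeout_s
    while not server.job_complete(job):  # False while the job is queued or running
        if time.monotonic() > deadline:
            server.abort_job(job)  # removes a queued job or cancels a running one
            raise TimeoutError(f"Job {job.job_name} did not finish within {timeout_s} seconds.")
        time.sleep(5)  # mirrors the 5-second polling delay used in server.py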
sl_shared_assets/tools/ascension_tools.py
CHANGED
@@ -47,7 +47,7 @@ def _generate_session_name(acquisition_path: Path) -> str:
  console.error(message=message, error=FileNotFoundError)
  raise FileNotFoundError(message)  # Fall-back to appease mypy

- # Gets last modified time (available on all platforms) and converts it to a UTC timestamp object.
+ # Gets the last modified time (available on all platforms) and converts it to a UTC timestamp object.
  mod_time = source.stat().st_mtime
  mod_datetime = datetime.datetime.fromtimestamp(mod_time)

@@ -57,7 +57,7 @@ def _generate_session_name(acquisition_path: Path) -> str:
  timestamp_bytes = np.array([(timestamp_microseconds >> (8 * i)) & 0xFF for i in range(8)], dtype=np.uint8)
  stamp = extract_timestamp_from_bytes(timestamp_bytes=timestamp_bytes)

- # Returns the generated session name to caller.
+ # Returns the generated session name to the caller.
  return stamp

@@ -89,8 +89,8 @@ def _reorganize_data(session_data: SessionData, source_root: Path) -> bool:
  mesoscope_frames_path = source_root.joinpath("mesoscope_frames")
  ax_checksum_path = source_root.joinpath("ax_checksum.txt")

- # These two file types are present for some, but not all folders. They are not as important as the
- # above though, as, currently, the data stored in these files is not used during processing.
+ # These two file types are present for some, but not all folders. They are not as important as the files mentioned
+ # above, though, as, currently, the data stored in these files is not used during processing.
  frame_metadata_path = source_root.joinpath("frame_metadata.npz")
  metadata_path = source_root.joinpath("metadata.json")

@@ -201,10 +201,10 @@ def ascend_tyche_data(root_directory: Path) -> None:
  # Statically defines project name and local root paths
  project_name = "Tyche"

- # Assumes that root directory stores all animal folders to be processed
+ # Assumes that the root directory stores all animal folders to be processed
  for animal_folder in root_directory.iterdir():
- # Each animal folder is named to include project name and a static animal ID, e.g.: Tyche-A7. This extracts
- # animal ID.
+ # Each animal folder is named to include a project name and a static animal ID, e.g.: Tyche-A7. This extracts
+ # each animal ID.
  animal_name = animal_folder.stem.split(sep="-")[1]

  # Under each animal root folder, there are day folders that use YYYY-MM-DD timestamps

@@ -230,7 +230,7 @@ def ascend_tyche_data(root_directory: Path) -> None:
  session_data.runtime_initialized()

  # Moves the data from the old hierarchy to the new hierarchy. If the process runs as expected, and
- # fully empties the source acquisition folder, destroys the folder. Otherwise, notifies the user that
+ # fully empties the source acquisition folder, it destroys the folder. Otherwise, notifies the user that
  # the runtime did not fully process the session data and requests intervention.
  success = _reorganize_data(session_data, acquisition_folder)
  if not success:
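The _generate_session_name hunks above derive a session name from the acquisition folder's modification time by packing the microsecond timestamp into bytes and handing them to extract_timestamp_from_bytes. A self-contained sketch of the byte-packing step follows; treating the modification time as a UTC microsecond count is an assumption about surrounding lines not shown in the diff, while the packing expression itself is taken from the hunk.

# Sketch of the mtime-to-timestamp-bytes conversion shown in the _generate_session_name hunks.
import datetime
from pathlib import Path

import numpy as np

def mtime_as_timestamp_bytes(source: Path) -> np.ndarray:
    mod_time = source.stat().st_mtime
    mod_datetime = datetime.datetime.fromtimestamp(mod_time, tz=datetime.timezone.utc)
    timestamp_microseconds = int(mod_datetime.timestamp() * 1_000_000)
    # Packs the 64-bit microsecond count into 8 little-endian bytes, as in the diff.
    return np.array([(timestamp_microseconds >> (8 * i)) & 0xFF for i in range(8)], dtype=np.uint8)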
sl_shared_assets/tools/packaging_tools.py
CHANGED
@@ -10,7 +10,7 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
  from tqdm import tqdm
  import xxhash

- # Defines a 'blacklist' set of files. Primarily, this
+ # Defines a 'blacklist' set of files. Primarily, this list contains the service files that may change after the session
  # data has been acquired. Therefore, it does not make sense to include them in the checksum, as they do not reflect the
  # data that should remain permanently unchanged. Note, make sure all service files are added to this set!
  _excluded_files = {

@@ -18,6 +18,7 @@ _excluded_files = {
  "ubiquitin.bin",
  "telomere.bin",
  "p53.bin",
+ "nk.bin",
  "suite2p_processing_tracker.yaml",
  "dataset_formation_tracker.yaml",
  "video_processing_tracker.yaml",
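The hunks above add "nk.bin" to the set of service files that calculate_directory_checksum skips, because such files may legitimately change after acquisition. A simplified, single-threaded sketch of how an exclusion set of this kind typically interacts with an xxHash3-128 directory checksum follows; the real implementation uses a process pool and a progress bar, and the traversal order and hash composition below are assumptions.

# Illustrative only: a simplified exclusion-aware directory checksum, not the package's
# calculate_directory_checksum implementation.
from pathlib import Path

import xxhash

_excluded_files = {"ubiquitin.bin", "telomere.bin", "p53.bin", "nk.bin"}

def directory_checksum(root: Path) -> str:
    digest = xxhash.xxh3_128()
    for file in sorted(root.rglob("*")):
        if not file.is_file() or file.name in _excluded_files:
            continue  # service files may change after acquisition, so they are skipped
        digest.update(file.relative_to(root).as_posix().encode())
        digest.update(file.read_bytes())
    return digest.hexdigest()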
sl_shared_assets/tools/project_management_tools.py
CHANGED
@@ -11,15 +11,15 @@ from ataraxis_base_utilities import console

  from ..data_classes import (
  SessionData,
+ SessionTypes,
  ProcessingTracker,
  RunTrainingDescriptor,
  LickTrainingDescriptor,
+ WindowCheckingDescriptor,
  MesoscopeExperimentDescriptor,
  )
  from .packaging_tools import calculate_directory_checksum

- _valid_session_types = {"lick training", "run training", "mesoscope experiment", "window checking"}
-

  class ProjectManifest:
  """Wraps the contents of a Sun lab project manifest .feather file and exposes methods for visualizing and

@@ -220,8 +220,7 @@ class ProjectManifest:

  Returns:
  A Polars DataFrame with the following columns: 'animal', 'date', 'notes', 'session', 'type', 'complete',
- 'intensity_verification', 'suite2p', 'behavior', 'video',
- 'dataset'.
+ 'intensity_verification', 'suite2p', 'behavior', 'video', 'dataset'.
  """

  df = self._data

@@ -330,23 +329,31 @@ def generate_project_manifest(

  # Depending on the session type, instantiates the appropriate descriptor instance and uses it to read the
  # experimenter notes
- if session_data.session_type ==
+ if session_data.session_type == SessionTypes.LICK_TRAINING:
  descriptor: LickTrainingDescriptor = LickTrainingDescriptor.from_yaml(  # type: ignore
  file_path=session_data.raw_data.session_descriptor_path
  )
  manifest["notes"].append(descriptor.experimenter_notes)
- elif session_data.session_type ==
+ elif session_data.session_type == SessionTypes.RUN_TRAINING:
  descriptor: RunTrainingDescriptor = RunTrainingDescriptor.from_yaml(  # type: ignore
  file_path=session_data.raw_data.session_descriptor_path
  )
  manifest["notes"].append(descriptor.experimenter_notes)
- elif session_data.session_type ==
+ elif session_data.session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
  descriptor: MesoscopeExperimentDescriptor = MesoscopeExperimentDescriptor.from_yaml(  # type: ignore
  file_path=session_data.raw_data.session_descriptor_path
  )
  manifest["notes"].append(descriptor.experimenter_notes)
- elif session_data.session_type ==
-
+ elif session_data.session_type == SessionTypes.WINDOW_CHECKING:
+ # sl-experiment version 3.0.0 added session descriptors to Window Checking runtimes. Since the file does not
+ # exist in prior versions, this section is written to statically handle the discrepancy.
+ try:
+ descriptor: WindowCheckingDescriptor = WindowCheckingDescriptor.from_yaml(  # type: ignore
+ file_path=session_data.raw_data.session_descriptor_path
+ )
+ manifest["notes"].append(descriptor.experimenter_notes)
+ except Exception:
+ manifest["notes"].append("N/A")

  # If the session raw_data folder contains the telomere.bin file, marks the session as complete.
  manifest["complete"].append(session_data.raw_data.telomere_path.exists())

@@ -377,9 +384,7 @@ def generate_project_manifest(
  tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
  manifest["video"].append(tracker.is_complete)

- # Tracks whether the session's data is
- # successfully processed with all relevant pipelines. Any session currently being processed with any processing
- # pipeline is considered NOT ready.
+ # Tracks whether the session's data is currently in the processing or dataset integration mode.
  manifest["dataset"].append(session_data.processed_data.p53_path.exists())

  # If all animal IDs are integer-convertible, stores them as numbers to promote proper sorting. Otherwise, stores

@@ -504,11 +509,9 @@ def resolve_p53_marker(
  from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
  that only one type of runtimes (processing or dataset integration) is allowed to work with the session.

- For the p53.bin marker to be created, the session must currently not undergo any processing
-
-
- Due to this limitation, it is only possible to call this function with the 'remove' flag manually (via the
- dedicated CLI).
+ For the p53.bin marker to be created, the session must currently not undergo any processing. Removing the
+ p53.bin marker does not have any dependencies and will be executed even if the session is currently undergoing
+ dataset integration. This is due to data access hierarchy limitations of the Sun lab BioHPC server.

  Args:
  session_path: The path to the session directory for which the p53.bin marker needs to be resolved. Note, the

@@ -528,7 +531,7 @@ def resolve_p53_marker(
  )

  # If the p53.bin marker exists and the runtime is configured to remove it, removes the marker file. If the runtime
- # is configured to create the marker, aborts the runtime (as the marker already exists).
+ # is configured to create the marker, the method aborts the runtime (as the marker already exists).
  if session_data.processed_data.p53_path.exists():
  if remove:
  session_data.processed_data.p53_path.unlink()

@@ -544,41 +547,28 @@ def resolve_p53_marker(
  # Queries the type of the processed session
  session_type = session_data.session_type

- #
-
-
- f"Unable to determine the mandatory processing pipelines for session {session_data.session_name} of animal "
- f"{session_data.animal_id} and project {session_data.processed_data}. The type of the session "
- f"{session_type} is not one of the supported session types: {', '.join(_valid_session_types)}."
- )
- console.error(message=message, error=ValueError)
-
- # Window checking sessions are not designed to be integrated into datasets, so they cannot be marked with p53.bin
- # file. Similarly, any incomplete session is automatically excluded from dataset formation.
- if session_type == "window checking" or not session_data.raw_data.telomere_path.exists():
+ # Window checking sessions are not designed to be integrated into datasets, so they cannot be marked with the
+ # p53.bin file. Similarly, any incomplete session is automatically excluded from dataset formation.
+ if session_type == SessionTypes.WINDOW_CHECKING or not session_data.raw_data.telomere_path.exists():
  return

  # Training sessions collect similar data and share processing pipeline requirements
- if session_type ==
- #
- # creating the marker file. Also ensures that the video tracking pipeline is not actively running, although it
- # is not required
+ if session_type == SessionTypes.LICK_TRAINING or session_type == SessionTypes.RUN_TRAINING:
+ # Ensures that the session is not being processed with one of the supported pipelines.
  behavior_tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
  video_tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
- if
+ if behavior_tracker.is_running or video_tracker.is_running:
  # Note, training runtimes do not require suite2p processing.
  return

  # Mesoscope experiment sessions require additional processing with suite2p
- if session_type ==
+ if session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
  behavior_tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
  suite2p_tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
  video_tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)

- # Similar to above,
-
- # tracking pipeline must not be actively running.
- if not behavior_tracker.is_complete or not suite2p_tracker.is_complete or video_tracker.is_running:
+ # Similar to the above, ensures that the session is not being processed with one of the supported pipelines.
+ if behavior_tracker.is_running or suite2p_tracker.is_running or video_tracker.is_running:
  return

  # If the runtime reached this point, the session is eligible for dataset integration. Creates the p53.bin marker
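The ProjectManifest docstring fix above lists the manifest columns ('animal', 'date', 'notes', 'session', 'type', 'complete', 'intensity_verification', 'suite2p', 'behavior', 'video', 'dataset'). A hypothetical example of slicing that Polars DataFrame follows; reading the .feather file directly with pl.read_ipc and the file name are assumptions, only the column names come from the diff.

# Hypothetical example of querying the project manifest DataFrame described above.
import polars as pl

df = pl.read_ipc("Tyche_manifest.feather")  # hypothetical manifest file name

# Sessions that finished acquisition but are not yet locked for dataset integration.
pending = df.filter(pl.col("complete") & ~pl.col("dataset")).select("animal", "session", "type")
print(pending)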
sl_shared_assets/tools/project_management_tools.pyi
CHANGED
@@ -1,19 +1,18 @@
  from pathlib import Path

  import polars as pl
- from _typeshed import Incomplete

  from ..data_classes import (
  SessionData as SessionData,
+ SessionTypes as SessionTypes,
  ProcessingTracker as ProcessingTracker,
  RunTrainingDescriptor as RunTrainingDescriptor,
  LickTrainingDescriptor as LickTrainingDescriptor,
+ WindowCheckingDescriptor as WindowCheckingDescriptor,
  MesoscopeExperimentDescriptor as MesoscopeExperimentDescriptor,
  )
  from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum

- _valid_session_types: Incomplete
-
  class ProjectManifest:
  """Wraps the contents of a Sun lab project manifest .feather file and exposes methods for visualizing and
  working with the data stored inside the file.

@@ -104,8 +103,7 @@ class ProjectManifest:

  Returns:
  A Polars DataFrame with the following columns: 'animal', 'date', 'notes', 'session', 'type', 'complete',
- 'intensity_verification', 'suite2p', 'behavior', 'video',
- 'dataset'.
+ 'intensity_verification', 'suite2p', 'behavior', 'video', 'dataset'.
  """

  def generate_project_manifest(

@@ -174,11 +172,9 @@ def resolve_p53_marker(
  from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
  that only one type of runtimes (processing or dataset integration) is allowed to work with the session.

- For the p53.bin marker to be created, the session must currently not undergo any processing
-
-
- Due to this limitation, it is only possible to call this function with the 'remove' flag manually (via the
- dedicated CLI).
+ For the p53.bin marker to be created, the session must currently not undergo any processing. Removing the
+ p53.bin marker does not have any dependencies and will be executed even if the session is currently undergoing
+ dataset integration. This is due to data access hierarchy limitations of the Sun lab BioHPC server.

  Args:
  session_path: The path to the session directory for which the p53.bin marker needs to be resolved. Note, the
sl_shared_assets/tools/transfer_tools.py
CHANGED
@@ -45,7 +45,7 @@ def transfer_directory(source: Path, destination: Path, num_threads: int = 1, ve
  done before copying the files.

  The method executes a multithreading copy operation. It does not clean up the source files. That job is handed
- to the specific preprocessing function from the sl_experiment or sl-forgery libraries that
+ to the specific preprocessing function from the sl_experiment or sl-forgery libraries that call this function.

  If the method is configured to verify transferred file integrity, it reruns the xxHash3-128 checksum calculation
  and compares the returned checksum to the one stored in the source directory. The method assumes that all input
sl_shared_assets/tools/transfer_tools.pyi
CHANGED
@@ -30,7 +30,7 @@ def transfer_directory(source: Path, destination: Path, num_threads: int = 1, ve
  done before copying the files.

  The method executes a multithreading copy operation. It does not clean up the source files. That job is handed
- to the specific preprocessing function from the sl_experiment or sl-forgery libraries that
+ to the specific preprocessing function from the sl_experiment or sl-forgery libraries that call this function.

  If the method is configured to verify transferred file integrity, it reruns the xxHash3-128 checksum calculation
  and compares the returned checksum to the one stored in the source directory. The method assumes that all input