sl-shared-assets 4.0.0-py3-none-any.whl → 5.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sl-shared-assets might be problematic.

Files changed (41)
  1. sl_shared_assets/__init__.py +45 -42
  2. sl_shared_assets/command_line_interfaces/__init__.py +3 -0
  3. sl_shared_assets/command_line_interfaces/configure.py +173 -0
  4. sl_shared_assets/command_line_interfaces/manage.py +226 -0
  5. sl_shared_assets/data_classes/__init__.py +33 -32
  6. sl_shared_assets/data_classes/configuration_data.py +267 -79
  7. sl_shared_assets/data_classes/runtime_data.py +11 -11
  8. sl_shared_assets/data_classes/session_data.py +226 -289
  9. sl_shared_assets/data_classes/surgery_data.py +6 -6
  10. sl_shared_assets/server/__init__.py +24 -4
  11. sl_shared_assets/server/job.py +6 -7
  12. sl_shared_assets/server/pipeline.py +570 -0
  13. sl_shared_assets/server/server.py +57 -25
  14. sl_shared_assets/tools/__init__.py +9 -8
  15. sl_shared_assets/tools/packaging_tools.py +14 -25
  16. sl_shared_assets/tools/project_management_tools.py +602 -523
  17. sl_shared_assets/tools/transfer_tools.py +88 -23
  18. {sl_shared_assets-4.0.0.dist-info → sl_shared_assets-5.0.0.dist-info}/METADATA +46 -202
  19. sl_shared_assets-5.0.0.dist-info/RECORD +23 -0
  20. sl_shared_assets-5.0.0.dist-info/entry_points.txt +3 -0
  21. sl_shared_assets/__init__.pyi +0 -91
  22. sl_shared_assets/cli.py +0 -500
  23. sl_shared_assets/cli.pyi +0 -106
  24. sl_shared_assets/data_classes/__init__.pyi +0 -75
  25. sl_shared_assets/data_classes/configuration_data.pyi +0 -235
  26. sl_shared_assets/data_classes/runtime_data.pyi +0 -157
  27. sl_shared_assets/data_classes/session_data.pyi +0 -379
  28. sl_shared_assets/data_classes/surgery_data.pyi +0 -89
  29. sl_shared_assets/server/__init__.pyi +0 -11
  30. sl_shared_assets/server/job.pyi +0 -205
  31. sl_shared_assets/server/server.pyi +0 -298
  32. sl_shared_assets/tools/__init__.pyi +0 -19
  33. sl_shared_assets/tools/ascension_tools.py +0 -265
  34. sl_shared_assets/tools/ascension_tools.pyi +0 -68
  35. sl_shared_assets/tools/packaging_tools.pyi +0 -58
  36. sl_shared_assets/tools/project_management_tools.pyi +0 -239
  37. sl_shared_assets/tools/transfer_tools.pyi +0 -53
  38. sl_shared_assets-4.0.0.dist-info/RECORD +0 -36
  39. sl_shared_assets-4.0.0.dist-info/entry_points.txt +0 -7
  40. {sl_shared_assets-4.0.0.dist-info → sl_shared_assets-5.0.0.dist-info}/WHEEL +0 -0
  41. {sl_shared_assets-4.0.0.dist-info → sl_shared_assets-5.0.0.dist-info}/licenses/LICENSE +0 -0
sl_shared_assets/server/server.py
@@ -1,4 +1,4 @@
- """This module provides the tools for working with the Sun lab BioHPC cluster. Specifically, the classes from this
+ """This module provides the tools for working with remote compute servers. Specifically, the classes from this
  module establish an API for submitting jobs to the shared data processing cluster (managed via SLURM) and monitoring
  the running job status. All lab processing and analysis pipelines use this interface for accessing shared compute
  resources.
@@ -27,20 +27,22 @@ def generate_server_credentials(
      output_directory: Path,
      username: str,
      password: str,
+     service: bool = False,
      host: str = "cbsuwsun.biopic.cornell.edu",
      storage_root: str = "/local/workdir",
      working_root: str = "/local/storage",
      shared_directory_name: str = "sun_data",
  ) -> None:
-     """Generates a new server_credentials.yaml file under the specified directory, using input information.
+     """Generates a new server access credentials .yaml file under the specified directory, using input information.

-     This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
-     only used when setting up new host-computers or users in the lab.
+     This function provides a convenience interface for generating new server access credential files. Depending on
+     configuration, it either creates user access credentials files or service access credentials files.

      Args:
          output_directory: The directory where to save the generated server_credentials.yaml file.
          username: The username to use for server authentication.
          password: The password to use for server authentication.
+         service: Determines whether the generated credentials file stores the data for a user or a service account.
          host: The hostname or IP address of the server to connect to.
          storage_root: The path to the root storage (slow) server directory. Typically, this is the path to the
              top-level (root) directory of the HDD RAID volume.
@@ -50,15 +52,26 @@ def generate_server_credentials(
          shared_directory_name: The name of the shared directory used to store all Sun lab project data on the storage
              and working server volumes.
      """
-     # noinspection PyArgumentList
-     ServerCredentials(
-         username=username,
-         password=password,
-         host=host,
-         storage_root=storage_root,
-         working_root=working_root,
-         shared_directory_name=shared_directory_name,
-     ).to_yaml(file_path=output_directory.joinpath("server_credentials.yaml"))
+     if service:
+         ServerCredentials(
+             username=username,
+             password=password,
+             host=host,
+             storage_root=storage_root,
+             working_root=working_root,
+             shared_directory_name=shared_directory_name,
+         ).to_yaml(file_path=output_directory.joinpath("service_credentials.yaml"))
+         console.echo(message="Service server access credentials file: Created.", level=LogLevel.SUCCESS)
+     else:
+         ServerCredentials(
+             username=username,
+             password=password,
+             host=host,
+             storage_root=storage_root,
+             working_root=working_root,
+             shared_directory_name=shared_directory_name,
+         ).to_yaml(file_path=output_directory.joinpath("user_credentials.yaml"))
+         console.echo(message="User server access credentials file: Created.", level=LogLevel.SUCCESS)


  @dataclass()
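
For illustration, a minimal usage sketch of the refactored function, based only on the signature shown above; the import path, directory, and account values are assumptions:

    from pathlib import Path

    # Assumed import path; the server package __init__ diff is not shown here.
    from sl_shared_assets.server import generate_server_credentials

    credentials_dir = Path("/home/labuser/.sl-assets")  # hypothetical location

    # The default (service=False) writes user_credentials.yaml.
    generate_server_credentials(
        output_directory=credentials_dir,
        username="labuser",
        password="not-a-real-password",
    )

    # service=True writes service_credentials.yaml instead.
    generate_server_credentials(
        output_directory=credentials_dir,
        username="pipeline-service",
        password="not-a-real-password",
        service=True,
    )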
@@ -111,11 +124,11 @@ class ServerCredentials(YamlConfig):


  class Server:
-     """Encapsulates access to the Sun lab BioHPC processing server.
+     """Encapsulates access to a Sun lab processing server.

-     This class provides the API that allows accessing the BioHPC server to create and submit various SLURM-managed jobs
-     to the server. It functions as the central interface used by all processing pipelines in the lab to execute costly
-     data processing on the server.
+     This class provides the API that allows accessing the remote processing server to create and submit various
+     SLURM-managed jobs to the server. It functions as the central interface used by all processing pipelines in the
+     lab to execute costly data processing on the server.

      Notes:
          All lab processing pipelines expect the data to be stored on the server and all processing logic to be packaged
@@ -306,7 +319,7 @@ class Server:
          # include connection data received from the server.
          return self.submit_job(job)  # type: ignore[return-value]

-     def submit_job(self, job: Job | JupyterJob) -> Job | JupyterJob:
+     def submit_job(self, job: Job | JupyterJob, verbose: bool = True) -> Job | JupyterJob:
          """Submits the input job to the managed BioHPC server via SLURM job manager.

          This method submits various jobs for execution via the SLURM-managed BioHPC cluster. As part of its runtime, the
@@ -315,6 +328,9 @@ class Server:

          Args:
              job: The Job object that contains all job data.
+             verbose: Determines whether to notify the user about non-error states of the job submission task. Typically,
+                 this is disabled when batch-submitting jobs (for example, as part of running a processing pipeline) and
+                 enabled when submitting single jobs.

          Returns:
              The job object whose 'job_id' attribute had been modified with the job ID if the job was successfully
@@ -323,7 +339,8 @@ class Server:
          Raises:
              RuntimeError: If job submission to the server fails.
          """
-         console.echo(message=f"Submitting '{job.job_name}' job to the remote server {self.host}...")
+         if verbose:
+             console.echo(message=f"Submitting '{job.job_name}' job to the remote server {self.host}...")

          # Generates a temporary shell script on the local machine. Uses tempfile to automatically remove the
          # local script as soon as it is uploaded to the server.
@@ -332,7 +349,7 @@ class Server:
          fixed_script_content = job.command_script

          # Creates a temporary script file locally and dumps translated command data into the file
-         with open(local_script_path, "w") as f:
+         with local_script_path.open("w") as f:
              f.write(fixed_script_content)

          # Uploads the command script to the server
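
This open() → Path.open() change, repeated in several hunks below, is behavior-preserving: pathlib.Path.open delegates to the built-in open, so the two spellings are interchangeable. A quick sketch with an illustrative path:

    from pathlib import Path

    script = Path("/tmp/job_script.sh")  # hypothetical path

    with open(script, "w") as f:  # old spelling
        f.write("#!/bin/bash\n")

    with script.open("w") as f:  # new, pathlib-idiomatic spelling
        f.write("#!/bin/bash\n")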
@@ -400,6 +417,9 @@ class Server:

              timer.delay_noblock(delay=5, allow_sleep=True)  # Waits for 5 seconds before checking again
          else:
+             # Aborts the job if the server is busy running other jobs
+             self.abort_job(job=job)
+
              # Only raises the timeout error if the while loop is not broken in 120 seconds
              message = (
                  f"Remote jupyter server job {job.job_name} with id {job.job_id} did not start within 120 seconds "
@@ -409,7 +429,8 @@ class Server:
              console.error(message, TimeoutError)
              raise TimeoutError(message)  # Fallback to appease mypy

-         console.echo(message=f"{job.job_name} job: Submitted to {self.host}.", level=LogLevel.SUCCESS)
+         if verbose:
+             console.echo(message=f"{job.job_name} job: Submitted to {self.host}.", level=LogLevel.SUCCESS)

          # Returns the updated job object
          return job
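
Taken together, the verbose changes let pipelines silence per-job chatter. A sketch of the intended call patterns, assuming hypothetical server and jobs objects:

    # Single job: the default verbose=True reports submission progress.
    job = server.submit_job(job)

    # Batch submission from a pipeline: suppress non-error notifications.
    for job in jobs:
        server.submit_job(job, verbose=False)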
@@ -603,7 +624,7 @@ class Server:
                  sftp.rmdir(str(remote_path))

          except Exception as e:
-             console.echo(f"Unable to remove the specified directory {remote_path}: {str(e)}", level=LogLevel.WARNING)
+             console.echo(f"Unable to remove the specified directory {remote_path}: {e!s}", level=LogLevel.WARNING)

      def create_directory(self, remote_path: Path, parents: bool = True) -> None:
          """Creates the specified directory tree on the managed remote server via SFTP.
@@ -672,13 +693,14 @@ class Server:
              # Checks if the target file or directory exists by trying to 'stat' it
              sftp.stat(str(remote_path))

-             # If the request does not err, returns True (file or directory exists)
-             return True
-
          # If the directory or file does not exist, returns False
          except FileNotFoundError:
              return False

+         else:
+             # If the request does not err, returns True (file or directory exists)
+             return True
+
      def close(self) -> None:
          """Closes the SSH connection to the server.

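Moving return True into an else clause is a common lint-driven cleanup (for example, Ruff's TRY300 rule): the else body runs only when the try block raises nothing, which keeps the success path out of the exception handler. A generic, self-contained sketch of the pattern:

    import os

    def local_exists(path: str) -> bool:
        try:
            os.stat(path)  # raises FileNotFoundError for missing paths
        except FileNotFoundError:
            return False
        else:
            # Runs only when the try block did not raise.
            return True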
@@ -723,3 +745,13 @@ class Server:
      def user(self) -> str:
          """Returns the username used to authenticate with the server."""
          return self._credentials.username
+
+     @property
+     def suite2p_configurations_directory(self) -> Path:
+         """Returns the absolute path to the shared directory that stores all sl-suite2p runtime configuration files."""
+         return self.raw_data_root.joinpath("suite2p_configurations")
+
+     @property
+     def dlc_projects_directory(self) -> Path:
+         """Returns the absolute path to the shared directory that stores all DeepLabCut projects."""
+         return self.raw_data_root.joinpath("deeplabcut_projects")
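
Assuming raw_data_root points at the shared raw-data root on the server (that attribute is not shown in this diff), the new properties compose paths like this hypothetical example:

    # Hypothetical values, assuming raw_data_root is /local/workdir/sun_data/raw_data:
    server.suite2p_configurations_directory  # -> .../raw_data/suite2p_configurations
    server.dlc_projects_directory            # -> .../raw_data/deeplabcut_projects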
sl_shared_assets/tools/__init__.py
@@ -1,22 +1,23 @@
  """This package provides helper tools used to automate routine operations, such as transferring or verifying the
  integrity of the data. The tools from this package are used by most other data processing libraries in the lab."""

- from .transfer_tools import transfer_directory
- from .ascension_tools import ascend_tyche_data
+ from .transfer_tools import delete_directory, transfer_directory
  from .packaging_tools import calculate_directory_checksum
  from .project_management_tools import (
      ProjectManifest,
-     resolve_p53_marker,
-     verify_session_checksum,
+     archive_session,
+     prepare_session,
+     resolve_checksum,
      generate_project_manifest,
  )

  __all__ = [
      "ProjectManifest",
-     "transfer_directory",
+     "archive_session",
      "calculate_directory_checksum",
-     "ascend_tyche_data",
-     "verify_session_checksum",
+     "delete_directory",
      "generate_project_manifest",
-     "resolve_p53_marker",
+     "prepare_session",
+     "resolve_checksum",
+     "transfer_directory",
  ]
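
Downstream code importing the removed names must migrate. The before/after imports implied by this hunk:

    # 4.0.0 (removed in 5.0.0):
    # from sl_shared_assets.tools import ascend_tyche_data, resolve_p53_marker, verify_session_checksum

    # 5.0.0:
    from sl_shared_assets.tools import (
        archive_session,
        delete_directory,
        prepare_session,
        resolve_checksum,
    )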
sl_shared_assets/tools/packaging_tools.py
@@ -1,5 +1,6 @@
- """This module provides methods for packaging session runtime data for transmission over the network. The methods from
- this module work in tandem with methods offered by transfer_tools.py to ensure the integrity of the transferred data.
+ """This module provides tools for packaging data for transmission. Although this module is primarily used when
+ transmitting data over the network, it also works for local (within-machine) transfers. The tools from
+ this module work in tandem with tools offered by transfer_tools.py to ensure the integrity of the transferred data.
  """

  import os
@@ -10,8 +11,6 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
  from tqdm import tqdm
  import xxhash

- from ..data_classes import TrackerFileNames
-
  # Defines a 'blacklist' set of files. Primarily, this list contains the service files that may change after the session
  # data has been acquired. Therefore, it does not make sense to include them in the checksum, as they do not reflect the
  # data that should remain permanently unchanged. Note, make sure all service files are added to this set!
@@ -19,22 +18,16 @@ _excluded_files = {
      "ax_checksum.txt",
      "ubiquitin.bin",
      "telomere.bin",
-     "p53.bin",
      "nk.bin",
  }

- # Extends the exclusion set to include all tracker .yaml files and their concurrent access .lock files.
- for name in tuple(TrackerFileNames):
-     _excluded_files.add(name)
-     _excluded_files.add(f"{name}.lock")
-

  def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str, bytes]:
-     """Calculates xxHash3-128 checksum for a single file and its path relative to the base directory.
+     """Calculates xxHash3-128 checksum for the target file and its path relative to the base directory.

      This function is passed to parallel workers used by the calculate_directory_hash() method that iteratively
      calculates the checksum for all files inside a directory. Each call to this function returns the checksum for the
-     target file, which includes both the contents of the file and its path relative to the base directory.
+     target file, which reflects both the contents of the file and its path relative to the base directory.

      Args:
          base_directory: The path to the base (root) directory which is being checksummed by the main
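
The blacklist is presumably consulted when enumerating files to hash; a hypothetical filter illustrating the idea (not the module's actual traversal code):

    from pathlib import Path

    def _checksummed_files(directory: Path) -> list[Path]:
        # Skips service files that may change after acquisition, per _excluded_files.
        return [
            path
            for path in sorted(directory.rglob("*"))
            if path.is_file() and path.name not in _excluded_files
        ]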
@@ -55,7 +48,7 @@ def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str, bytes]:

      # Extends the checksum to reflect the file data state. Uses 8 MB chunks to avoid excessive RAM hogging at the cost
      # of slightly reduced throughput.
-     with open(file_path, "rb") as f:
+     with file_path.open("rb") as f:
          for chunk in iter(lambda: f.read(1024 * 1024 * 8), b""):
              checksum.update(chunk)

@@ -71,18 +64,14 @@ def calculate_directory_checksum(
      """Calculates xxHash3-128 checksum for the input directory, which includes the data of all contained files and
      the directory structure information.

-     This function is used to generate a checksum for the raw_data directory of each experiment or training session.
-     Checksums are used to verify the session data integrity during transmission between the PC that acquired the data
-     and long-term storage locations, such as the Synology NAS or the BioHPC server. The function can be configured to
-     write the generated checksum as a hexadecimal string to the ax_checksum.txt file stored at the highest level of the
-     input directory.
+     Checksums are used to verify the data integrity during transmission within machines (from one storage volume to
+     another) and between machines. The function can be configured to write the generated checksum as a hexadecimal
+     string to the ax_checksum.txt file stored at the highest level of the input directory.

      Note:
-         This method uses multiprocessing to efficiently parallelize checksum calculation for multiple files. In
-         combination with xxHash3, this achieves a significant speedup over more common checksums, such as MD5 and
-         SHA256. Note that xxHash3 is not suitable for security purposes and is only used to ensure data integrity.
-
-         The method notifies the user about the checksum calculation process via the terminal.
+         This function uses multiprocessing to efficiently parallelize checksum calculation for multiple files. In
+         combination with xxHash3, this achieves a significant speedup over other common checksum options, such as MD5
+         and SHA256. Note that xxHash3 is not suitable for security purposes and is only used to ensure data integrity.

          The returned checksum accounts for both the contents of each file and the layout of the input directory
          structure.
@@ -145,8 +134,8 @@ def calculate_directory_checksum(

      # Writes the hash to ax_checksum.txt in the root directory
      if save_checksum:
-         checksum_path = directory / "ax_checksum.txt"
-         with open(checksum_path, "w") as f:
+         checksum_path = directory.joinpath("ax_checksum.txt")
+         with checksum_path.open("w") as f:
              f.write(checksum_hexstr)

      return checksum_hexstr
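
A minimal round-trip sketch: hash before transfer, persist ax_checksum.txt, and re-verify at the destination. Only the save_checksum keyword is confirmed by this diff; the positional directory argument and all paths are assumptions:

    from pathlib import Path

    source = Path("/data/session_001")          # hypothetical source directory
    destination = Path("/mnt/nas/session_001")  # hypothetical destination

    # Sender: hash the directory and persist ax_checksum.txt inside it.
    # (ax_checksum.txt itself is blacklisted, so it does not perturb the checksum.)
    sent = calculate_directory_checksum(source, save_checksum=True)

    # Receiver: recompute after transfer and compare against the stored value.
    received = calculate_directory_checksum(destination, save_checksum=False)
    stored = destination.joinpath("ax_checksum.txt").read_text().strip()
    assert sent == received == stored, "data changed during transfer"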