sl-shared-assets 4.0.1-py3-none-any.whl → 5.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sl-shared-assets might be problematic.

Files changed (39)
  1. sl_shared_assets/__init__.py +45 -42
  2. sl_shared_assets/command_line_interfaces/__init__.py +3 -0
  3. sl_shared_assets/command_line_interfaces/configure.py +173 -0
  4. sl_shared_assets/command_line_interfaces/manage.py +226 -0
  5. sl_shared_assets/data_classes/__init__.py +33 -32
  6. sl_shared_assets/data_classes/configuration_data.py +267 -79
  7. sl_shared_assets/data_classes/session_data.py +226 -289
  8. sl_shared_assets/server/__init__.py +24 -4
  9. sl_shared_assets/server/job.py +6 -7
  10. sl_shared_assets/server/pipeline.py +570 -0
  11. sl_shared_assets/server/server.py +57 -25
  12. sl_shared_assets/tools/__init__.py +9 -8
  13. sl_shared_assets/tools/packaging_tools.py +14 -25
  14. sl_shared_assets/tools/project_management_tools.py +602 -523
  15. sl_shared_assets/tools/transfer_tools.py +88 -23
  16. {sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.0.dist-info}/METADATA +46 -203
  17. sl_shared_assets-5.0.0.dist-info/RECORD +23 -0
  18. sl_shared_assets-5.0.0.dist-info/entry_points.txt +3 -0
  19. sl_shared_assets/__init__.pyi +0 -91
  20. sl_shared_assets/cli.py +0 -501
  21. sl_shared_assets/cli.pyi +0 -106
  22. sl_shared_assets/data_classes/__init__.pyi +0 -75
  23. sl_shared_assets/data_classes/configuration_data.pyi +0 -235
  24. sl_shared_assets/data_classes/runtime_data.pyi +0 -157
  25. sl_shared_assets/data_classes/session_data.pyi +0 -379
  26. sl_shared_assets/data_classes/surgery_data.pyi +0 -89
  27. sl_shared_assets/server/__init__.pyi +0 -11
  28. sl_shared_assets/server/job.pyi +0 -205
  29. sl_shared_assets/server/server.pyi +0 -298
  30. sl_shared_assets/tools/__init__.pyi +0 -19
  31. sl_shared_assets/tools/ascension_tools.py +0 -265
  32. sl_shared_assets/tools/ascension_tools.pyi +0 -68
  33. sl_shared_assets/tools/packaging_tools.pyi +0 -58
  34. sl_shared_assets/tools/project_management_tools.pyi +0 -239
  35. sl_shared_assets/tools/transfer_tools.pyi +0 -53
  36. sl_shared_assets-4.0.1.dist-info/RECORD +0 -36
  37. sl_shared_assets-4.0.1.dist-info/entry_points.txt +0 -7
  38. {sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.0.dist-info}/WHEEL +0 -0
  39. {sl_shared_assets-4.0.1.dist-info → sl_shared_assets-5.0.0.dist-info}/licenses/LICENSE +0 -0
sl_shared_assets/data_classes/session_data.pyi (deleted)
@@ -1,379 +0,0 @@
- from enum import StrEnum
- from pathlib import Path
- from dataclasses import field, dataclass
-
- from _typeshed import Incomplete
- from ataraxis_data_structures import YamlConfig
-
- from .configuration_data import (
-     AcquisitionSystems as AcquisitionSystems,
-     get_system_configuration_data as get_system_configuration_data,
- )
-
- class SessionTypes(StrEnum):
-     """Defines the set of data acquisition session types supported by the various data acquisition systems used in
-     the Sun lab.
-
-     A data acquisition session broadly encompasses a recording session carried out to either acquire experiment data,
-     train the animal for upcoming experiments, or assess the quality of a surgical or other pre-experiment
-     intervention.
-
-     Notes:
-         This enumeration does not differentiate between acquisition systems. Different acquisition systems support
-         different session types and may not be suited for acquiring some of the session types listed in this
-         enumeration.
-     """
-
-     LICK_TRAINING = "lick training"
-     RUN_TRAINING = "run training"
-     MESOSCOPE_EXPERIMENT = "mesoscope experiment"
-     WINDOW_CHECKING = "window checking"
-
- class TrackerFileNames(StrEnum):
-     """Defines the set of processing tracker .yaml files supported by the various Sun lab data preprocessing,
-     processing, and dataset formation pipelines.
-
-     This enumeration standardizes the names of all processing tracker files used in the lab. It is designed to be
-     used via the get_processing_tracker() function to generate ProcessingTracker instances.
-     """
-
-     BEHAVIOR = "behavior_processing_tracker.yaml"
-     SUITE2P = "suite2p_processing_tracker.yaml"
-     DATASET = "dataset_formation_tracker.yaml"
-     VIDEO = "video_processing_tracker.yaml"
-     INTEGRITY = "integrity_verification_tracker.yaml"
-
- @dataclass()
- class RawData:
-     """Stores the paths to the directories and files that make up the session-specific 'raw_data' directory.
-
-     The raw_data directory stores the data acquired during the session's data acquisition runtime, before and after
-     preprocessing. Since preprocessing does not irreversibly alter the data, any data in that folder is considered
-     'raw,' even if preprocessing losslessly re-compresses the data for efficient transfer.
-
-     Notes:
-         The Sun lab data management strategy primarily relies on keeping multiple redundant copies of the raw_data
-         directory for each acquired session. Typically, one copy is stored on the lab's processing server and the
-         other is stored on the NAS.
-     """
-
-     raw_data_path: Path = ...
-     camera_data_path: Path = ...
-     mesoscope_data_path: Path = ...
-     behavior_data_path: Path = ...
-     zaber_positions_path: Path = ...
-     session_descriptor_path: Path = ...
-     hardware_state_path: Path = ...
-     surgery_metadata_path: Path = ...
-     session_data_path: Path = ...
-     experiment_configuration_path: Path = ...
-     mesoscope_positions_path: Path = ...
-     window_screenshot_path: Path = ...
-     system_configuration_path: Path = ...
-     checksum_path: Path = ...
-     telomere_path: Path = ...
-     ubiquitin_path: Path = ...
-     nk_path: Path = ...
-     def resolve_paths(self, root_directory_path: Path) -> None:
-         """Resolves all paths managed by the class instance based on the input root directory path.
-
-         This method is called each time the (wrapper) SessionData class is instantiated to regenerate the managed
-         path hierarchy on any machine that instantiates the class.
-
-         Args:
-             root_directory_path: The path to the top-level directory of the session. Typically, this path is
-                 assembled using the following hierarchy: root/project/animal/session_id.
-         """
-     def make_directories(self) -> None:
-         """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
-
-         This method is called each time the (wrapper) SessionData class is instantiated and allowed to generate
-         missing data directories.
-         """
-
- @dataclass()
- class ProcessedData:
-     """Stores the paths to the directories and files that make up the session-specific 'processed_data' directory.
-
-     The processed_data directory stores the data generated from the raw data (the contents of the raw_data directory)
-     by various processing pipelines. Processed data represents an intermediate step between raw data and the dataset
-     used in the data analysis, but is not itself designed to be analyzed.
-     """
-
-     processed_data_path: Path = ...
-     camera_data_path: Path = ...
-     mesoscope_data_path: Path = ...
-     behavior_data_path: Path = ...
-     p53_path: Path = ...
-     def resolve_paths(self, root_directory_path: Path) -> None:
-         """Resolves all paths managed by the class instance based on the input root directory path.
-
-         This method is called each time the (wrapper) SessionData class is instantiated to regenerate the managed
-         path hierarchy on any machine that instantiates the class.
-
-         Args:
-             root_directory_path: The path to the top-level directory of the session. Typically, this path is
-                 assembled using the following hierarchy: root/project/animal/session_id.
-         """
-     def make_directories(self) -> None:
-         """Ensures that all major subdirectories and the root directory exist, creating any missing directories.
-
-         This method is called each time the (wrapper) SessionData class is instantiated and allowed to generate
-         missing data directories.
-         """
-
- @dataclass
- class SessionData(YamlConfig):
-     """Stores and manages the data layout of a single Sun lab data acquisition session.
-
-     The primary purpose of this class is to maintain the session data structure across all supported destinations and
-     to provide a unified data access interface shared by all Sun lab libraries. The class can be used either to
-     generate a new session or to load the layout of an already existing session. When the class is used to create a
-     new session, it generates the new session's name from the current UTC timestamp, accurate to microseconds. This
-     ensures that each session name is unique and preserves the overall session order.
-
-     Notes:
-         This class is specifically designed for working with the data from a single session, performed by a single
-         animal under a specific experiment. The class is used to manage both raw and processed data: it follows the
-         data through the acquisition, preprocessing, and processing stages of the Sun lab data workflow. This class
-         serves as the entry point for all interactions with the managed session's data.
-     """
-
-     project_name: str
-     animal_id: str
-     session_name: str
-     session_type: str | SessionTypes
-     acquisition_system: str | AcquisitionSystems = ...
-     experiment_name: str | None = ...
-     python_version: str = ...
-     sl_experiment_version: str = ...
-     raw_data: RawData = field(default_factory=Incomplete)
-     processed_data: ProcessedData = field(default_factory=Incomplete)
-     def __post_init__(self) -> None:
-         """Ensures that raw_data and processed_data are always instances of RawData and ProcessedData."""
-     @classmethod
-     def create(
-         cls,
-         project_name: str,
-         animal_id: str,
-         session_type: SessionTypes | str,
-         experiment_name: str | None = None,
-         session_name: str | None = None,
-         python_version: str = "3.11.13",
-         sl_experiment_version: str = "2.0.0",
-     ) -> SessionData:
-         """Creates a new SessionData object and generates the new session's data structure on the local PC.
-
-         This method is intended to be called exclusively by the sl-experiment library to create new training or
-         experiment sessions and generate the session data directory tree.
-
-         Notes:
-             To load an already existing session data structure, use the load() method instead.
-
-             This method automatically dumps the data of the created SessionData instance into the session_data.yaml
-             file inside the root 'raw_data' directory of the created hierarchy. It also finds and dumps other
-             configuration files, such as experiment_configuration.yaml and system_configuration.yaml, into the same
-             'raw_data' directory. If the session's runtime is interrupted unexpectedly, the acquired data can still
-             be processed using these pre-saved class instances.
-
-         Args:
-             project_name: The name of the project for which the session is carried out.
-             animal_id: The ID code of the animal participating in the session.
-             session_type: The type of the session. Has to be one of the supported session types exposed by the
-                 SessionTypes enumeration.
-             experiment_name: The name of the experiment executed during the session. This optional argument is only
-                 used for experiment sessions. Note: the name passed to this argument has to match the name of the
-                 experiment configuration .yaml file.
-             session_name: An optional session name override. Generally, this argument should not be provided for
-                 most sessions. When provided, the method uses this name instead of generating a new timestamp-based
-                 name. This is only used during the 'ascension' runtime to convert old data structures to the modern
-                 lab standards.
-             python_version: The string that specifies the Python version used to collect session data. Has to be
-                 specified using the major.minor.patch version format.
-             sl_experiment_version: The string that specifies the version of the sl-experiment library used to collect
-                 session data. Has to be specified using the major.minor.patch version format.
-
-         Returns:
-             An initialized SessionData instance that stores the layout of the newly created session's data.
-         """
-     @classmethod
-     def load(
-         cls, session_path: Path, processed_data_root: Path | None = None, make_processed_data_directory: bool = False
-     ) -> SessionData:
-         """Loads the SessionData instance from the target session's session_data.yaml file.
-
-         This method is used to load the data layout information of an already existing session. Primarily, this is
-         used when processing session data. Due to how SessionData is stored and used in the lab, this method always
-         loads the data layout from the session_data.yaml file stored inside the 'raw_data' session subfolder.
-         Currently, all interactions with Sun lab data require access to the 'raw_data' folder of each session.
-
-         Notes:
-             To create a new session, use the create() method instead.
-
-         Args:
-             session_path: The path to the root directory of an existing session, e.g.: root/project/animal/session.
-             processed_data_root: If processed data is kept on a drive different from the one that stores raw data,
-                 provide the path to the root project directory (the directory that stores all Sun lab projects) on
-                 that drive. The method will automatically resolve the project/animal/session/processed_data
-                 hierarchy using this root path. If raw and processed data are kept on the same drive, keep this set
-                 to None.
-             make_processed_data_directory: Determines whether this method should create the processed_data directory
-                 if it does not exist.
-
-         Returns:
-             An initialized SessionData instance for the session whose data is stored at the provided path.
-
-         Raises:
-             FileNotFoundError: If the 'session_data.yaml' file is not found under the session_path/raw_data/
-                 subfolder.
-         """
-     def runtime_initialized(self) -> None:
-         """Ensures that the 'nk.bin' marker file is removed from the session's raw_data folder.
-
-         The 'nk.bin' marker is generated as part of the SessionData initialization (creation) process to mark
-         sessions that did not fully initialize during runtime. This service method is designed to be called by the
-         sl-experiment library classes to remove the 'nk.bin' marker when it is safe to do so. It should not be called
-         by end-users.
-         """
-     def save(self) -> None:
-         """Saves the instance data to the 'raw_data' directory of the managed session as a 'session_data.yaml' file.
-
-         This is used to save the data stored in the instance to disk so that it can be reused during further stages
-         of data processing. The method is intended to be used only by the SessionData instance itself during its
-         create() method runtime.
-         """
-
- @dataclass()
- class ProcessingTracker(YamlConfig):
-     """Wraps the .yaml file that tracks the state of a data processing runtime and provides tools for communicating
-     that state between multiple processes in a thread-safe manner.
-
-     Primarily, this tracker class is used by all remote data processing pipelines in the lab to prevent race
-     conditions and make it impossible to run multiple processing runtimes at the same time. It is also used to
-     evaluate the status (success / failure) of jobs running on remote compute servers.
-
-     Note:
-         In library version 4.0.0, the processing trackers were refactored to work similarly to 'lock' files. That is,
-         when a runtime is started, the tracker is switched into the 'running' (locked) state until it is unlocked,
-         aborted, or encounters an error. While the tracker is locked, only the manager process that locked it is
-         allowed to work with the session data. This feature allows executing complex processing pipelines that use
-         multiple concurrent and / or sequential processing jobs on the remote server.
-
-         The method documentation of this class frequently refers to a 'manager process': the highest-level process
-         that manages the runtime. When the runtime is executed on remote compute servers, the manager process is
-         typically the process running on the non-server machine (user PC) that executes the remote processing job on
-         the compute server (via SSH or a similar protocol). The worker process(es) that run the processing job(s) on
-         the remote compute servers are NOT considered manager processes.
-     """
-
-     file_path: Path
-     _complete: bool = ...
-     _encountered_error: bool = ...
-     _running: bool = ...
-     _manager_id: int = ...
-     _lock_path: str = field(init=False)
-     def __post_init__(self) -> None: ...
-     def _load_state(self) -> None:
-         """Reads the current processing state from the wrapped .yaml file."""
-     def _save_state(self) -> None:
-         """Saves the current processing state stored inside the instance attributes to the wrapped .yaml file."""
-     def start(self, manager_id: int) -> None:
-         """Configures the tracker file to indicate that a manager process is currently executing the tracked
-         processing runtime.
-
-         Calling this method effectively 'locks' the tracked session and processing runtime combination so that it is
-         only accessible from the manager process that calls this method. Calling this method for an already running
-         runtime managed by the same process has no effect, so it is safe to call this method at the beginning of each
-         processing job that makes up the runtime.
-
-         Args:
-             manager_id: The unique xxHash-64 hash identifier of the manager process that attempts to start the
-                 runtime tracked by this tracker file.
-
-         Raises:
-             TimeoutError: If the .lock file for the target .yaml file cannot be acquired within the timeout period.
-         """
-     def error(self, manager_id: int) -> None:
-         """Configures the tracker file to indicate that the tracked processing runtime encountered an error and
-         failed to complete.
-
-         This method fulfills two main purposes. First, it 'unlocks' the runtime, allowing other manager processes to
-         interface with the tracked runtime. Second, it updates the tracker file to reflect that the runtime was
-         interrupted due to an error, which is used by the manager processes to detect and handle processing failures.
-
-         Args:
-             manager_id: The unique xxHash-64 hash identifier of the manager process that attempts to report that the
-                 runtime tracked by this tracker file has encountered an error.
-
-         Raises:
-             TimeoutError: If the .lock file for the target .yaml file cannot be acquired within the timeout period.
-         """
-     def stop(self, manager_id: int) -> None:
-         """Configures the tracker file to indicate that the tracked processing runtime has been completed
-         successfully.
-
-         This method 'unlocks' the runtime, allowing other manager processes to interface with the tracked runtime.
-         It also configures the tracker file to indicate that the runtime has been completed successfully, which is
-         used by the manager processes to detect and handle processing completion.
-
-         Args:
-             manager_id: The unique xxHash-64 hash identifier of the manager process that attempts to report that the
-                 runtime tracked by this tracker file has been completed successfully.
-
-         Raises:
-             TimeoutError: If the .lock file for the target .yaml file cannot be acquired within the timeout period.
-         """
-     def abort(self) -> None:
-         """Resets the runtime tracker file to the default state.
-
-         This method can be used to reset the runtime tracker file regardless of the current runtime state. Unlike
-         other instance methods, this method can be called from any manager process, even if the runtime is already
-         locked by another process. It is only intended to be used in the case of an emergency, to 'unlock' a
-         deadlocked runtime.
-         """
-     @property
-     def is_complete(self) -> bool:
-         """Returns True if the tracker wrapped by the instance indicates that the processing runtime has been
-         completed successfully and is not currently ongoing."""
-     @property
-     def encountered_error(self) -> bool:
-         """Returns True if the tracker wrapped by the instance indicates that the processing runtime aborted due to
-         encountering an error."""
-     @property
-     def is_running(self) -> bool:
-         """Returns True if the tracker wrapped by the instance indicates that the processing runtime is currently
-         ongoing."""
-
- def get_processing_tracker(root: Path, file_name: TrackerFileNames | str) -> ProcessingTracker:
-     """Initializes and returns the ProcessingTracker instance that manages the data stored inside the target
-     processing tracker file.
-
-     This function uses the input root path and tracker file name to first resolve the absolute path to the .yaml data
-     cache of the target processing tracker file and then wrap the file into a ProcessingTracker instance. All Sun lab
-     libraries that use ProcessingTracker instances use this function to access the necessary trackers.
-
-     Notes:
-         If the target file does not exist, this function will create the file as part of the ProcessingTracker
-         initialization.
-
-         This function also generates the corresponding .lock file to ensure that the data inside the processing
-         tracker is accessed by a single process at a time.
-
-     Args:
-         root: The absolute path to the directory where the target file is stored or should be created.
-         file_name: The name of the target processing tracker file. Has to be one of the names from the
-             TrackerFileNames enumeration.
-
-     Returns:
-         The initialized ProcessingTracker instance that manages the data stored in the target file.
-     """
-
- def generate_manager_id() -> int:
-     """Generates and returns a unique integer identifier for the manager process that calls this function.
-
-     The identifier is generated from the current timestamp, accurate to microseconds, and a random number between 1
-     and 9999999999999. This ensures that the identifier is unique for each function call. The generated identifier
-     string is converted to a unique integer value using the xxHash-64 algorithm before it is returned to the caller.
-
-     Notes:
-         This function should be used to generate manager process identifiers for working with ProcessingTracker
-         instances from sl-shared-assets version 4.0.0 and above.
-     """
sl_shared_assets/data_classes/surgery_data.pyi (deleted)
@@ -1,89 +0,0 @@
- from dataclasses import dataclass
-
- from ataraxis_data_structures import YamlConfig
-
- @dataclass()
- class SubjectData:
-     """Stores the ID information of the surgical intervention's subject (animal)."""
-
-     id: int
-     ear_punch: str
-     sex: str
-     genotype: str
-     date_of_birth_us: int
-     weight_g: float
-     cage: int
-     location_housed: str
-     status: str
-
- @dataclass()
- class ProcedureData:
-     """Stores the general information about the surgical intervention."""
-
-     surgery_start_us: int
-     surgery_end_us: int
-     surgeon: str
-     protocol: str
-     surgery_notes: str
-     post_op_notes: str
-     surgery_quality: int = ...
-
- @dataclass
- class ImplantData:
-     """Stores the information about a single implantation procedure performed during the surgical intervention.
-
-     Multiple ImplantData instances are used at the same time if the surgery involved multiple implants.
-     """
-
-     implant: str
-     implant_target: str
-     implant_code: str
-     implant_ap_coordinate_mm: float
-     implant_ml_coordinate_mm: float
-     implant_dv_coordinate_mm: float
-
- @dataclass
- class InjectionData:
-     """Stores the information about a single injection performed during the surgical intervention.
-
-     Multiple InjectionData instances are used at the same time if the surgery involved multiple injections.
-     """
-
-     injection: str
-     injection_target: str
-     injection_volume_nl: float
-     injection_code: str
-     injection_ap_coordinate_mm: float
-     injection_ml_coordinate_mm: float
-     injection_dv_coordinate_mm: float
-
- @dataclass
- class DrugData:
-     """Stores the information about all drugs administered to the subject before, during, and immediately after the
-     surgical intervention.
-     """
-
-     lactated_ringers_solution_volume_ml: float
-     lactated_ringers_solution_code: str
-     ketoprofen_volume_ml: float
-     ketoprofen_code: str
-     buprenorphine_volume_ml: float
-     buprenorphine_code: str
-     dexamethasone_volume_ml: float
-     dexamethasone_code: str
-
- @dataclass
- class SurgeryData(YamlConfig):
-     """Stores the data about a single animal surgical intervention.
-
-     This class aggregates other dataclass instances that store specific data about the surgical procedure.
-     Primarily, it is used to save the data as a .yaml file to the 'raw_data' directory of every session acquired for
-     each animal in every lab project. This way, the surgery data is always stored alongside the behavior and brain
-     activity data collected during the session.
-     """
-
-     subject: SubjectData
-     procedure: ProcedureData
-     drugs: DrugData
-     implants: list[ImplantData]
-     injections: list[InjectionData]
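
The surgery_data.pyi stubs above describe a plain dataclass hierarchy. A minimal sketch of assembling a complete record under the 4.x API follows; all field values are invented, the top-level imports are assumed re-exports, and persistence (not shown) goes through the YamlConfig base class from ataraxis_data_structures.

from sl_shared_assets import (  # assumed package-root re-exports
    DrugData,
    ImplantData,
    InjectionData,
    ProcedureData,
    SubjectData,
    SurgeryData,
)

record = SurgeryData(
    subject=SubjectData(
        id=101, ear_punch="left", sex="F", genotype="wt",
        date_of_birth_us=1_700_000_000_000_000, weight_g=22.5,
        cage=12, location_housed="vivarium room 3", status="active",
    ),
    procedure=ProcedureData(
        surgery_start_us=1_705_000_000_000_000,
        surgery_end_us=1_705_003_600_000_000,
        surgeon="J. Doe", protocol="IACUC-0000",
        surgery_notes="cranial window implant", post_op_notes="recovered well",
    ),
    drugs=DrugData(
        lactated_ringers_solution_volume_ml=0.5, lactated_ringers_solution_code="LRS-1",
        ketoprofen_volume_ml=0.1, ketoprofen_code="KET-1",
        buprenorphine_volume_ml=0.05, buprenorphine_code="BUP-1",
        dexamethasone_volume_ml=0.04, dexamethasone_code="DEX-1",
    ),
    # One entry per implant / injection performed during the intervention.
    implants=[
        ImplantData(
            implant="cranial window", implant_target="V1", implant_code="CW-3",
            implant_ap_coordinate_mm=-3.5, implant_ml_coordinate_mm=2.5,
            implant_dv_coordinate_mm=0.0,
        )
    ],
    injections=[
        InjectionData(
            injection="AAV", injection_target="V1", injection_volume_nl=200.0,
            injection_code="AAV-7", injection_ap_coordinate_mm=-3.5,
            injection_ml_coordinate_mm=2.5, injection_dv_coordinate_mm=-0.3,
        )
    ],
)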
sl_shared_assets/server/__init__.pyi (deleted)
@@ -1,11 +0,0 @@
- from .job import (
-     Job as Job,
-     JupyterJob as JupyterJob,
- )
- from .server import (
-     Server as Server,
-     ServerCredentials as ServerCredentials,
-     generate_server_credentials as generate_server_credentials,
- )
-
- __all__ = ["Server", "ServerCredentials", "generate_server_credentials", "Job", "JupyterJob"]