sl-shared-assets 2.0.1__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of sl-shared-assets has been flagged as potentially problematic.

Files changed (32)
  1. sl_shared_assets/__init__.py +17 -9
  2. sl_shared_assets/__init__.pyi +12 -8
  3. sl_shared_assets/cli.py +258 -21
  4. sl_shared_assets/cli.pyi +44 -5
  5. sl_shared_assets/data_classes/__init__.py +8 -3
  6. sl_shared_assets/data_classes/__init__.pyi +8 -4
  7. sl_shared_assets/data_classes/configuration_data.py +149 -30
  8. sl_shared_assets/data_classes/configuration_data.pyi +49 -11
  9. sl_shared_assets/data_classes/runtime_data.py +70 -49
  10. sl_shared_assets/data_classes/runtime_data.pyi +41 -33
  11. sl_shared_assets/data_classes/session_data.py +193 -253
  12. sl_shared_assets/data_classes/session_data.pyi +99 -116
  13. sl_shared_assets/data_classes/surgery_data.py +1 -1
  14. sl_shared_assets/server/__init__.py +2 -2
  15. sl_shared_assets/server/__init__.pyi +5 -2
  16. sl_shared_assets/server/job.py +229 -1
  17. sl_shared_assets/server/job.pyi +111 -0
  18. sl_shared_assets/server/server.py +431 -31
  19. sl_shared_assets/server/server.pyi +158 -15
  20. sl_shared_assets/tools/__init__.py +2 -1
  21. sl_shared_assets/tools/__init__.pyi +2 -0
  22. sl_shared_assets/tools/ascension_tools.py +9 -21
  23. sl_shared_assets/tools/ascension_tools.pyi +1 -1
  24. sl_shared_assets/tools/packaging_tools.py +2 -2
  25. sl_shared_assets/tools/project_management_tools.py +147 -41
  26. sl_shared_assets/tools/project_management_tools.pyi +45 -6
  27. {sl_shared_assets-2.0.1.dist-info → sl_shared_assets-3.0.0.dist-info}/METADATA +127 -13
  28. sl_shared_assets-3.0.0.dist-info/RECORD +36 -0
  29. {sl_shared_assets-2.0.1.dist-info → sl_shared_assets-3.0.0.dist-info}/entry_points.txt +2 -0
  30. sl_shared_assets-2.0.1.dist-info/RECORD +0 -36
  31. {sl_shared_assets-2.0.1.dist-info → sl_shared_assets-3.0.0.dist-info}/WHEEL +0 -0
  32. {sl_shared_assets-2.0.1.dist-info → sl_shared_assets-3.0.0.dist-info}/licenses/LICENSE +0 -0

sl_shared_assets/tools/project_management_tools.py

@@ -11,15 +11,15 @@ from ataraxis_base_utilities import console
 
 from ..data_classes import (
     SessionData,
+    SessionTypes,
     ProcessingTracker,
     RunTrainingDescriptor,
     LickTrainingDescriptor,
+    WindowCheckingDescriptor,
     MesoscopeExperimentDescriptor,
 )
 from .packaging_tools import calculate_directory_checksum
 
-_valid_session_types = {"lick training", "run training", "mesoscope experiment", "window checking"}
-
 
 class ProjectManifest:
     """Wraps the contents of a Sun lab project manifest .feather file and exposes methods for visualizing and
@@ -76,11 +76,11 @@ class ProjectManifest:
             "session",
             "type",
             "complete",
-            "integrity_verification",
-            "suite2p_processing",
-            "behavior_processing",
-            "video_processing",
-            "dataset_formation",
+            "integrity",
+            "suite2p",
+            "behavior",
+            "video",
+            "dataset",
         ]
 
         # Retrieves the data
@@ -93,7 +93,7 @@ class ProjectManifest:
             animal = str(animal)
         else:
             animal = int(animal)
-        df = df.filter(pl.col("animal") == animal)
+        df = df.filter(pl.col("animal") == animal)
 
         # Ensures the data displays properly
         with pl.Config(
@@ -157,7 +157,13 @@ class ProjectManifest:
         """
         return tuple(self._data.select("session").sort("session").to_series().to_list())
 
-    def get_sessions_for_animal(self, animal: str | int, exclude_incomplete: bool = True) -> tuple[str, ...]:
+    def get_sessions_for_animal(
+        self,
+        animal: str | int,
+        exclude_incomplete: bool = True,
+        dataset_ready_only: bool = False,
+        not_dataset_ready_only: bool = False,
+    ) -> tuple[str, ...]:
         """Returns all session IDs for the target animal.
 
         This provides a tuple of all sessions performed by the target animal as part of the target project.
@@ -166,6 +172,11 @@ class ProjectManifest:
             animal: The ID of the animal for which to get the session data.
             exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
                 list.
+            dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
+                the output list. Enabling this option only shows sessions that can be integrated into a dataset.
+            not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
+                as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
+                enabled, the 'dataset_ready_only' option takes precedence.
 
         Raises:
             ValueError: If the specified animal is not found in the manifest file.
@@ -188,6 +199,12 @@ class ProjectManifest:
         if exclude_incomplete:
             data = data.filter(pl.col("complete") == 1)
 
+        # Optionally filters sessions based on their readiness for dataset integration.
+        if dataset_ready_only:  # Dataset-ready option always takes precedence
+            data = data.filter(pl.col("dataset") == 1)
+        elif not_dataset_ready_only:
+            data = data.filter(pl.col("dataset") == 0)
+
         # Formats and returns session IDs to the caller
         sessions = data.select("session").sort("session").to_series().to_list()
         return tuple(sessions)
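
Annotation: the two new flags combine with 'exclude_incomplete' as shown in the hunk above. A short usage sketch follows; the manifest path and animal ID are placeholders, and both the top-level import and the constructor argument are assumptions based on the class docstring rather than something this diff confirms.

```
# Usage sketch for the new filtering flags. Path, animal ID, and the constructor argument are
# placeholders/assumptions; only the get_sessions_for_animal() signature comes from this diff.
from pathlib import Path

from sl_shared_assets import ProjectManifest  # import location assumed

manifest = ProjectManifest(Path("/data/my_project/manifest.feather"))

# Complete sessions whose data is ready to be integrated into a dataset:
ready = manifest.get_sessions_for_animal(animal=11, dataset_ready_only=True)

# Complete sessions that are not yet dataset-ready (still awaiting processing):
pending = manifest.get_sessions_for_animal(animal=11, not_dataset_ready_only=True)

# If both flags are enabled, dataset_ready_only takes precedence, as documented above.
```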
@@ -203,8 +220,7 @@ class ProjectManifest:
 
         Returns:
             A Polars DataFrame with the following columns: 'animal', 'date', 'notes', 'session', 'type', 'complete',
-            'intensity_verification', 'suite2p_processing', 'behavior_processing', 'video_processing',
-            'dataset_formation'.
+            'intensity_verification', 'suite2p', 'behavior', 'video', 'dataset'.
         """
 
         df = self._data
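
Annotation: the manifest is stored as a .feather (Arrow IPC) file, and 3.0.0 renames the status columns to the short names listed above. A minimal polars sketch of reading the file directly, assuming a placeholder manifest path:

```
import polars as pl

# Placeholder path to a manifest produced by generate_project_manifest(); .feather files are
# Arrow IPC, which polars reads natively.
df = pl.read_ipc("/data/my_project/manifest.feather")

# 3.0.0 uses the short column names; 2.0.1 used 'integrity_verification', 'suite2p_processing',
# 'behavior_processing', 'video_processing', and 'dataset_formation'.
dataset_ready = df.filter((pl.col("complete") == 1) & (pl.col("dataset") == 1))
print(dataset_ready.select("animal", "session", "type"))
```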
@@ -264,12 +280,12 @@ def generate_project_manifest(
         # Determines whether the session data is complete (ran for the intended duration and has all expected data).
         "complete": [],
         # Determines whether the session data integrity has been verified upon transfer to a storage machine.
-        "integrity_verification": [],
-        "suite2p_processing": [],  # Determines whether the session has been processed with the single-day s2p pipeline.
+        "integrity": [],
+        "suite2p": [],  # Determines whether the session has been processed with the single-day s2p pipeline.
         # Determines whether the session has been processed with the behavior extraction pipeline.
-        "behavior_processing": [],
-        "video_processing": [],  # Determines whether the session has been processed with the DeepLabCut pipeline.
-        "dataset_formation": [],  # Determines whether the session's data has been integrated into a dataset.
+        "behavior": [],
+        "video": [],  # Determines whether the session has been processed with the DeepLabCut pipeline.
+        "dataset": [],  # Determines whether the session's data is ready to be integrated into a dataset.
     }
 
     # Loops over each session of every animal in the project and extracts session ID information and information
@@ -313,56 +329,63 @@ def generate_project_manifest(
 
         # Depending on the session type, instantiates the appropriate descriptor instance and uses it to read the
         # experimenter notes
-        if session_data.session_type == "lick training":
+        if session_data.session_type == SessionTypes.LICK_TRAINING:
             descriptor: LickTrainingDescriptor = LickTrainingDescriptor.from_yaml(  # type: ignore
                 file_path=session_data.raw_data.session_descriptor_path
             )
             manifest["notes"].append(descriptor.experimenter_notes)
-        elif session_data.session_type == "run training":
+        elif session_data.session_type == SessionTypes.RUN_TRAINING:
             descriptor: RunTrainingDescriptor = RunTrainingDescriptor.from_yaml(  # type: ignore
                 file_path=session_data.raw_data.session_descriptor_path
             )
             manifest["notes"].append(descriptor.experimenter_notes)
-        elif session_data.session_type == "mesoscope experiment":
+        elif session_data.session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
             descriptor: MesoscopeExperimentDescriptor = MesoscopeExperimentDescriptor.from_yaml(  # type: ignore
                 file_path=session_data.raw_data.session_descriptor_path
             )
             manifest["notes"].append(descriptor.experimenter_notes)
-        elif session_data.session_type == "window checking":
-            manifest["notes"].append("N/A")
+        elif session_data.session_type == SessionTypes.WINDOW_CHECKING:
+            # sl-experiment version 3.0.0 added session descriptors to Window Checking runtimes. Since the file does not
+            # exist in prior versions, this section is written to statically handle the discrepancy.
+            try:
+                descriptor: WindowCheckingDescriptor = WindowCheckingDescriptor.from_yaml(  # type: ignore
+                    file_path=session_data.raw_data.session_descriptor_path
+                )
+                manifest["notes"].append(descriptor.experimenter_notes)
+            except Exception:
+                manifest["notes"].append("N/A")
 
         # If the session raw_data folder contains the telomere.bin file, marks the session as complete.
         manifest["complete"].append(session_data.raw_data.telomere_path.exists())
 
         # Data verification status
         tracker = ProcessingTracker(file_path=session_data.raw_data.integrity_verification_tracker_path)
-        manifest["integrity_verification"].append(tracker.is_complete)
+        manifest["integrity"].append(tracker.is_complete)
 
         # If the session is incomplete or unverified, marks all processing steps as FALSE, as automatic processing is
         # disabled for incomplete sessions. If the session is unverified, the case is even more severe, as its data may
         # be corrupted.
-        if not manifest["complete"][-1] or not manifest["integrity_verification"][-1]:
-            manifest["suite2p_processing"].append(False)
-            manifest["dataset_formation"].append(False)
-            manifest["behavior_processing"].append(False)
-            manifest["video_processing"].append(False)
+        if not manifest["complete"][-1] or not manifest["integrity"][-1]:
+            manifest["suite2p"].append(False)
+            manifest["dataset"].append(False)
+            manifest["behavior"].append(False)
+            manifest["video"].append(False)
             continue  # Cycles to the next session
 
-        # Suite2p (single-day) status
+        # Suite2p (single-day) processing status.
         tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
-        manifest["suite2p_processing"].append(tracker.is_complete)
-
-        # Dataset formation (integration) status. Tracks whether the session has been added to any dataset(s).
-        tracker = ProcessingTracker(file_path=session_data.processed_data.dataset_formation_tracker_path)
-        manifest["dataset_formation"].append(tracker.is_complete)
+        manifest["suite2p"].append(tracker.is_complete)
 
-        # Dataset formation (integration) status. Tracks whether the session has been added to any dataset(s).
+        # Behavior data processing status.
         tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
-        manifest["behavior_processing"].append(tracker.is_complete)
+        manifest["behavior"].append(tracker.is_complete)
 
         # DeepLabCut (video) processing status.
         tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
-        manifest["video_processing"].append(tracker.is_complete)
+        manifest["video"].append(tracker.is_complete)
+
+        # Tracks whether the session's data is currently in the processing or dataset integration mode.
+        manifest["dataset"].append(session_data.processed_data.p53_path.exists())
 
     # If all animal IDs are integer-convertible, stores them as numbers to promote proper sorting. Otherwise, stores
     # them as strings. The latter options are primarily kept for compatibility with Tyche data
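
Annotation: the comparisons above replace the removed _valid_session_types string set with members of the new SessionTypes enumeration imported from sl_shared_assets.data_classes. Its definition is not part of this diff; the sketch below is a hypothetical reconstruction only, inferring the member names from the comparisons and the values from the removed string set.

```
# Hypothetical reconstruction -- the real SessionTypes lives in sl_shared_assets.data_classes
# and is not shown in this diff.
from enum import Enum


class SessionTypes(str, Enum):
    LICK_TRAINING = "lick training"
    RUN_TRAINING = "run training"
    MESOSCOPE_EXPERIMENT = "mesoscope experiment"
    WINDOW_CHECKING = "window checking"


# A str-valued enum would compare equal to both enum members and the raw strings used by 2.x,
# which would keep older session records readable.
assert SessionTypes.LICK_TRAINING == "lick training"
```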
@@ -382,11 +405,11 @@ def generate_project_manifest(
         "type": pl.String,
         "notes": pl.String,
         "complete": pl.UInt8,
-        "integrity_verification": pl.UInt8,
-        "suite2p_processing": pl.UInt8,
-        "dataset_formation": pl.UInt8,
-        "behavior_processing": pl.UInt8,
-        "video_processing": pl.UInt8,
+        "integrity": pl.UInt8,
+        "suite2p": pl.UInt8,
+        "dataset": pl.UInt8,
+        "behavior": pl.UInt8,
+        "video": pl.UInt8,
     }
     df = pl.DataFrame(manifest, schema=schema, strict=False)
 
@@ -468,3 +491,86 @@ def verify_session_checksum(
     # runtime finished with an error to prevent deadlocking the runtime.
     if tracker.is_running:
         tracker.error()
+
+
+def resolve_p53_marker(
+    session_path: Path,
+    create_processed_data_directory: bool = True,
+    processed_data_root: None | Path = None,
+    remove: bool = False,
+) -> None:
+    """Depending on configuration, either creates or removes the p53.bin marker file for the target session.
+
+    The marker file statically determines whether the session can be targeted by data processing or dataset formation
+    pipelines.
+
+    Notes:
+        Since dataset integration relies on data processing outputs, it is essential to prevent processing pipelines
+        from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
+        that only one type of runtimes (processing or dataset integration) is allowed to work with the session.
+
+        For the p53.bin marker to be created, the session must currently not undergo any processing. Removing the
+        p53.bin marker does not have any dependencies and will be executed even if the session is currently undergoing
+        dataset integration. This is due to data access hierarchy limitations of the Sun lab BioHPC server.
+
+    Args:
+        session_path: The path to the session directory for which the p53.bin marker needs to be resolved. Note, the
+            input session directory must contain the 'raw_data' subdirectory.
+        create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
+        processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
+            the root directory where to store the processed data from all projects, and it will be automatically
+            modified to include the project name, the animal name, and the session ID.
+        remove: Determines whether this function is called to create or remove the p53.bin marker.
+    """
+
+    # Loads session data layout. If configured to do so, also creates the processed data hierarchy
+    session_data = SessionData.load(
+        session_path=session_path,
+        processed_data_root=processed_data_root,
+        make_processed_data_directory=create_processed_data_directory,
+    )
+
+    # If the p53.bin marker exists and the runtime is configured to remove it, removes the marker file. If the runtime
+    # is configured to create the marker, aborts the runtime (as the marker already exists).
+    if session_data.processed_data.p53_path.exists():
+        if remove:
+            session_data.processed_data.p53_path.unlink()
+            return  # Ends remove runtime
+
+        return  # Ends create runtime
+
+    # If the marker does not exist and the function is called in 'remove' mode, aborts the runtime
+    elif remove:
+        return  # Ends remove runtime
+
+    # The rest of the runtime deals with determining whether it is safe to create the marker file.
+    # Queries the type of the processed session
+    session_type = session_data.session_type
+
+    # Window checking sessions are not designed to be integrated into datasets, so they cannot be marked with p53.bin
+    # file. Similarly, any incomplete session is automatically excluded from dataset formation.
+    if session_type == SessionTypes.WINDOW_CHECKING or not session_data.raw_data.telomere_path.exists():
+        return
+
+    # Training sessions collect similar data and share processing pipeline requirements
+    if session_type == SessionTypes.LICK_TRAINING or session_type == SessionTypes.RUN_TRAINING:
+        # Ensures that the session is not being processed with one of the supported pipelines.
+        behavior_tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
+        video_tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
+        if behavior_tracker.is_running or video_tracker.is_running:
+            # Note, training runtimes do not require suite2p processing.
+            return
+
+    # Mesoscope experiment sessions require additional processing with suite2p
+    if session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
+        behavior_tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
+        suite2p_tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
+        video_tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
+
+        # Similar to above, ensures that the session is not being processed with one of the supported pipelines.
+        if behavior_tracker.is_running or suite2p_tracker.is_running or video_tracker.is_running:
+            return
+
+    # If the runtime reached this point, the session is eligible for dataset integration. Creates the p53.bin marker
+    # file, preventing the session from being processed again as long as the marker exists.
+    session_data.processed_data.p53_path.touch()
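
Annotation: a sketch of how a dataset-integration runtime might bracket its work with the new marker. The session path is a placeholder; the function itself is defined in the module shown above, but the exact call sequence is illustrative, not taken from this diff.

```
# Hypothetical call sequence; the path is a placeholder.
from pathlib import Path

from sl_shared_assets.tools.project_management_tools import resolve_p53_marker

session = Path("/server/raw/my_project/animal_11/2025-01-01-10-00-00")

# Creates p53.bin only if the session is complete, is not a window-checking session, and no
# applicable processing pipeline is currently running for it.
resolve_p53_marker(session_path=session, remove=False)

# ... dataset integration happens here ...

# Removes the marker so that processing pipelines may target the session again.
resolve_p53_marker(session_path=session, remove=True)
```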

sl_shared_assets/tools/project_management_tools.pyi

@@ -1,19 +1,18 @@
 from pathlib import Path
 
 import polars as pl
-from _typeshed import Incomplete
 
 from ..data_classes import (
     SessionData as SessionData,
+    SessionTypes as SessionTypes,
     ProcessingTracker as ProcessingTracker,
     RunTrainingDescriptor as RunTrainingDescriptor,
     LickTrainingDescriptor as LickTrainingDescriptor,
+    WindowCheckingDescriptor as WindowCheckingDescriptor,
     MesoscopeExperimentDescriptor as MesoscopeExperimentDescriptor,
 )
 from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
 
-_valid_session_types: Incomplete
-
 class ProjectManifest:
     """Wraps the contents of a Sun lab project manifest .feather file and exposes methods for visualizing and
     working with the data stored inside the file.
@@ -69,7 +68,13 @@ class ProjectManifest:
     This provides a tuple of all sessions, independent of the participating animal, that were recorded as part
     of the target project.
     """
-    def get_sessions_for_animal(self, animal: str | int, exclude_incomplete: bool = True) -> tuple[str, ...]:
+    def get_sessions_for_animal(
+        self,
+        animal: str | int,
+        exclude_incomplete: bool = True,
+        dataset_ready_only: bool = False,
+        not_dataset_ready_only: bool = False,
+    ) -> tuple[str, ...]:
         """Returns all session IDs for the target animal.
 
         This provides a tuple of all sessions performed by the target animal as part of the target project.
@@ -78,6 +83,11 @@ class ProjectManifest:
             animal: The ID of the animal for which to get the session data.
             exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
                 list.
+            dataset_ready_only: Determines whether to exclude sessions not marked as 'dataset' integration ready from
+                the output list. Enabling this option only shows sessions that can be integrated into a dataset.
+            not_dataset_ready_only: The opposite of 'dataset_ready_only'. Determines whether to exclude sessions marked
+                as 'dataset' integration ready from the output list. Note, when both this and 'dataset_ready_only' are
+                enabled, the 'dataset_ready_only' option takes precedence.
 
         Raises:
             ValueError: If the specified animal is not found in the manifest file.
@@ -93,8 +103,7 @@ class ProjectManifest:
 
         Returns:
             A Polars DataFrame with the following columns: 'animal', 'date', 'notes', 'session', 'type', 'complete',
-            'intensity_verification', 'suite2p_processing', 'behavior_processing', 'video_processing',
-            'dataset_formation'.
+            'intensity_verification', 'suite2p', 'behavior', 'video', 'dataset'.
         """
 
 def generate_project_manifest(
@@ -146,3 +155,33 @@ def verify_session_checksum(
             the root directory where to store the processed data from all projects, and it will be automatically
             modified to include the project name, the animal name, and the session ID.
     """
+
+def resolve_p53_marker(
+    session_path: Path,
+    create_processed_data_directory: bool = True,
+    processed_data_root: None | Path = None,
+    remove: bool = False,
+) -> None:
+    """Depending on configuration, either creates or removes the p53.bin marker file for the target session.
+
+    The marker file statically determines whether the session can be targeted by data processing or dataset formation
+    pipelines.
+
+    Notes:
+        Since dataset integration relies on data processing outputs, it is essential to prevent processing pipelines
+        from altering the data while it is integrated into a dataset. The p53.bin marker solves this issue by ensuring
+        that only one type of runtimes (processing or dataset integration) is allowed to work with the session.
+
+        For the p53.bin marker to be created, the session must currently not undergo any processing. Removing the
+        p53.bin marker does not have any dependencies and will be executed even if the session is currently undergoing
+        dataset integration. This is due to data access hierarchy limitations of the Sun lab BioHPC server.
+
+    Args:
+        session_path: The path to the session directory for which the p53.bin marker needs to be resolved. Note, the
+            input session directory must contain the 'raw_data' subdirectory.
+        create_processed_data_directory: Determines whether to create the processed data hierarchy during runtime.
+        processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
+            the root directory where to store the processed data from all projects, and it will be automatically
+            modified to include the project name, the animal name, and the session ID.
+        remove: Determines whether this function is called to create or remove the p53.bin marker.
+    """

{sl_shared_assets-2.0.1.dist-info → sl_shared_assets-3.0.0.dist-info}/METADATA

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: sl-shared-assets
-Version: 2.0.1
-Summary: Stores assets shared between multiple Sun (NeuroAI) lab data pipelines.
+Version: 3.0.0
+Summary: Provides data acquisition and processing assets shared between Sun (NeuroAI) lab libraries.
 Project-URL: Homepage, https://github.com/Sun-Lab-NBB/sl-shared-assets
 Project-URL: Documentation, https://sl-shared-assets-api-docs.netlify.app/
 Author: Ivan Kondratyev, Kushaan Gupta, Natalie Yeung
@@ -681,7 +681,7 @@ License: GNU GENERAL PUBLIC LICENSE
         Public License instead of this License. But first, please read
         <https://www.gnu.org/licenses/why-not-lgpl.html>.
 License-File: LICENSE
-Keywords: acquisition,assets,data,processing,sunlab
+Keywords: acquisition,assets,data,processing,server,sunlab
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
@@ -691,13 +691,13 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.11
 Requires-Dist: appdirs==1.4.4
-Requires-Dist: ataraxis-base-utilities==3.0.1
+Requires-Dist: ataraxis-base-utilities==3.1.0
 Requires-Dist: ataraxis-data-structures==3.1.1
 Requires-Dist: ataraxis-time==3.0.0
 Requires-Dist: click==8.2.1
 Requires-Dist: filelock==3.18.0
 Requires-Dist: natsort==8.4.0
-Requires-Dist: numpy<2.3.0,>=2.0.2
+Requires-Dist: numpy==2.2.6
 Requires-Dist: paramiko==3.5.1
 Requires-Dist: polars==1.31.0
 Requires-Dist: pyarrow==20.0.0
@@ -706,7 +706,6 @@ Requires-Dist: simple-slurm==0.3.6
 Requires-Dist: tqdm==4.67.1
 Requires-Dist: xxhash==3.5.0
 Provides-Extra: conda
-Requires-Dist: grayskull<3,>=2; extra == 'conda'
 Requires-Dist: hatchling<2,>=1; extra == 'conda'
 Requires-Dist: importlib-metadata<9,>=8; extra == 'conda'
 Requires-Dist: mypy<2,>=1; extra == 'conda'
@@ -725,7 +724,7 @@ Requires-Dist: appdirs==1.4.4; extra == 'condarun'
 Requires-Dist: click==8.2.1; extra == 'condarun'
 Requires-Dist: filelock==3.18.0; extra == 'condarun'
 Requires-Dist: natsort==8.4.0; extra == 'condarun'
-Requires-Dist: numpy<2.3.0,>=2.0.2; extra == 'condarun'
+Requires-Dist: numpy==2.2.6; extra == 'condarun'
 Requires-Dist: paramiko==3.5.1; extra == 'condarun'
 Requires-Dist: polars==1.31.0; extra == 'condarun'
 Requires-Dist: pyarrow==20.0.0; extra == 'condarun'
@@ -734,7 +733,6 @@ Requires-Dist: tqdm==4.67.1; extra == 'condarun'
 Provides-Extra: dev
 Requires-Dist: ataraxis-automation<6,>=5; extra == 'dev'
 Requires-Dist: build<2,>=1; extra == 'dev'
-Requires-Dist: grayskull<3,>=2; extra == 'dev'
 Requires-Dist: hatchling<2,>=1; extra == 'dev'
 Requires-Dist: importlib-metadata<9,>=8; extra == 'dev'
 Requires-Dist: mypy<2,>=1; extra == 'dev'
@@ -774,6 +772,7 @@ A Python library that stores assets shared between multiple Sun (NeuroAI) lab da
 ![PyPI - License](https://img.shields.io/pypi/l/sl-shared-assets)
 ![PyPI - Status](https://img.shields.io/pypi/status/sl-shared-assets)
 ![PyPI - Wheel](https://img.shields.io/pypi/wheel/sl-shared-assets)
+
 ___
 
 ## Detailed Description
@@ -815,7 +814,7 @@ All software library dependencies are installed automatically as part of library
 
 Note, installation from source is ***highly discouraged*** for everyone who is not an active project developer.
 
-1. Download this repository to your local machine using your preferred method, such as Git-cloning. Use one
+1. Download this repository to your local machine using any method, such as Git-cloning. Use one
    of the stable releases from [GitHub](https://github.com/Sun-Lab-NBB/sl-shared-assets/releases).
 2. Unpack the downloaded zip and note the path to the binary wheel (`.whl`) file contained in the archive.
 3. Run ```python -m pip install WHEEL_PATH```, replacing 'WHEEL_PATH' with the path to the wheel file, to install the
@@ -828,12 +827,128 @@ Use the following command to install the library using pip: ```pip install sl-sh
 
 ## Usage
 
-All library components are intended to be used via other Sun lab libraries. Developers should study the API and CLI
+Most library components are intended to be used via other Sun lab libraries. Developers should study the API and CLI
 documentation below to learn how to use library components in other Sun lab libraries. For notes on using shared
 assets for data acquisition, see the [sl-experiment](https://github.com/Sun-Lab-NBB/sl-experiment) library ReadMe.
 For notes on using shared assets for data processing, see the [sl-forgery](https://github.com/Sun-Lab-NBB/sl-forgery)
 library ReadMe.
 
+The only exception to the note above is the **server.py** package exposed by this library. This package exposes an API
+for running headless and a CLI for running interactive Simple Linux Utility for Resource Management (SLURM)-managed
+jobs on remote compute clusters.
+
+### Generating Access Credentials
+
+To access any remote server, the user is required to first generate the access credentials. The credentials are stored
+inside the 'server_credentials.yaml' file, which is generated by using the `sl-create-server-credentials` command.
+**Note!** Users are advised to generate this file in a secure (non-shared) location on their local machine.
+
+### Running Headless Jobs
+
+A headless job is a job that does not require any user interaction during runtime. Currently, all headless jobs in the
+lab rely on pip-installable packages that expose a callable Command-Line Interface to carry out some type of
+data processing. In this regard, **running a headless job is equivalent to calling a CLI command on your local
+machine**, except that the command is executed on a remote compute server. Therefore, the primary purpose of the API
+exposed by this library is to transfer the target command request to the remote server, execute it, and monitor the
+runtime status until it is complete.
+
+For example, the [sl-suite2p package](https://github.com/Sun-Lab-NBB/suite2p) maintained in the lab exposes a CLI to
+process 2-Photon data from experiment sessions. During data processing by the
+[sl-forgery](https://github.com/Sun-Lab-NBB/sl-forgery) library, a remote job is sent to the server that uses the CLI
+exposed by the sl-suite2p package to process target session(s).
+
+### Creating Jobs
+All remote jobs are sent to the server in the form of an executable *shell* (.sh) script. The script is composed on the
+local machine that uses this library and transferred to a temporary server directory using Secure Shell File
+Transfer Protocol (SFTP). The server is then instructed to evaluate (run) the script using SLURM job manager, via a
+Secure Shell (SSH) session.
+
+Broadly, each job consists of three major steps, which correspond to three major sections of the job shell script:
+1. **Setting up the job environment**. Each job script starts with a SLURM job parameter block, which tells SLURM
+what resources (CPUs, GPUs, RAM, etc.) the job requires. When resources become available, SLURM generates a virtual
+environment and runs the rest of the job script in that environment. This forms the basis for using the shared
+compute resources fairly, as SLURM balances resource allocation and the order of job execution for all users.
+2. **Activating the target conda environment**. Currently, all jobs are assumed to use Python libraries to execute the
+intended data processing. Similar to processing data locally, each job expects the remote server to provide a
+Conda environment preconfigured with necessary assets (packages) to run the job. Therefore, each job contains a
+section that activates the user-defined conda environment before running the rest of the job.
+3. **Executing processing**. The final section is typically unique to each job and calls specific CLI commands or runs
+specific Python modules. Since each job is submitted as a shell script, it can do anything a server shell can
+do. Therefore, despite python-centric approach to data processing in the lab, a remote job composed via this library
+can execute ***any*** arbitrary command available to the user on the remove server.
+
+Use the *Job* class exposed by this library to compose remote jobs. **Steps 1 and 2** of each job are configured when
+initializing the Job instance, while **step 3** is added via the `add_command()` method of the Job class:
+```
+# First, import the job class
+from pathlib import Path
+from sl_shared_assets import Job
+
+# Next, instantiate a new Job object. For example, this job is used to verify the integrity of raw experiment data as
+# it is transferred to the long-term storage destination (server) by the sl-experiment library.
+job = Job(
+    job_name="data_integrity_verification",
+    output_log=Path("/temp/output.txt"),
+    error_log=Path("/temp/errors.txt"),
+    working_directory=Path("/temp/test_job"),
+    conda_environment="test_environment",
+    cpus_to_use=20,
+    ram_gb=50,
+    time_limit=20,
+)
+
+# Finally, add a CLI command call (the actual work to be done by the job). Here, the job calls the
+# 'sl-verify-session' command exposed by the sl-shared-assets library installed in the target environment on the server.
+# Use this method to add commands as you would type them in your local terminal / shell / command line.
+job.add_command(f"sl-verify-session -sp /temp/test_session")
+```
+
+### Submitting and Monitoring Jobs:
+To submit the job to the remote server, use a **Server** class instance. This class encapsulates access to the target
+remote compute server and uses the server_credentials.yaml file to determine server access credentials (see above):
+```
+# Initialize the Server class using precreated server credentials file
+server = Server(credentials_path=Path("/temp/server_credentials.yaml"))
+
+# Submit the job (generated in the previous code snippet) to the server
+job = server.submit_job(job)
+
+# Wait for the server to complete the job
+delay_timer = PrecisionTimer("s")
+while not server.job_complete(job=job):
+    delay_timer.delay_noblock(delay=5, allow_sleep=True)
+```
+
+**Note!** The Server class only checks whether the job is running on the server, but not the outcome of the job. For
+that, you can either manually check the output and error logs for the job or come up with a programmatic way of
+checking the outcome. All developers are highly advised to study the API documentation for the Job and Server classes
+to use them most effectively.
+
+**Critical!** Since running remote jobs is largely equivalent to executing them locally, all users are highly encouraged
+to test their job scripts locally before deploying them server-side. If a script works on a local machine, it is likely
+that the script would behave similarly and work on the server.
+
+### Interactive Jobs
+
+Interactive jobs are a special extension of the headless job type discussed above. Specifically, an interactive job is
+a headless job, whose only purpose is to **create and maintain a Jupyter lab server** under the SLURM control.
+Specifically, it requests SLURM to set up an isolated environment, starts a Jupyter server in that environment, and
+sends the credentials for the started server back to the user.
+
+In essence, this allocates a set of resources the user can use interactively by running various Jupyter notebooks.
+While convenient for certain data analysis cases, this type of jobs has the potential to inefficiently hog server
+resources for prolonged periods of time. Therefore, users are encouraged to only resort to this type of jobs when
+strictly necessary and to minimize the resources and time allocated to running these jobs.
+
+To run an interactive job, call the `sl-start-jupyter` CLI command exposed by this library and follow the instructions
+printed to the terminal by the command during runtime.
+
+**Critical!** While this command tries to minimize collisions with other users, it is possible that an access port
+collision occurs when multiple users try to instantiate a jupyter server at the same time. If you cannot authenticate
+with the Jupyter server, this likely indicates that the target port was in use and Jupyter automatically incremented the
+port number by 1. In this case, add 1 to your port number and try connecting to that port using the Jupyter credentials
+provided by the command. For example, if your target port was '8888,' try port '8889.'
+
 ---
 
 ## API Documentation
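
Annotation: the two README snippets in the hunk above omit the imports for Server and PrecisionTimer. A self-contained variant of the same workflow is sketched below; importing Server from sl_shared_assets and PrecisionTimer from the pinned ataraxis-time dependency are assumptions not confirmed by this diff.

```
from pathlib import Path

from ataraxis_time import PrecisionTimer  # assumed import; ataraxis-time is a pinned dependency
from sl_shared_assets import Job, Server   # Server import location assumed

# Steps 1 and 2: SLURM resources and the conda environment are configured on the Job instance.
job = Job(
    job_name="data_integrity_verification",
    output_log=Path("/temp/output.txt"),
    error_log=Path("/temp/errors.txt"),
    working_directory=Path("/temp/test_job"),
    conda_environment="test_environment",
    cpus_to_use=20,
    ram_gb=50,
    time_limit=20,
)
# Step 3: the actual work, expressed as a CLI call available in the target environment.
job.add_command("sl-verify-session -sp /temp/test_session")

# Submit the script over SSH/SFTP and poll until SLURM no longer reports it as running.
server = Server(credentials_path=Path("/temp/server_credentials.yaml"))
job = server.submit_job(job)
delay_timer = PrecisionTimer("s")
while not server.job_complete(job=job):
    delay_timer.delay_noblock(delay=5, allow_sleep=True)
```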
@@ -849,7 +964,7 @@ ___
 
 ## Versioning
 
-We use [semantic versioning](https://semver.org/) for this project. For the versions available, see the
+This project uses [semantic versioning](https://semver.org/). For the versions available, see the
 [tags on this repository](https://github.com/Sun-Lab-NBB/sl-shared-assets/tags).
 
 ---
@@ -858,7 +973,6 @@ We use [semantic versioning](https://semver.org/) for this project. For the vers
 
 - Ivan Kondratyev ([Inkaros](https://github.com/Inkaros))
 - Kushaan Gupta ([kushaangupta](https://github.com/kushaangupta))
-- Yuantao Deng ([YuantaoDeng](https://github.com/YuantaoDeng))
 - Natalie Yeung
 
 ___
@@ -873,7 +987,7 @@ ___
 
 - All Sun lab [members](https://neuroai.github.io/sunlab/people) for providing the inspiration and comments during the
   development of this library.
-- The creators of all other projects used in our development automation pipelines and source code
+- The creators of all other projects used in the development automation pipelines and source code of this project
   [see pyproject.toml](pyproject.toml).
 
 ---

sl_shared_assets-3.0.0.dist-info/RECORD

@@ -0,0 +1,36 @@
+sl_shared_assets/__init__.py,sha256=rCu1VYs2Lc1l0jqHO3UtfuymU0uY2ccxEn4UyscIut8,2347
+sl_shared_assets/__init__.pyi,sha256=WCWIS-I3ToP4XybNZAi3fA7j2CZ48dl9D-fmd7oZKCo,2615
+sl_shared_assets/cli.py,sha256=1TRpRhkwi0A1WlN125iLxWt4e_ST4s6gpHfORK3FEQk,18126
+sl_shared_assets/cli.pyi,sha256=kQjGw5bxMLBqCdXcYPDz4aSrgrotrR02tX4hny0O9RA,5258
+sl_shared_assets/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sl_shared_assets/data_classes/__init__.py,sha256=bdm0hyQpNF0RL2SPhUgaOz33FsRzpM2L_z5-91HyZBE,1998
+sl_shared_assets/data_classes/__init__.pyi,sha256=J7ZCH9qQ4qz-3Wq9ILdihlmK9zFR3iU1cpLcSaN45Y8,2238
+sl_shared_assets/data_classes/configuration_data.py,sha256=SN2I_HrJkWirBQamMxRpwIyIiv5oW15Bgtl_-We9Ia0,36348
+sl_shared_assets/data_classes/configuration_data.pyi,sha256=MJPBQ2_vkZSYJOOYzwalBxOwLtpYKAnCaO6szqZ6adI,11059
+sl_shared_assets/data_classes/runtime_data.py,sha256=MLIef6s9n2gG6sbp197gpFfzb05e_8vwVzyS_oSmXYQ,16722
+sl_shared_assets/data_classes/runtime_data.pyi,sha256=LzNuEWu-GlPGdyyi8Hn2OFUjGCWOaOplKsRQBbjn2vQ,6768
+sl_shared_assets/data_classes/session_data.py,sha256=PZ7QVUyPXdLIuEJH4wvHRpirQk2GDiGNHIm0VlCU6QU,48237
+sl_shared_assets/data_classes/session_data.pyi,sha256=g53jIe-v8VkQJHc7ITS0KBGRhzn6LOIb6f96SEbEGig,15898
+sl_shared_assets/data_classes/surgery_data.py,sha256=5B1OPKFq4bnzbAoe-_c5dFV3kbSD5YFzXbX2zXmfGs8,7485
+sl_shared_assets/data_classes/surgery_data.pyi,sha256=rf59lJ3tGSYKHQlEGXg75MnjajBwl0DYhL4TClAO4SM,2605
+sl_shared_assets/server/__init__.py,sha256=w7y73RXXjBrWQsjU5g1QNCv_gsXDYnHos3NpOoR2AHA,452
+sl_shared_assets/server/__init__.pyi,sha256=Zc12G90fZdgEMwaVZbFzrRVV1wH_LEj3sxaV3lhk1Cw,316
+sl_shared_assets/server/job.py,sha256=DnEVIswZXm9queBgy6MlpIrCosXvQ_tweOeko7LN9yc,19431
+sl_shared_assets/server/job.pyi,sha256=uYfOuKgPL1hSHQvy5nmXzFkVjS316F3IZTdT-PmluZU,11663
+sl_shared_assets/server/server.py,sha256=MGk1v49aEFeIChMDsiR7CXjVkWwDpD9kA1TK0fwuTXw,32926
+sl_shared_assets/server/server.pyi,sha256=5Yxq4txhjtd9w-6U9fPehzMeIZL5GcprVCHd9mPP6FI,15113
+sl_shared_assets/tools/__init__.py,sha256=NktXk62E_HHOrO_93z_MVmSd6-Oir3mE4xE9Yr8Qa7U,682
+sl_shared_assets/tools/__init__.pyi,sha256=0UXorfCXXmHQOP5z7hODpsqEX0DAkOta5VZqN6FSS-w,623
+sl_shared_assets/tools/ascension_tools.py,sha256=tRV_tpoQURDD03slrRdh12Qbf9_ZQo4RU0PgYbUWOc0,14620
+sl_shared_assets/tools/ascension_tools.pyi,sha256=fs5j7nbnZ4WpgK8D75A7WJcvFMwK_MUO9ULIYo1YkGo,3739
+sl_shared_assets/tools/packaging_tools.py,sha256=cLZu4GBwrmQcBtvcLUahY7UPsucK3_-MZzJdZk5aPMc,7540
+sl_shared_assets/tools/packaging_tools.pyi,sha256=vgGbAQCExwg-0A5F72MzEhzHxu97Nqg1yuz-5P89ycU,3118
+sl_shared_assets/tools/project_management_tools.py,sha256=VpGI4Vt0hBIZ1_6F6Hq9zESw3pR8cNffSJp9oCHQk1Y,28725
+sl_shared_assets/tools/project_management_tools.pyi,sha256=AeBG-8XUygiJndfsBCKACKIZdnvk0avQRibWO24ahtM,10238
+sl_shared_assets/tools/transfer_tools.py,sha256=J26kwOp_NpPSY0-xu5FTw9udte-rm_mW1FJyaTNoqQI,6606
+sl_shared_assets/tools/transfer_tools.pyi,sha256=FoH7eYZe7guGHfPr0MK5ggO62uXKwD2aJ7h1Bu7PaEE,3294
+sl_shared_assets-3.0.0.dist-info/METADATA,sha256=meOnoDUinqxwqgzCNLpflBVDm8ZQxYnGAAsVvnoSKYY,56944
+sl_shared_assets-3.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+sl_shared_assets-3.0.0.dist-info/entry_points.txt,sha256=UmO1rl7ly9N7HWPwWyP9E0b5KBUStpBo4TRoqNtizDY,430
+sl_shared_assets-3.0.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+sl_shared_assets-3.0.0.dist-info/RECORD,,