sl-shared-assets 1.2.0rc2__py3-none-any.whl → 1.2.0rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic. Click here for more details.

sl_shared_assets/cli.py CHANGED
@@ -226,6 +226,26 @@ def generate_system_configuration_file(output_directory: str, acquisition_system
226
226
  required=True,
227
227
  help="The password to use for server authentication.",
228
228
  )
229
+ @click.option(
230
+ "-rdp",
231
+ "--raw_data_path",
232
+ type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
233
+ required=True,
234
+ help=(
235
+ "The absolute path to the directory used to store raw data from all Sun lab projects, relative to the server "
236
+ "root."
237
+ ),
238
+ )
239
+ @click.option(
240
+ "-pdp",
241
+ "--processed_data_path",
242
+ type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
243
+ required=True,
244
+ help=(
245
+ "The absolute path to the directory used to store processed data from all Sun lab projects, relative to the "
246
+ "server root."
247
+ ),
248
+ )
229
249
  def generate_server_credentials_file(output_directory: str, host: str, username: str, password: str) -> None:
230
250
  """Generates a new server_credentials.yaml file under the specified directory, using input information.
231
251
 
@@ -111,12 +111,6 @@ class MesoscopePaths:
111
111
  sharing protocol, such as SMB."""
112
112
  harvesters_cti_path: Path = Path("/opt/mvIMPACT_Acquire/lib/x86_64/mvGenTLProducer.cti")
113
113
  """The path to the GeniCam CTI file used to connect to Harvesters-managed cameras."""
114
- server_processed_data_root: Path = Path("/workdir/sun_data")
115
- """The absolute path to the BioHPC server directory used to store the processed data from all Sun lab projects.
116
- This path is relative to the server root and is only used when submitting remote jobs to the server."""
117
- server_raw_data_root: Path = Path("/storage/sun_data")
118
- """The absolute path to the BioHPC server directory used to store the raw data from all Sun lab projects.
119
- This path is relative to the server root and is only used when submitting remote jobs to the server."""
120
114
 
121
115
 
122
116
  @dataclass()
@@ -304,8 +298,6 @@ class MesoscopeSystemConfiguration(YamlConfig):
304
298
  self.paths.nas_directory = Path(self.paths.nas_directory)
305
299
  self.paths.mesoscope_directory = Path(self.paths.mesoscope_directory)
306
300
  self.paths.harvesters_cti_path = Path(self.paths.harvesters_cti_path)
307
- self.paths.server_processed_data_root = Path(self.paths.server_processed_data_root)
308
- self.paths.server_raw_data_root = Path(self.paths.server_raw_data_root)
309
301
 
310
302
  # Converts valve_calibration data from dictionary to a tuple of tuples format
311
303
  if not isinstance(self.microcontrollers.valve_calibration_data, tuple):
@@ -354,8 +346,6 @@ class MesoscopeSystemConfiguration(YamlConfig):
354
346
  original.paths.nas_directory = str(original.paths.nas_directory) # type: ignore
355
347
  original.paths.mesoscope_directory = str(original.paths.mesoscope_directory) # type: ignore
356
348
  original.paths.harvesters_cti_path = str(original.paths.harvesters_cti_path) # type: ignore
357
- original.paths.server_processed_data_root = str(original.paths.server_processed_data_root) # type: ignore
358
- original.paths.server_raw_data_root = str(original.paths.server_raw_data_root) # type: ignore
359
349
 
360
350
  # Converts valve calibration data into dictionary format
361
351
  if isinstance(original.microcontrollers.valve_calibration_data, tuple):
@@ -58,8 +58,6 @@ class MesoscopePaths:
58
58
  nas_directory: Path = ...
59
59
  mesoscope_directory: Path = ...
60
60
  harvesters_cti_path: Path = ...
61
- server_processed_data_root: Path = ...
62
- server_raw_data_root: Path = ...
63
61
 
64
62
  @dataclass()
65
63
  class MesoscopeCameras:
@@ -171,9 +171,6 @@ class MesoscopeExperimentDescriptor(YamlConfig):
171
171
  """The weight of the animal, in grams, at the beginning of the session."""
172
172
  dispensed_water_volume_ml: float
173
173
  """Stores the total water volume, in milliliters, dispensed during runtime."""
174
- is_guided: bool = False
175
- """Determines whether the animal has to lick in the reward zone to receive water rewards. If this is set to False,
176
- the system automatically dispenses water when the animal enters the reward zone."""
177
174
  experimenter_notes: str = "Replace this with your notes."
178
175
  """This field is not set during runtime. It is expected that each experimenter will replace this field with their
179
176
  notes made during runtime."""
@@ -88,7 +88,6 @@ class MesoscopeExperimentDescriptor(YamlConfig):
88
88
  experimenter: str
89
89
  mouse_weight_g: float
90
90
  dispensed_water_volume_ml: float
91
- is_guided: bool = ...
92
91
  experimenter_notes: str = ...
93
92
  experimenter_given_water_volume_ml: float = ...
94
93
  incomplete: bool = ...
@@ -21,16 +21,35 @@ from .job import Job
21
21
 
22
22
 
23
23
  def generate_server_credentials(
24
- output_directory: Path, username: str, password: str, host: str = "cbsuwsun.biohpc.cornell.edu"
24
+ output_directory: Path,
25
+ username: str,
26
+ password: str,
27
+ host: str = "cbsuwsun.biohpc.cornell.edu",
28
+ raw_data_root: str = "/workdir/sun_data",
29
+ processed_data_root: str = "/storage/sun_data",
25
30
  ) -> None:
26
31
  """Generates a new server_credentials.yaml file under the specified directory, using input information.
27
32
 
28
33
  This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
29
34
  only used when setting up new host-computers in the lab.
35
+
36
+ Args:
37
+ output_directory: The directory where to save the generated server_credentials.yaml file.
38
+ username: The username to use for server authentication.
39
+ password: The password to use for server authentication.
40
+ host: The hostname or IP address of the server to connect to.
41
+ raw_data_root: The path to the root directory used to store the raw data from all Sun lab projects on the
42
+ server.
43
+ processed_data_root: The path to the root directory used to store the processed data from all Sun lab projects
44
+ on the server.
30
45
  """
31
- ServerCredentials(username=username, password=password, host=host).to_yaml(
32
- file_path=output_directory.joinpath("server_credentials.yaml")
33
- )
46
+ ServerCredentials(
47
+ username=username,
48
+ password=password,
49
+ host=host,
50
+ raw_data_root=raw_data_root,
51
+ processed_data_root=processed_data_root,
52
+ ).to_yaml(file_path=output_directory.joinpath("server_credentials.yaml"))
34
53
 
35
54
 
36
55
  @dataclass()
@@ -49,6 +68,11 @@ class ServerCredentials(YamlConfig):
49
68
  """The password to use for server authentication."""
50
69
  host: str = "cbsuwsun.biohpc.cornell.edu"
51
70
  """The hostname or IP address of the server to connect to."""
71
+ raw_data_root: str = "/workdir/sun_data"
72
+ """The path to the root directory used to store the raw data from all Sun lab projects on the target server."""
73
+ processed_data_root: str = "/storage/sun_data"
74
+ """The path to the root directory used to store the processed data from all Sun lab projects on the target
75
+ server."""
52
76
 
53
77
 
54
78
  class Server:
@@ -248,3 +272,17 @@ class Server:
248
272
  # Prevents closing already closed connections
249
273
  if self._open:
250
274
  self._client.close()
275
+
276
+ @property
277
+ def raw_data_root(self) -> str:
278
+ """Returns the absolute path to the directory used to store the raw data for all Sun lab projects on the server
279
+ accessible through this class.
280
+ """
281
+ return self._credentials.raw_data_root
282
+
283
+ @property
284
+ def processed_data_root(self) -> str:
285
+ """Returns the absolute path to the directory used to store the processed data for all Sun lab projects on the
286
+ server accessible through this class.
287
+ """
288
+ return self._credentials.processed_data_root
@@ -8,12 +8,27 @@ from ataraxis_data_structures import YamlConfig
8
8
  from .job import Job as Job
9
9
 
10
10
  def generate_server_credentials(
11
- output_directory: Path, username: str, password: str, host: str = "cbsuwsun.biohpc.cornell.edu"
11
+ output_directory: Path,
12
+ username: str,
13
+ password: str,
14
+ host: str = "cbsuwsun.biohpc.cornell.edu",
15
+ raw_data_root: str = "/workdir/sun_data",
16
+ processed_data_root: str = "/storage/sun_data",
12
17
  ) -> None:
13
18
  """Generates a new server_credentials.yaml file under the specified directory, using input information.
14
19
 
15
20
  This function provides a convenience interface for generating new BioHPC server credential files. Generally, this is
16
21
  only used when setting up new host-computers in the lab.
22
+
23
+ Args:
24
+ output_directory: The directory where to save the generated server_credentials.yaml file.
25
+ username: The username to use for server authentication.
26
+ password: The password to use for server authentication.
27
+ host: The hostname or IP address of the server to connect to.
28
+ raw_data_root: The path to the root directory used to store the raw data from all Sun lab projects on the
29
+ server.
30
+ processed_data_root: The path to the root directory used to store the processed data from all Sun lab projects
31
+ on the server.
17
32
  """
18
33
  @dataclass()
19
34
  class ServerCredentials(YamlConfig):
@@ -28,6 +43,8 @@ class ServerCredentials(YamlConfig):
28
43
  username: str = ...
29
44
  password: str = ...
30
45
  host: str = ...
46
+ raw_data_root: str = ...
47
+ processed_data_root: str = ...
31
48
 
32
49
  class Server:
33
50
  """Encapsulates access to the Sun lab BioHPC processing server.
@@ -114,3 +131,13 @@ class Server:
114
131
 
115
132
  This method has to be called before destroying the class instance to ensure proper resource cleanup.
116
133
  """
134
+ @property
135
+ def raw_data_root(self) -> str:
136
+ """Returns the absolute path to the directory used to store the raw data for all Sun lab projects on the server
137
+ accessible through this class.
138
+ """
139
+ @property
140
+ def processed_data_root(self) -> str:
141
+ """Returns the absolute path to the directory used to store the processed data for all Sun lab projects on the
142
+ server accessible through this class.
143
+ """
@@ -3,13 +3,212 @@ functionality of SessionData class via a convenient API that allows working with
3
3
  up a given project."""
4
4
 
5
5
  from pathlib import Path
6
+ from datetime import datetime
6
7
 
8
+ import pytz
7
9
  import polars as pl
8
10
  from ataraxis_base_utilities import console
9
11
 
10
- from ..data_classes import SessionData, ProcessingTracker
12
+ from ..data_classes import (
13
+ SessionData,
14
+ ProcessingTracker,
15
+ RunTrainingDescriptor,
16
+ LickTrainingDescriptor,
17
+ MesoscopeExperimentDescriptor,
18
+ )
11
19
  from .packaging_tools import calculate_directory_checksum
12
20
 
21
+ _valid_session_types = {"lick training", "run training", "mesoscope experiment", "window checking"}
22
+
23
+
24
+ class ProjectManifest:
25
+ """Wraps the contents of a Sun lab project manifest .feather file and exposes methods for visualizing and
26
+ working with the data stored inside the file.
27
+
28
+ This class functions as a high-level API for working with Sun lab projects. It is used both to visualize the
29
+ current state of various projects and during automated data processing to determine which processing steps to
30
+ apply to different sessions.
31
+
32
+ Args:
33
+ manifest_file: The path to the .feather manifest file that stores the target project's state data.
34
+
35
+ Attributes:
36
+ _data: Stores the manifest data as a Polars DataFrame.
37
+ _animal_string: Determines whether animal IDs are stored as strings or unsigned integers.
38
+ """
39
+
40
+ def __init__(self, manifest_file: Path):
41
+ # Reads the data from the target manifest file into the class attribute
42
+ self._data: pl.DataFrame = pl.read_ipc(source=manifest_file, use_pyarrow=True)
43
+
44
+ # Determines whether animal IDs are stored as strings or as numbers
45
+ self._animal_string = False
46
+ schema = self._data.collect_schema()
47
+ if isinstance(schema["animal"], pl.String):
48
+ self._animal_string = True
49
+
50
+ def print_data(self) -> None:
51
+ """Prints the entire contents of the manifest file to the terminal."""
52
+ with pl.Config(
53
+ set_tbl_rows=-1, # Displays all rows (-1 means unlimited)
54
+ set_tbl_cols=-1, # Displays all columns (-1 means unlimited)
55
+ set_tbl_hide_column_data_types=True,
56
+ set_tbl_cell_alignment="LEFT",
57
+ set_tbl_width_chars=250, # Sets table width to 200 characters
58
+ set_fmt_str_lengths=600, # Allows longer strings to display properly (default is 32)
59
+ ):
60
+ print(self._data)
61
+
62
+ def print_summary(self, animal: str | int | None = None) -> None:
63
+ """Prints a summary view of the manifest file to the terminal, excluding the 'experimenter notes' data for
64
+ each session.
65
+
66
+ This data view is optimized for tracking which processing steps have been applied to each session inside the
67
+ project.
68
+
69
+ Args:
70
+ animal: The ID of the animal for which to display the data. If an ID is provided, this method will only
71
+ display the data for that animal. Otherwise, it will display the data for all animals.
72
+ """
73
+ summary_cols = [
74
+ "animal",
75
+ "date",
76
+ "session",
77
+ "type",
78
+ "complete",
79
+ "integrity_verification",
80
+ "suite2p_processing",
81
+ "behavior_processing",
82
+ "video_processing",
83
+ "dataset_formation",
84
+ ]
85
+
86
+ # Retrieves the data
87
+ df = self._data.select(summary_cols)
88
+
89
+ # Optionally filters the data for the target animal
90
+ if animal is not None:
91
+ # Ensures that the 'animal' argument has the same type as the data inside the DataFrame.
92
+ if self._animal_string:
93
+ animal = str(animal)
94
+ else:
95
+ animal = int(animal)
96
+ df = df.filter(pl.col("animal") == animal)
97
+
98
+ # Ensures the data displays properly
99
+ with pl.Config(
100
+ set_tbl_rows=-1,
101
+ set_tbl_cols=-1,
102
+ set_tbl_width_chars=250,
103
+ set_tbl_hide_column_data_types=True,
104
+ set_tbl_cell_alignment="CENTER",
105
+ ):
106
+ print(df)
107
+
108
+ def print_notes(self, animal: str | int | None = None) -> None:
109
+ """Prints only the animal, date, session, type, and notes data from the manifest file.
110
+
111
+ This data view is optimized for experimenters to check what sessions have been recorded for each animal in the
112
+ project and refresh their memory on the outcomes of each session using experimenter notes.
113
+
114
+ Args:
115
+ animal: The ID of the animal for which to display the data. If an ID is provided, this method will only
116
+ display the data for that animal. Otherwise, it will display the data for all animals.
117
+ """
118
+
119
+ # Pre-selects the columns to display
120
+ df = self._data.select(["animal", "date", "session", "type", "notes"])
121
+
122
+ # Optionally filters the data for the target animal
123
+ if animal is not None:
124
+ # Ensures that the 'animal' argument has the same type as the data inside the DataFrame.
125
+ if self._animal_string:
126
+ animal = str(animal)
127
+ else:
128
+ animal = int(animal)
129
+
130
+ df = df.filter(pl.col("animal") == animal)
131
+
132
+ # Prints the extracted data
133
+ with pl.Config(
134
+ set_tbl_rows=-1,
135
+ set_tbl_cols=-1,
136
+ set_tbl_hide_column_data_types=True,
137
+ set_tbl_cell_alignment="LEFT",
138
+ set_tbl_width_chars=250, # Wider columns for notes
139
+ set_fmt_str_lengths=600, # Allows very long strings for notes
140
+ ):
141
+ print(df)
142
+
143
+ @property
144
+ def animals(self) -> tuple[str, ...]:
145
+ """Returns all unique animal IDs stored inside the manifest file.
146
+
147
+ This provides a tuple of all animal IDs participating in the target project.
148
+ """
149
+ return tuple(self._data.select("animal").unique().sort("animal").to_series().to_list())
150
+
151
+ @property
152
+ def sessions(self) -> tuple[str, ...]:
153
+ """Returns all session IDs stored inside the manifest file.
154
+
155
+ This provides a tuple of all sessions, independent of the participating animal, that were recorded as part
156
+ of the target project.
157
+ """
158
+ return tuple(self._data.select("session").sort("session").to_series().to_list())
159
+
160
+ def get_sessions_for_animal(self, animal: str | int, exclude_incomplete: bool = True) -> tuple[str, ...]:
161
+ """Returns all session IDs for the target animal.
162
+
163
+ This provides a tuple of all sessions performed by the target animal as part of the target project.
164
+
165
+ Args:
166
+ animal: The ID of the animal for which to get the session data.
167
+ exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
168
+ list.
169
+
170
+ Raises:
171
+ ValueError: If the specified animal is not found in the manifest file.
172
+ """
173
+
174
+ # Ensures that the 'animal' argument has the same type as the data inside the DataFrame.
175
+ if self._animal_string:
176
+ animal = str(animal)
177
+ else:
178
+ animal = int(animal)
179
+
180
+ if animal not in self.animals:
181
+ message = f"Animal ID '{animal}' not found in manifest. Available animals: {self.animals}"
182
+ console.error(message=message, error=ValueError)
183
+
184
+ # Filters by animal ID
185
+ data = self._data.filter(pl.col("animal") == animal)
186
+
187
+ # Optionally filters out incomplete sessions
188
+ if exclude_incomplete:
189
+ data = data.filter(pl.col("complete") == 1)
190
+
191
+ # Formats and returns session IDs to the caller
192
+ sessions = data.select("session").sort("session").to_series().to_list()
193
+ return tuple(sessions)
194
+
195
+ def get_session_info(self, animal: str | int, session: str) -> pl.DataFrame:
196
+ """Returns a Polars DataFrame that stores detailed information for the specified session and animal combination.
197
+
198
+ Args:
199
+ animal: The ID of the animal for which to retrieve the data.
200
+ session: The ID of the session for which to retrieve the data.
201
+ """
202
+ # Ensures that the 'animal' argument has the same type as the data inside the DataFrame.
203
+ if self._animal_string:
204
+ animal = str(animal)
205
+ else:
206
+ animal = int(animal)
207
+
208
+ df = self._data
209
+ df = df.filter(pl.col("animal").eq(animal) & pl.col("session").eq(session))
210
+ return df
211
+
13
212
 
14
213
  def generate_project_manifest(
15
214
  raw_project_directory: Path, output_directory: Path, processed_project_directory: Path | None = None
@@ -18,8 +217,8 @@ def generate_project_manifest(
18
217
 
19
218
  This function evaluates the input project directory and builds the 'manifest' file for the project. The file
20
219
  includes the descriptive information about every session stored inside the input project folder and the state of
21
- session's data processing (which processing pipelines have been applied to each session). The file will be created
22
- under the 'output_path' directory and use the following name pattern: {ProjectName}}_manifest.feather.
220
+ the session's data processing (which processing pipelines have been applied to each session). The file will be
221
+ created under the 'output_directory' directory and use the following name pattern: {ProjectName}_manifest.feather.
23
222
 
24
223
  Notes:
25
224
  The manifest file is primarily used to capture and move project state information between machines, typically
@@ -42,7 +241,7 @@ def generate_project_manifest(
42
241
  )
43
242
  console.error(message=message, error=FileNotFoundError)
44
243
 
45
- # Finds all raw data directories
244
+ # Finds all session directories
46
245
  session_directories = [directory.parent for directory in raw_project_directory.rglob("raw_data")]
47
246
 
48
247
  if len(session_directories) == 0:
@@ -54,29 +253,35 @@ def generate_project_manifest(
54
253
  console.error(message=message, error=FileNotFoundError)
55
254
 
56
255
  # Precreates the 'manifest' dictionary structure
57
- manifest: dict[str, list[str | bool]] = {
256
+ manifest: dict[str, list[str | bool | datetime | int]] = {
58
257
  "animal": [], # Animal IDs.
59
258
  "session": [], # Session names.
259
+ "date": [], # Session names stored as timezone-aware date-time objects in EST.
60
260
  "type": [], # Type of the session (e.g., Experiment, Training, etc.).
61
- "raw_data": [], # Server-side raw_data folder path.
62
- "processed_data": [], # Server-side processed_data folder path.
63
- # Determines whether the session data is complete. Incomplete sessions are excluded from processing.
261
+ "notes": [], # The experimenter notes about the session.
262
+ # Determines whether the session data is complete (ran for the intended duration and has all expected data).
64
263
  "complete": [],
65
- # Determines whether the session data integrity has been verified upon transfer to storage machine.
264
+ # Determines whether the session data integrity has been verified upon transfer to a storage machine.
66
265
  "integrity_verification": [],
67
266
  "suite2p_processing": [], # Determines whether the session has been processed with the single-day s2p pipeline.
68
- "dataset_formation": [], # Determines whether the session's data has been integrated into a dataset.
69
267
  # Determines whether the session has been processed with the behavior extraction pipeline.
70
268
  "behavior_processing": [],
71
269
  "video_processing": [], # Determines whether the session has been processed with the DeepLabCut pipeline.
270
+ "dataset_formation": [], # Determines whether the session's data has been integrated into a dataset.
72
271
  }
73
272
 
74
273
  # Loops over each session of every animal in the project and extracts session ID information and information
75
274
  # about which processing steps have been successfully applied to the session.
76
275
  for directory in session_directories:
276
+ # Skips processing directories without files (sessions with empty raw-data directories)
277
+ if len([file for file in directory.joinpath("raw_data").glob("*")]) == 0:
278
+ continue
279
+
77
280
  # Instantiates the SessionData instance to resolve the paths to all session's data files and locations.
78
281
  session_data = SessionData.load(
79
- session_path=directory, processed_data_root=processed_project_directory, make_processed_data_directory=False
282
+ session_path=directory,
283
+ processed_data_root=processed_project_directory,
284
+ make_processed_data_directory=False,
80
285
  )
81
286
 
82
287
  # Fills the manifest dictionary with data for the processed session:
@@ -85,8 +290,44 @@ def generate_project_manifest(
85
290
  manifest["animal"].append(session_data.animal_id)
86
291
  manifest["session"].append(session_data.session_name)
87
292
  manifest["type"].append(session_data.session_type)
88
- manifest["raw_data"].append(str(session_data.raw_data.raw_data_path))
89
- manifest["processed_data"].append(str(session_data.processed_data.processed_data_path))
293
+
294
+ # Parses session name into the date-time object to simplify working with date-time data in the future
295
+ date_time_components = session_data.session_name.split("-")
296
+ date_time = datetime(
297
+ year=int(date_time_components[0]),
298
+ month=int(date_time_components[1]),
299
+ day=int(date_time_components[2]),
300
+ hour=int(date_time_components[3]),
301
+ minute=int(date_time_components[4]),
302
+ second=int(date_time_components[5]),
303
+ microsecond=int(date_time_components[6]),
304
+ tzinfo=pytz.UTC,
305
+ )
306
+
307
+ # Converts from UTC to EST / EDT for user convenience
308
+ eastern = pytz.timezone("America/New_York")
309
+ date_time = date_time.astimezone(eastern)
310
+ manifest["date"].append(date_time)
311
+
312
+ # Depending on the session type, instantiates the appropriate descriptor instance and uses it to read the
313
+ # experimenter notes
314
+ if session_data.session_type == "lick training":
315
+ descriptor: LickTrainingDescriptor = LickTrainingDescriptor.from_yaml( # type: ignore
316
+ file_path=session_data.raw_data.session_descriptor_path
317
+ )
318
+ manifest["notes"].append(descriptor.experimenter_notes)
319
+ elif session_data.session_type == "run training":
320
+ descriptor: RunTrainingDescriptor = RunTrainingDescriptor.from_yaml( # type: ignore
321
+ file_path=session_data.raw_data.session_descriptor_path
322
+ )
323
+ manifest["notes"].append(descriptor.experimenter_notes)
324
+ elif session_data.session_type == "mesoscope experiment":
325
+ descriptor: MesoscopeExperimentDescriptor = MesoscopeExperimentDescriptor.from_yaml( # type: ignore
326
+ file_path=session_data.raw_data.session_descriptor_path
327
+ )
328
+ manifest["notes"].append(descriptor.experimenter_notes)
329
+ elif session_data.session_type == "window checking":
330
+ manifest["notes"].append("N/A")
90
331
 
91
332
  # If the session raw_data folder contains the telomere.bin file, marks the session as complete.
92
333
  manifest["complete"].append(session_data.raw_data.telomere_path.exists())
@@ -96,9 +337,9 @@ def generate_project_manifest(
96
337
  manifest["integrity_verification"].append(tracker.is_complete)
97
338
 
98
339
  # If the session is incomplete or unverified, marks all processing steps as FALSE, as automatic processing is
99
- # disabled for incomplete sessions. If the session unverified, the case is even more severe, as its data may be
100
- # corrupted.
101
- if not manifest["complete"][-1] or not not manifest["verified"][-1]:
340
+ # disabled for incomplete sessions. If the session is unverified, the case is even more severe, as its data may
341
+ # be corrupted.
342
+ if not manifest["complete"][-1] or not manifest["integrity_verification"][-1]:
102
343
  manifest["suite2p_processing"].append(False)
103
344
  manifest["dataset_formation"].append(False)
104
345
  manifest["behavior_processing"].append(False)
@@ -118,24 +359,34 @@ def generate_project_manifest(
118
359
  manifest["behavior_processing"].append(tracker.is_complete)
119
360
 
120
361
  # DeepLabCut (video) processing status.
121
- tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
362
+ tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
122
363
  manifest["video_processing"].append(tracker.is_complete)
123
364
 
124
- # Converts the manifest dictionary to a Polars Dataframe
365
+ # If all animal IDs are integer-convertible, stores them as numbers to promote proper sorting. Otherwise, stores
366
+ # them as strings. The latter option is primarily kept for compatibility with Tyche data.
367
+ animal_type: type[pl.UInt64] | type[pl.String]
368
+ if all([str(animal).isdigit() for animal in manifest["animal"]]):
369
+ # Converts all strings to integers
370
+ manifest["animal"] = [int(animal) for animal in manifest["animal"]] # type: ignore
371
+ animal_type = pl.UInt64 # Uint64 for future proofing
372
+ else:
373
+ animal_type = pl.String
374
+
375
+ # Converts the manifest dictionary to a Polars Dataframe.
125
376
  schema = {
126
- "animal": pl.String,
377
+ "animal": animal_type,
378
+ "date": pl.Datetime,
127
379
  "session": pl.String,
128
- "raw_data": pl.String,
129
- "processed_data": pl.String,
130
380
  "type": pl.String,
131
- "complete": pl.Boolean,
132
- "integrity_verification": pl.Boolean,
133
- "suite2p_processing": pl.Boolean,
134
- "dataset_formation": pl.Boolean,
135
- "behavior_processing": pl.Boolean,
136
- "video_processing": pl.Boolean,
381
+ "notes": pl.String,
382
+ "complete": pl.UInt8,
383
+ "integrity_verification": pl.UInt8,
384
+ "suite2p_processing": pl.UInt8,
385
+ "dataset_formation": pl.UInt8,
386
+ "behavior_processing": pl.UInt8,
387
+ "video_processing": pl.UInt8,
137
388
  }
138
- df = pl.DataFrame(manifest, schema=schema)
389
+ df = pl.DataFrame(manifest, schema=schema, strict=False)
139
390
 
140
391
  # Sorts the DataFrame by animal and then session. Since we assign animal IDs sequentially and 'name' sessions based
141
392
  # on acquisition timestamps, the sort order is chronological.
@@ -158,8 +409,8 @@ def verify_session_checksum(
158
409
  matches and to remove the 'telomere.bin' and 'verified.bin' marker files if it does not.
159
410
 
160
411
  Notes:
161
- Removing the telomere.bin marker file from session's raw_data folder marks the session as incomplete, excluding
162
- it from all further automatic processing.
412
+ Removing the telomere.bin marker file from the session's raw_data folder marks the session as incomplete,
413
+ excluding it from all further automatic processing.
163
414
 
164
415
  This function is also used to create the processed data hierarchy on the BioHPC server, when it is called as
165
416
  part of the data preprocessing runtime performed by a data acquisition system.
@@ -1,11 +1,95 @@
1
1
  from pathlib import Path
2
2
 
3
+ import polars as pl
4
+ from _typeshed import Incomplete
5
+
3
6
  from ..data_classes import (
4
7
  SessionData as SessionData,
5
8
  ProcessingTracker as ProcessingTracker,
9
+ RunTrainingDescriptor as RunTrainingDescriptor,
10
+ LickTrainingDescriptor as LickTrainingDescriptor,
11
+ MesoscopeExperimentDescriptor as MesoscopeExperimentDescriptor,
6
12
  )
7
13
  from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
8
14
 
15
+ _valid_session_types: Incomplete
16
+
17
+ class ProjectManifest:
18
+ """Wraps the contents of a Sun lab project manifest .feather file and exposes methods for visualizing and
19
+ working with the data stored inside the file.
20
+
21
+ This class functions as a high-level API for working with Sun lab projects. It is used both to visualize the
22
+ current state of various projects and during automated data processing to determine which processing steps to
23
+ apply to different sessions.
24
+
25
+ Args:
26
+ manifest_file: The path to the .feather manifest file that stores the target project's state data.
27
+
28
+ Attributes:
29
+ _data: Stores the manifest data as a Polars DataFrame.
30
+ _animal_string: Determines whether animal IDs are stored as strings or unsigned integers.
31
+ """
32
+
33
+ _data: pl.DataFrame
34
+ _animal_string: bool
35
+ def __init__(self, manifest_file: Path) -> None: ...
36
+ def print_data(self) -> None:
37
+ """Prints the entire contents of the manifest file to the terminal."""
38
+ def print_summary(self, animal: str | int | None = None) -> None:
39
+ """Prints a summary view of the manifest file to the terminal, excluding the 'experimenter notes' data for
40
+ each session.
41
+
42
+ This data view is optimized for tracking which processing steps have been applied to each session inside the
43
+ project.
44
+
45
+ Args:
46
+ animal: The ID of the animal for which to display the data. If an ID is provided, this method will only
47
+ display the data for that animal. Otherwise, it will display the data for all animals.
48
+ """
49
+ def print_notes(self, animal: str | int | None = None) -> None:
50
+ """Prints only the animal, date, session, type, and notes data from the manifest file.
51
+
52
+ This data view is optimized for experimenters to check what sessions have been recorded for each animal in the
53
+ project and refresh their memory on the outcomes of each session using experimenter notes.
54
+
55
+ Args:
56
+ animal: The ID of the animal for which to display the data. If an ID is provided, this method will only
57
+ display the data for that animal. Otherwise, it will display the data for all animals.
58
+ """
59
+ @property
60
+ def animals(self) -> tuple[str, ...]:
61
+ """Returns all unique animal IDs stored inside the manifest file.
62
+
63
+ This provides a tuple of all animal IDs participating in the target project.
64
+ """
65
+ @property
66
+ def sessions(self) -> tuple[str, ...]:
67
+ """Returns all session IDs stored inside the manifest file.
68
+
69
+ This provides a tuple of all sessions, independent of the participating animal, that were recorded as part
70
+ of the target project.
71
+ """
72
+ def get_sessions_for_animal(self, animal: str | int, exclude_incomplete: bool = True) -> tuple[str, ...]:
73
+ """Returns all session IDs for the target animal.
74
+
75
+ This provides a tuple of all sessions performed by the target animal as part of the target project.
76
+
77
+ Args:
78
+ animal: The ID of the animal for which to get the session data.
79
+ exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
80
+ list.
81
+
82
+ Raises:
83
+ ValueError: If the specified animal is not found in the manifest file.
84
+ """
85
+ def get_session_info(self, animal: str | int, session: str) -> pl.DataFrame:
86
+ """Returns a Polars DataFrame that stores detailed information for the specified session and animal combination.
87
+
88
+ Args:
89
+ animal: The ID of the animal for which to retrieve the data.
90
+ session: The ID of the session for which to retrieve the data.
91
+ """
92
+
9
93
  def generate_project_manifest(
10
94
  raw_project_directory: Path, output_directory: Path, processed_project_directory: Path | None = None
11
95
  ) -> None:
@@ -13,8 +97,8 @@ def generate_project_manifest(
13
97
 
14
98
  This function evaluates the input project directory and builds the 'manifest' file for the project. The file
15
99
  includes the descriptive information about every session stored inside the input project folder and the state of
16
- session's data processing (which processing pipelines have been applied to each session). The file will be created
17
- under the 'output_path' directory and use the following name pattern: {ProjectName}}_manifest.feather.
100
+ the session's data processing (which processing pipelines have been applied to each session). The file will be
101
+ created under the 'output_directory' directory and use the following name pattern: {ProjectName}_manifest.feather.
18
102
 
19
103
  Notes:
20
104
  The manifest file is primarily used to capture and move project state information between machines, typically
@@ -41,8 +125,8 @@ def verify_session_checksum(
41
125
  matches and to remove the 'telomere.bin' and 'verified.bin' marker files if it does not.
42
126
 
43
127
  Notes:
44
- Removing the telomere.bin marker file from session's raw_data folder marks the session as incomplete, excluding
45
- it from all further automatic processing.
128
+ Removing the telomere.bin marker file from the session's raw_data folder marks the session as incomplete,
129
+ excluding it from all further automatic processing.
46
130
 
47
131
  This function is also used to create the processed data hierarchy on the BioHPC server, when it is called as
48
132
  part of the data preprocessing runtime performed by a data acquisition system.
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sl-shared-assets
3
- Version: 1.2.0rc2
3
+ Version: 1.2.0rc4
4
4
  Summary: Stores assets shared between multiple Sun (NeuroAI) lab data pipelines.
5
5
  Project-URL: Homepage, https://github.com/Sun-Lab-NBB/sl-shared-assets
6
6
  Project-URL: Documentation, https://sl-shared-assets-api-docs.netlify.app/
7
- Author: Ivan Kondratyev, Kushaan Gupta, Yuantao Deng, Natalie Yeung
7
+ Author: Ivan Kondratyev, Kushaan Gupta, Natalie Yeung
8
8
  Maintainer-email: Ivan Kondratyev <ik278@cornell.edu>
9
9
  License: GNU GENERAL PUBLIC LICENSE
10
10
  Version 3, 29 June 2007
@@ -690,19 +690,21 @@ Classifier: Programming Language :: Python :: 3.11
690
690
  Classifier: Programming Language :: Python :: 3.12
691
691
  Classifier: Programming Language :: Python :: 3.13
692
692
  Requires-Python: >=3.11
693
- Requires-Dist: appdirs<2,>=1
694
- Requires-Dist: ataraxis-base-utilities<4,>=3
695
- Requires-Dist: ataraxis-data-structures<4,>=3.1.1
696
- Requires-Dist: ataraxis-time<4,>=3
697
- Requires-Dist: click<9,>=8
698
- Requires-Dist: filelock<4,>=3
699
- Requires-Dist: natsort<9,>=8
700
- Requires-Dist: paramiko<4,>=3.5.1
701
- Requires-Dist: polars<2,>=1
702
- Requires-Dist: pyarrow<21,>=20
703
- Requires-Dist: simple-slurm<1,>=0
704
- Requires-Dist: tqdm<5,>=4
705
- Requires-Dist: xxhash<4,>=3
693
+ Requires-Dist: appdirs==1.4.4
694
+ Requires-Dist: ataraxis-base-utilities==3.0.1
695
+ Requires-Dist: ataraxis-data-structures==3.1.1
696
+ Requires-Dist: ataraxis-time==3.0.0
697
+ Requires-Dist: click==8.2.1
698
+ Requires-Dist: filelock==3.18.0
699
+ Requires-Dist: natsort==8.4.0
700
+ Requires-Dist: numpy==2.2.6
701
+ Requires-Dist: paramiko==3.5.1
702
+ Requires-Dist: polars==1.31.0
703
+ Requires-Dist: pyarrow==20.0.0
704
+ Requires-Dist: pytz==2025.2
705
+ Requires-Dist: simple-slurm==0.3.6
706
+ Requires-Dist: tqdm==4.67.1
707
+ Requires-Dist: xxhash==3.5.0
706
708
  Provides-Extra: conda
707
709
  Requires-Dist: grayskull<3,>=2; extra == 'conda'
708
710
  Requires-Dist: hatchling<2,>=1; extra == 'conda'
@@ -719,14 +721,16 @@ Requires-Dist: types-filelock<4,>=3; extra == 'conda'
719
721
  Requires-Dist: types-paramiko<4,>=3; extra == 'conda'
720
722
  Requires-Dist: types-tqdm<5,>=4; extra == 'conda'
721
723
  Provides-Extra: condarun
722
- Requires-Dist: appdirs<2,>=1; extra == 'condarun'
723
- Requires-Dist: click<9,>=8; extra == 'condarun'
724
- Requires-Dist: filelock<4,>=3; extra == 'condarun'
725
- Requires-Dist: natsort<9,>=8; extra == 'condarun'
726
- Requires-Dist: paramiko<4,>=3.5.1; extra == 'condarun'
727
- Requires-Dist: polars<2,>=1; extra == 'condarun'
728
- Requires-Dist: pyarrow<21,>=20; extra == 'condarun'
729
- Requires-Dist: tqdm<5,>=4; extra == 'condarun'
724
+ Requires-Dist: appdirs==1.4.4; extra == 'condarun'
725
+ Requires-Dist: click==8.2.1; extra == 'condarun'
726
+ Requires-Dist: filelock==3.18.0; extra == 'condarun'
727
+ Requires-Dist: natsort==8.4.0; extra == 'condarun'
728
+ Requires-Dist: numpy==2.2.6; extra == 'condarun'
729
+ Requires-Dist: paramiko==3.5.1; extra == 'condarun'
730
+ Requires-Dist: polars==1.31.0; extra == 'condarun'
731
+ Requires-Dist: pyarrow==20.0.0; extra == 'condarun'
732
+ Requires-Dist: pytz==2025.2; extra == 'condarun'
733
+ Requires-Dist: tqdm==4.67.1; extra == 'condarun'
730
734
  Provides-Extra: dev
731
735
  Requires-Dist: ataraxis-automation<6,>=5; extra == 'dev'
732
736
  Requires-Dist: build<2,>=1; extra == 'dev'
@@ -746,6 +750,7 @@ Requires-Dist: twine<7,>=6; extra == 'dev'
746
750
  Requires-Dist: types-appdirs<2,>=1; extra == 'dev'
747
751
  Requires-Dist: types-filelock<4,>=3; extra == 'dev'
748
752
  Requires-Dist: types-paramiko<4,>=3; extra == 'dev'
753
+ Requires-Dist: types-pytz<2026,>=2025; extra == 'dev'
749
754
  Requires-Dist: types-tqdm<5,>=4; extra == 'dev'
750
755
  Requires-Dist: uv<1,>=0; extra == 'dev'
751
756
  Provides-Extra: noconda
@@ -754,6 +759,7 @@ Requires-Dist: build<2,>=1; extra == 'noconda'
754
759
  Requires-Dist: sphinx-rtd-dark-mode<2,>=1; extra == 'noconda'
755
760
  Requires-Dist: tox-uv<2,>=1; extra == 'noconda'
756
761
  Requires-Dist: tox<5,>=4; extra == 'noconda'
762
+ Requires-Dist: types-pytz<2026,>=2025; extra == 'noconda'
757
763
  Requires-Dist: uv<1,>=0; extra == 'noconda'
758
764
  Description-Content-Type: text/markdown
759
765
 
@@ -1,14 +1,14 @@
1
1
  sl_shared_assets/__init__.py,sha256=_AOpxu9K_0px_xS07H8mqZeYlBS9aD75XBS0dofJzqw,2280
2
2
  sl_shared_assets/__init__.pyi,sha256=H1kPervb1A2BjG5EOLsLFQGUWFS_aHWy4cpL4_W71Fs,2525
3
- sl_shared_assets/cli.py,sha256=2HAgnD7hHnFp3R7_tJAfWBI_jRbhSuyDBFK3TGIHYsw,17771
3
+ sl_shared_assets/cli.py,sha256=SrzbcYbVQQ_fCz29t7JwOdY_ZSUJLHOuH4fJaIdDd1I,18395
4
4
  sl_shared_assets/cli.pyi,sha256=Fh8GZBSQzII_Iz6k5nLQOsVMbp7q1R5mp4KNZjdGflY,6119
5
5
  sl_shared_assets/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  sl_shared_assets/data_classes/__init__.py,sha256=ixn972b-3URCinVLRPjMfDXpO2w24_NkEUUjdqByFrA,1890
7
7
  sl_shared_assets/data_classes/__init__.pyi,sha256=bDBLkyhlosB4t09GxHBNKH0kaVBhHSY_j-i3MD2iKVo,2088
8
- sl_shared_assets/data_classes/configuration_data.py,sha256=ZVk1ynk25CfVuQfMofoH90BUaOPqk7zW8ukY6ls_Pp0,30360
9
- sl_shared_assets/data_classes/configuration_data.pyi,sha256=h7AV3z73SC2ITXWcnsShczuezC1Is7L4WSMnEWGSLPQ,9617
10
- sl_shared_assets/data_classes/runtime_data.py,sha256=5aGp7HMwUUGUdRkkxC3ZA_G604h0ZDMYlFOHvuQGCeI,15719
11
- sl_shared_assets/data_classes/runtime_data.pyi,sha256=F151EwpuHorhIyvJ1MBmEC4dzfLZb2D1YaHwQ-qrDyY,6644
8
+ sl_shared_assets/data_classes/configuration_data.py,sha256=eL8eGl1EF2Sl8J4W6qB78L5r092qnnbEjiApxyK6lCw,29402
9
+ sl_shared_assets/data_classes/configuration_data.pyi,sha256=U-snwWQqYT5-zcd8s3ZV8xX27BEpgy9vKlXvie3NKSE,9537
10
+ sl_shared_assets/data_classes/runtime_data.py,sha256=Q7Ykf9hgrw1jYKXa53mn_LW8G2cPmLLuxgGkP6qQcc4,15483
11
+ sl_shared_assets/data_classes/runtime_data.pyi,sha256=PxaCbeF9COR4ri91pdzh7zVrqaz2KEDYB1EoLhZQC_c,6618
12
12
  sl_shared_assets/data_classes/session_data.py,sha256=DHfjGXvdMRsOl1fTgNFrF3u9THAQFtTruDU0tsd0y8c,51767
13
13
  sl_shared_assets/data_classes/session_data.pyi,sha256=ajVrNwGpk9TQj79WURVYpQ2Bhy-XZsau8VABBgtOzrY,16452
14
14
  sl_shared_assets/data_classes/surgery_data.py,sha256=qsMj3NkjhylAT9b_wHBY-1XwTu2xsZcZatdECmkA7Bs,7437
@@ -17,20 +17,20 @@ sl_shared_assets/server/__init__.py,sha256=nyX6-9ACcrQeRQOCNvBVrWSTHGjRPANIG_u0a
17
17
  sl_shared_assets/server/__init__.pyi,sha256=7o99f8uf6NuBjMZjNAM1FX69Qbu5uBluRSAyaUWbXOU,263
18
18
  sl_shared_assets/server/job.py,sha256=GB31yYPEqXR6MgwNmnQrSQuHRJqUHFXcd6p7hb38q_c,7928
19
19
  sl_shared_assets/server/job.pyi,sha256=cxgHMpuwHsJGf_ZcTSSa2tZNzeR_GxqlICOsYGV_oy0,5655
20
- sl_shared_assets/server/server.py,sha256=s2lOrOxcBGQsELKrWu9yCX4Ga5olyLNmWLSCOFyyC44,11002
21
- sl_shared_assets/server/server.pyi,sha256=h8wI9rMcEuGLrJulndUjASM7E_nU4G6gXnjPge6mWHg,5263
20
+ sl_shared_assets/server/server.py,sha256=DR0nEO1nZgiLzdG958xmQasRRJ5PCQP9JXdCtBE08iU,12700
21
+ sl_shared_assets/server/server.pyi,sha256=4ZpZXkpVC7Zqksq485HgWP8voCFx-Q4VK4mLalgpwvc,6481
22
22
  sl_shared_assets/tools/__init__.py,sha256=N95ZPMz-_HdNPrbVieCFza-QSVS6BV2KRB4K1OLRttc,636
23
23
  sl_shared_assets/tools/__init__.pyi,sha256=xeDF8itMc0JRgLrO_IN_9gW7cp_Ld-Gf-rjtrgWvQ2I,551
24
24
  sl_shared_assets/tools/ascension_tools.py,sha256=kIqYGX9F8lRao_LaVOacIiT8J9SypTvarb9mgaI9ZPs,15387
25
25
  sl_shared_assets/tools/ascension_tools.pyi,sha256=tQCDdWZ20ZjUjpMs8aGIN0yBg5ff3j6spi62b3Han4o,3755
26
26
  sl_shared_assets/tools/packaging_tools.py,sha256=c9U0bKB6Btj7sfyeU7xx2Jiqv930qTnXbm0ZbNR-o2k,7594
27
27
  sl_shared_assets/tools/packaging_tools.pyi,sha256=vgGbAQCExwg-0A5F72MzEhzHxu97Nqg1yuz-5P89ycU,3118
28
- sl_shared_assets/tools/project_management_tools.py,sha256=DgMKd6i3iLG4lwVgcCgQeO8jZEfVoWFKU6882JrDvL4,11993
29
- sl_shared_assets/tools/project_management_tools.pyi,sha256=f_3O8UjnfHRMEe2iZpQxKK9Vb0_lJB2yI1WcJPUqGEU,3498
28
+ sl_shared_assets/tools/project_management_tools.py,sha256=UzvDFvJ8ZohUQlsZya0GdrtUlUQnOMFJEJY5CUXIW3U,22706
29
+ sl_shared_assets/tools/project_management_tools.pyi,sha256=sxjhQzeZ4vIcNwIDtFXYxN9jbTQb-PbCOPZL5P71xa8,7440
30
30
  sl_shared_assets/tools/transfer_tools.py,sha256=J26kwOp_NpPSY0-xu5FTw9udte-rm_mW1FJyaTNoqQI,6606
31
31
  sl_shared_assets/tools/transfer_tools.pyi,sha256=FoH7eYZe7guGHfPr0MK5ggO62uXKwD2aJ7h1Bu7PaEE,3294
32
- sl_shared_assets-1.2.0rc2.dist-info/METADATA,sha256=wiTJc7nWlmRmTlAf_5-oIcZIuqvb51g5gt8QsZ7l6-U,49080
33
- sl_shared_assets-1.2.0rc2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
34
- sl_shared_assets-1.2.0rc2.dist-info/entry_points.txt,sha256=76c00fRS4IuXBP2xOBdvycT15Zen-lHiDg2FaSt-HB4,547
35
- sl_shared_assets-1.2.0rc2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
36
- sl_shared_assets-1.2.0rc2.dist-info/RECORD,,
32
+ sl_shared_assets-1.2.0rc4.dist-info/METADATA,sha256=IO6x6Y5-KymLtssNsRRlyGjHxI181b9zBpJF7qwDSlA,49345
33
+ sl_shared_assets-1.2.0rc4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
34
+ sl_shared_assets-1.2.0rc4.dist-info/entry_points.txt,sha256=76c00fRS4IuXBP2xOBdvycT15Zen-lHiDg2FaSt-HB4,547
35
+ sl_shared_assets-1.2.0rc4.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
36
+ sl_shared_assets-1.2.0rc4.dist-info/RECORD,,