sl-shared-assets 1.2.0rc3__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sl-shared-assets might be problematic. Click here for more details.
- sl_shared_assets/cli.py +22 -206
- sl_shared_assets/cli.pyi +0 -39
- sl_shared_assets/data_classes/configuration_data.py +0 -10
- sl_shared_assets/data_classes/configuration_data.pyi +0 -2
- sl_shared_assets/data_classes/runtime_data.py +4 -3
- sl_shared_assets/data_classes/runtime_data.pyi +1 -1
- sl_shared_assets/data_classes/session_data.py +11 -18
- sl_shared_assets/data_classes/session_data.pyi +319 -0
- sl_shared_assets/server/server.py +42 -4
- sl_shared_assets/server/server.pyi +28 -1
- sl_shared_assets/tools/project_management_tools.py +283 -30
- sl_shared_assets/tools/project_management_tools.pyi +95 -4
- {sl_shared_assets-1.2.0rc3.dist-info → sl_shared_assets-2.0.0.dist-info}/METADATA +8 -2
- {sl_shared_assets-1.2.0rc3.dist-info → sl_shared_assets-2.0.0.dist-info}/RECORD +17 -16
- {sl_shared_assets-1.2.0rc3.dist-info → sl_shared_assets-2.0.0.dist-info}/entry_points.txt +0 -3
- {sl_shared_assets-1.2.0rc3.dist-info → sl_shared_assets-2.0.0.dist-info}/WHEEL +0 -0
- {sl_shared_assets-1.2.0rc3.dist-info → sl_shared_assets-2.0.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -3,13 +3,214 @@ functionality of SessionData class via a convenient API that allows working with
|
|
|
3
3
|
up a given project."""
|
|
4
4
|
|
|
5
5
|
from pathlib import Path
|
|
6
|
+
from datetime import datetime
|
|
6
7
|
|
|
8
|
+
import pytz
|
|
7
9
|
import polars as pl
|
|
8
10
|
from ataraxis_base_utilities import console
|
|
9
11
|
|
|
10
|
-
from ..data_classes import
|
|
12
|
+
from ..data_classes import (
|
|
13
|
+
SessionData,
|
|
14
|
+
ProcessingTracker,
|
|
15
|
+
RunTrainingDescriptor,
|
|
16
|
+
LickTrainingDescriptor,
|
|
17
|
+
MesoscopeExperimentDescriptor,
|
|
18
|
+
)
|
|
11
19
|
from .packaging_tools import calculate_directory_checksum
|
|
12
20
|
|
|
21
|
+
_valid_session_types = {"lick training", "run training", "mesoscope experiment", "window checking"}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ProjectManifest:
|
|
25
|
+
"""Wraps the contents of a Sun lab project manifest .feather file and exposes methods for visualizing and
|
|
26
|
+
working with the data stored inside the file.
|
|
27
|
+
|
|
28
|
+
This class functions as a high-level API for working with Sun lab projects. It is used both to visualize the
|
|
29
|
+
current state of various projects and during automated data processing to determine which processing steps to
|
|
30
|
+
apply to different sessions.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
manifest_file: The path to the .feather manifest file that stores the target project's state data.
|
|
34
|
+
|
|
35
|
+
Attributes:
|
|
36
|
+
_data: Stores the manifest data as a Polars DataFrame.
|
|
37
|
+
_animal_string: Determines whether animal IDs are stored as strings or unsigned integers.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
def __init__(self, manifest_file: Path):
|
|
41
|
+
# Reads the data from the target manifest file into the class attribute
|
|
42
|
+
self._data: pl.DataFrame = pl.read_ipc(source=manifest_file, use_pyarrow=True)
|
|
43
|
+
|
|
44
|
+
# Determines whether animal IDs are stored as strings or as numbers
|
|
45
|
+
self._animal_string = False
|
|
46
|
+
schema = self._data.collect_schema()
|
|
47
|
+
if isinstance(schema["animal"], pl.String):
|
|
48
|
+
self._animal_string = True
|
|
49
|
+
|
|
50
|
+
def print_data(self) -> None:
|
|
51
|
+
"""Prints the entire contents of the manifest file to the terminal."""
|
|
52
|
+
with pl.Config(
|
|
53
|
+
set_tbl_rows=-1, # Displays all rows (-1 means unlimited)
|
|
54
|
+
set_tbl_cols=-1, # Displays all columns (-1 means unlimited)
|
|
55
|
+
set_tbl_hide_column_data_types=True,
|
|
56
|
+
set_tbl_cell_alignment="LEFT",
|
|
57
|
+
set_tbl_width_chars=250, # Sets table width to 250 characters
|
|
58
|
+
set_fmt_str_lengths=600, # Allows longer strings to display properly (default is 32)
|
|
59
|
+
):
|
|
60
|
+
print(self._data)
|
|
61
|
+
|
|
62
|
+
def print_summary(self, animal: str | int | None = None) -> None:
|
|
63
|
+
"""Prints a summary view of the manifest file to the terminal, excluding the 'experimenter notes' data for
|
|
64
|
+
each session.
|
|
65
|
+
|
|
66
|
+
This data view is optimized for tracking which processing steps have been applied to each session inside the
|
|
67
|
+
project.
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
animal: The ID of the animal for which to display the data. If an ID is provided, this method will only
|
|
71
|
+
display the data for that animal. Otherwise, it will display the data for all animals.
|
|
72
|
+
"""
|
|
73
|
+
summary_cols = [
|
|
74
|
+
"animal",
|
|
75
|
+
"date",
|
|
76
|
+
"session",
|
|
77
|
+
"type",
|
|
78
|
+
"complete",
|
|
79
|
+
"integrity_verification",
|
|
80
|
+
"suite2p_processing",
|
|
81
|
+
"behavior_processing",
|
|
82
|
+
"video_processing",
|
|
83
|
+
"dataset_formation",
|
|
84
|
+
]
|
|
85
|
+
|
|
86
|
+
# Retrieves the data
|
|
87
|
+
df = self._data.select(summary_cols)
|
|
88
|
+
|
|
89
|
+
# Optionally filters the data for the target animal
|
|
90
|
+
if animal is not None:
|
|
91
|
+
# Ensures that the 'animal' argument has the same type as the data inside the DataFrame.
|
|
92
|
+
if self._animal_string:
|
|
93
|
+
animal = str(animal)
|
|
94
|
+
else:
|
|
95
|
+
animal = int(animal)
|
|
96
|
+
df = df.filter(pl.col("animal") == animal)
|
|
97
|
+
|
|
98
|
+
# Ensures the data displays properly
|
|
99
|
+
with pl.Config(
|
|
100
|
+
set_tbl_rows=-1,
|
|
101
|
+
set_tbl_cols=-1,
|
|
102
|
+
set_tbl_width_chars=250,
|
|
103
|
+
set_tbl_hide_column_data_types=True,
|
|
104
|
+
set_tbl_cell_alignment="CENTER",
|
|
105
|
+
):
|
|
106
|
+
print(df)
|
|
107
|
+
|
|
108
|
+
def print_notes(self, animal: str | int | None = None) -> None:
|
|
109
|
+
"""Prints only animal, session, and notes data from the manifest file.
|
|
110
|
+
|
|
111
|
+
This data view is optimized for experimenters to check what sessions have been recorded for each animal in the
|
|
112
|
+
project and refresh their memory on the outcomes of each session using experimenter notes.
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
animal: The ID of the animal for which to display the data. If an ID is provided, this method will only
|
|
116
|
+
display the data for that animal. Otherwise, it will display the data for all animals.
|
|
117
|
+
"""
|
|
118
|
+
|
|
119
|
+
# Pre-selects the columns to display
|
|
120
|
+
df = self._data.select(["animal", "date", "session", "type", "notes"])
|
|
121
|
+
|
|
122
|
+
# Optionally filters the data for the target animal
|
|
123
|
+
if animal is not None:
|
|
124
|
+
# Ensures that the 'animal' argument has the same type as the data inside the DataFrame.
|
|
125
|
+
if self._animal_string:
|
|
126
|
+
animal = str(animal)
|
|
127
|
+
else:
|
|
128
|
+
animal = int(animal)
|
|
129
|
+
|
|
130
|
+
df = df.filter(pl.col("animal") == animal)
|
|
131
|
+
|
|
132
|
+
# Prints the extracted data
|
|
133
|
+
with pl.Config(
|
|
134
|
+
set_tbl_rows=-1,
|
|
135
|
+
set_tbl_cols=-1,
|
|
136
|
+
set_tbl_hide_column_data_types=True,
|
|
137
|
+
set_tbl_cell_alignment="LEFT",
|
|
138
|
+
set_tbl_width_chars=250, # Wider columns for notes
|
|
139
|
+
set_fmt_str_lengths=600, # Allows very long strings for notes
|
|
140
|
+
):
|
|
141
|
+
print(df)
|
|
142
|
+
|
|
143
|
+
@property
|
|
144
|
+
def animals(self) -> tuple[str, ...]:
|
|
145
|
+
"""Returns all unique animal IDs stored inside the manifest file.
|
|
146
|
+
|
|
147
|
+
This provides a tuple of all animal IDs participating in the target project.
|
|
148
|
+
"""
|
|
149
|
+
return tuple(self._data.select("animal").unique().sort("animal").to_series().to_list())
|
|
150
|
+
|
|
151
|
+
@property
|
|
152
|
+
def sessions(self) -> tuple[str, ...]:
|
|
153
|
+
"""Returns all session IDs stored inside the manifest file.
|
|
154
|
+
|
|
155
|
+
This provides a tuple of all sessions, independent of the participating animal, that were recorded as part
|
|
156
|
+
of the target project.
|
|
157
|
+
"""
|
|
158
|
+
return tuple(self._data.select("session").sort("session").to_series().to_list())
|
|
159
|
+
|
|
160
|
+
def get_sessions_for_animal(self, animal: str | int, exclude_incomplete: bool = True) -> tuple[str, ...]:
|
|
161
|
+
"""Returns all session IDs for the target animal.
|
|
162
|
+
|
|
163
|
+
This provides a tuple of all sessions performed by the target animal as part of the target project.
|
|
164
|
+
|
|
165
|
+
Args:
|
|
166
|
+
animal: The ID of the animal for which to get the session data.
|
|
167
|
+
exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
|
|
168
|
+
list.
|
|
169
|
+
|
|
170
|
+
Raises:
|
|
171
|
+
ValueError: If the specified animal is not found in the manifest file.
|
|
172
|
+
"""
|
|
173
|
+
|
|
174
|
+
# Ensures that the 'animal' argument has the same type as the data inside the DataFrame.
|
|
175
|
+
if self._animal_string:
|
|
176
|
+
animal = str(animal)
|
|
177
|
+
else:
|
|
178
|
+
animal = int(animal)
|
|
179
|
+
|
|
180
|
+
if animal not in self.animals:
|
|
181
|
+
message = f"Animal ID '{animal}' not found in manifest. Available animals: {self.animals}"
|
|
182
|
+
console.error(message=message, error=ValueError)
|
|
183
|
+
|
|
184
|
+
# Filters by animal ID
|
|
185
|
+
data = self._data.filter(pl.col("animal") == animal)
|
|
186
|
+
|
|
187
|
+
# Optionally filters out incomplete sessions
|
|
188
|
+
if exclude_incomplete:
|
|
189
|
+
data = data.filter(pl.col("complete") == 1)
|
|
190
|
+
|
|
191
|
+
# Formats and returns session IDs to the caller
|
|
192
|
+
sessions = data.select("session").sort("session").to_series().to_list()
|
|
193
|
+
return tuple(sessions)
|
|
194
|
+
|
|
195
|
+
def get_session_info(self, session: str) -> pl.DataFrame:
|
|
196
|
+
"""Returns a Polars DataFrame that stores detailed information for the specified session.
|
|
197
|
+
|
|
198
|
+
Since session IDs are unique, it is expected that filtering by session ID is enough to get the requested
|
|
199
|
+
information.
|
|
200
|
+
|
|
201
|
+
Args:
|
|
202
|
+
session: The ID of the session for which to retrieve the data.
|
|
203
|
+
|
|
204
|
+
Returns:
|
|
205
|
+
A Polars DataFrame with the following columns: 'animal', 'date', 'notes', 'session', 'type', 'complete',
|
|
206
|
+
'integrity_verification', 'suite2p_processing', 'behavior_processing', 'video_processing',
|
|
207
|
+
'dataset_formation'.
|
|
208
|
+
"""
|
|
209
|
+
|
|
210
|
+
df = self._data
|
|
211
|
+
df = df.filter(pl.col("session").eq(session))
|
|
212
|
+
return df
|
|
213
|
+
|
|
13
214
|
|
|
14
215
|
def generate_project_manifest(
|
|
15
216
|
raw_project_directory: Path, output_directory: Path, processed_project_directory: Path | None = None
|
|
@@ -18,8 +219,8 @@ def generate_project_manifest(
|
|
|
18
219
|
|
|
19
220
|
This function evaluates the input project directory and builds the 'manifest' file for the project. The file
|
|
20
221
|
includes the descriptive information about every session stored inside the input project folder and the state of
|
|
21
|
-
session's data processing (which processing pipelines have been applied to each session). The file will be
|
|
22
|
-
under the 'output_path' directory and use the following name pattern: {ProjectName}}_manifest.feather.
|
|
222
|
+
the session's data processing (which processing pipelines have been applied to each session). The file will be
|
|
223
|
+
created under the 'output_directory' directory and use the following name pattern: {ProjectName}_manifest.feather.
|
|
23
224
|
|
|
24
225
|
Notes:
|
|
25
226
|
The manifest file is primarily used to capture and move project state information between machines, typically
|
|
@@ -42,7 +243,7 @@ def generate_project_manifest(
|
|
|
42
243
|
)
|
|
43
244
|
console.error(message=message, error=FileNotFoundError)
|
|
44
245
|
|
|
45
|
-
# Finds all
|
|
246
|
+
# Finds all session directories
|
|
46
247
|
session_directories = [directory.parent for directory in raw_project_directory.rglob("raw_data")]
|
|
47
248
|
|
|
48
249
|
if len(session_directories) == 0:
|
|
@@ -54,29 +255,35 @@ def generate_project_manifest(
|
|
|
54
255
|
console.error(message=message, error=FileNotFoundError)
|
|
55
256
|
|
|
56
257
|
# Precreates the 'manifest' dictionary structure
|
|
57
|
-
manifest: dict[str, list[str | bool]] = {
|
|
258
|
+
manifest: dict[str, list[str | bool | datetime | int]] = {
|
|
58
259
|
"animal": [], # Animal IDs.
|
|
59
260
|
"session": [], # Session names.
|
|
261
|
+
"date": [], # Session names stored as timezone-aware date-time objects in EST.
|
|
60
262
|
"type": [], # Type of the session (e.g., Experiment, Training, etc.).
|
|
61
|
-
"
|
|
62
|
-
|
|
63
|
-
# Determines whether the session data is complete. Incomplete sessions are excluded from processing.
|
|
263
|
+
"notes": [], # The experimenter notes about the session.
|
|
264
|
+
# Determines whether the session data is complete (ran for the intended duration and has all expected data).
|
|
64
265
|
"complete": [],
|
|
65
|
-
# Determines whether the session data integrity has been verified upon transfer to storage machine.
|
|
266
|
+
# Determines whether the session data integrity has been verified upon transfer to a storage machine.
|
|
66
267
|
"integrity_verification": [],
|
|
67
268
|
"suite2p_processing": [], # Determines whether the session has been processed with the single-day s2p pipeline.
|
|
68
|
-
"dataset_formation": [], # Determines whether the session's data has been integrated into a dataset.
|
|
69
269
|
# Determines whether the session has been processed with the behavior extraction pipeline.
|
|
70
270
|
"behavior_processing": [],
|
|
71
271
|
"video_processing": [], # Determines whether the session has been processed with the DeepLabCut pipeline.
|
|
272
|
+
"dataset_formation": [], # Determines whether the session's data has been integrated into a dataset.
|
|
72
273
|
}
|
|
73
274
|
|
|
74
275
|
# Loops over each session of every animal in the project and extracts session ID information and information
|
|
75
276
|
# about which processing steps have been successfully applied to the session.
|
|
76
277
|
for directory in session_directories:
|
|
278
|
+
# Skips processing directories without files (sessions with empty raw-data directories)
|
|
279
|
+
if len([file for file in directory.joinpath("raw_data").glob("*")]) == 0:
|
|
280
|
+
continue
|
|
281
|
+
|
|
77
282
|
# Instantiates the SessionData instance to resolve the paths to all session's data files and locations.
|
|
78
283
|
session_data = SessionData.load(
|
|
79
|
-
session_path=directory,
|
|
284
|
+
session_path=directory,
|
|
285
|
+
processed_data_root=processed_project_directory,
|
|
286
|
+
make_processed_data_directory=False,
|
|
80
287
|
)
|
|
81
288
|
|
|
82
289
|
# Fills the manifest dictionary with data for the processed session:
|
|
@@ -85,8 +292,44 @@ def generate_project_manifest(
|
|
|
85
292
|
manifest["animal"].append(session_data.animal_id)
|
|
86
293
|
manifest["session"].append(session_data.session_name)
|
|
87
294
|
manifest["type"].append(session_data.session_type)
|
|
88
|
-
|
|
89
|
-
|
|
295
|
+
|
|
296
|
+
# Parses session name into the date-time object to simplify working with date-time data in the future
|
|
297
|
+
date_time_components = session_data.session_name.split("-")
|
|
298
|
+
date_time = datetime(
|
|
299
|
+
year=int(date_time_components[0]),
|
|
300
|
+
month=int(date_time_components[1]),
|
|
301
|
+
day=int(date_time_components[2]),
|
|
302
|
+
hour=int(date_time_components[3]),
|
|
303
|
+
minute=int(date_time_components[4]),
|
|
304
|
+
second=int(date_time_components[5]),
|
|
305
|
+
microsecond=int(date_time_components[6]),
|
|
306
|
+
tzinfo=pytz.UTC,
|
|
307
|
+
)
|
|
308
|
+
|
|
309
|
+
# Converts from UTC to EST / EDT for user convenience
|
|
310
|
+
eastern = pytz.timezone("America/New_York")
|
|
311
|
+
date_time = date_time.astimezone(eastern)
|
|
312
|
+
manifest["date"].append(date_time)
|
|
313
|
+
|
|
314
|
+
# Depending on the session type, instantiates the appropriate descriptor instance and uses it to read the
|
|
315
|
+
# experimenter notes
|
|
316
|
+
if session_data.session_type == "lick training":
|
|
317
|
+
descriptor: LickTrainingDescriptor = LickTrainingDescriptor.from_yaml( # type: ignore
|
|
318
|
+
file_path=session_data.raw_data.session_descriptor_path
|
|
319
|
+
)
|
|
320
|
+
manifest["notes"].append(descriptor.experimenter_notes)
|
|
321
|
+
elif session_data.session_type == "run training":
|
|
322
|
+
descriptor: RunTrainingDescriptor = RunTrainingDescriptor.from_yaml( # type: ignore
|
|
323
|
+
file_path=session_data.raw_data.session_descriptor_path
|
|
324
|
+
)
|
|
325
|
+
manifest["notes"].append(descriptor.experimenter_notes)
|
|
326
|
+
elif session_data.session_type == "mesoscope experiment":
|
|
327
|
+
descriptor: MesoscopeExperimentDescriptor = MesoscopeExperimentDescriptor.from_yaml( # type: ignore
|
|
328
|
+
file_path=session_data.raw_data.session_descriptor_path
|
|
329
|
+
)
|
|
330
|
+
manifest["notes"].append(descriptor.experimenter_notes)
|
|
331
|
+
elif session_data.session_type == "window checking":
|
|
332
|
+
manifest["notes"].append("N/A")
|
|
90
333
|
|
|
91
334
|
# If the session raw_data folder contains the telomere.bin file, marks the session as complete.
|
|
92
335
|
manifest["complete"].append(session_data.raw_data.telomere_path.exists())
|
|
@@ -96,9 +339,9 @@ def generate_project_manifest(
|
|
|
96
339
|
manifest["integrity_verification"].append(tracker.is_complete)
|
|
97
340
|
|
|
98
341
|
# If the session is incomplete or unverified, marks all processing steps as FALSE, as automatic processing is
|
|
99
|
-
# disabled for incomplete sessions. If the session unverified, the case is even more severe, as its data may
|
|
100
|
-
# corrupted.
|
|
101
|
-
if not manifest["complete"][-1] or not
|
|
342
|
+
# disabled for incomplete sessions. If the session is unverified, the case is even more severe, as its data may
|
|
343
|
+
# be corrupted.
|
|
344
|
+
if not manifest["complete"][-1] or not manifest["integrity_verification"][-1]:
|
|
102
345
|
manifest["suite2p_processing"].append(False)
|
|
103
346
|
manifest["dataset_formation"].append(False)
|
|
104
347
|
manifest["behavior_processing"].append(False)
|
|
@@ -118,24 +361,34 @@ def generate_project_manifest(
|
|
|
118
361
|
manifest["behavior_processing"].append(tracker.is_complete)
|
|
119
362
|
|
|
120
363
|
# DeepLabCut (video) processing status.
|
|
121
|
-
tracker = ProcessingTracker(file_path=session_data.processed_data.
|
|
364
|
+
tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
|
|
122
365
|
manifest["video_processing"].append(tracker.is_complete)
|
|
123
366
|
|
|
124
|
-
#
|
|
367
|
+
# If all animal IDs are integer-convertible, stores them as numbers to promote proper sorting. Otherwise, stores
|
|
368
|
+
# them as strings. The latter option is primarily kept for compatibility with Tyche data.
|
|
369
|
+
animal_type: type[pl.UInt64] | type[pl.String]
|
|
370
|
+
if all([str(animal).isdigit() for animal in manifest["animal"]]):
|
|
371
|
+
# Converts all strings to integers
|
|
372
|
+
manifest["animal"] = [int(animal) for animal in manifest["animal"]] # type: ignore
|
|
373
|
+
animal_type = pl.UInt64 # Uint64 for future proofing
|
|
374
|
+
else:
|
|
375
|
+
animal_type = pl.String
|
|
376
|
+
|
|
377
|
+
# Converts the manifest dictionary to a Polars Dataframe.
|
|
125
378
|
schema = {
|
|
126
|
-
"animal":
|
|
379
|
+
"animal": animal_type,
|
|
380
|
+
"date": pl.Datetime,
|
|
127
381
|
"session": pl.String,
|
|
128
|
-
"raw_data": pl.String,
|
|
129
|
-
"processed_data": pl.String,
|
|
130
382
|
"type": pl.String,
|
|
131
|
-
"
|
|
132
|
-
"
|
|
133
|
-
"
|
|
134
|
-
"
|
|
135
|
-
"
|
|
136
|
-
"
|
|
383
|
+
"notes": pl.String,
|
|
384
|
+
"complete": pl.UInt8,
|
|
385
|
+
"integrity_verification": pl.UInt8,
|
|
386
|
+
"suite2p_processing": pl.UInt8,
|
|
387
|
+
"dataset_formation": pl.UInt8,
|
|
388
|
+
"behavior_processing": pl.UInt8,
|
|
389
|
+
"video_processing": pl.UInt8,
|
|
137
390
|
}
|
|
138
|
-
df = pl.DataFrame(manifest, schema=schema)
|
|
391
|
+
df = pl.DataFrame(manifest, schema=schema, strict=False)
|
|
139
392
|
|
|
140
393
|
# Sorts the DataFrame by animal and then session. Since we assign animal IDs sequentially and 'name' sessions based
|
|
141
394
|
# on acquisition timestamps, the sort order is chronological.
|
|
@@ -158,8 +411,8 @@ def verify_session_checksum(
|
|
|
158
411
|
matches and to remove the 'telomere.bin' and 'verified.bin' marker files if it does not.
|
|
159
412
|
|
|
160
413
|
Notes:
|
|
161
|
-
Removing the telomere.bin marker file from session's raw_data folder marks the session as incomplete,
|
|
162
|
-
it from all further automatic processing.
|
|
414
|
+
Removing the telomere.bin marker file from the session's raw_data folder marks the session as incomplete,
|
|
415
|
+
excluding it from all further automatic processing.
|
|
163
416
|
|
|
164
417
|
This function is also used to create the processed data hierarchy on the BioHPC server, when it is called as
|
|
165
418
|
part of the data preprocessing runtime performed by a data acquisition system.
|
|
@@ -1,11 +1,102 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
|
+
import polars as pl
|
|
4
|
+
from _typeshed import Incomplete
|
|
5
|
+
|
|
3
6
|
from ..data_classes import (
|
|
4
7
|
SessionData as SessionData,
|
|
5
8
|
ProcessingTracker as ProcessingTracker,
|
|
9
|
+
RunTrainingDescriptor as RunTrainingDescriptor,
|
|
10
|
+
LickTrainingDescriptor as LickTrainingDescriptor,
|
|
11
|
+
MesoscopeExperimentDescriptor as MesoscopeExperimentDescriptor,
|
|
6
12
|
)
|
|
7
13
|
from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
|
|
8
14
|
|
|
15
|
+
_valid_session_types: Incomplete
|
|
16
|
+
|
|
17
|
+
class ProjectManifest:
|
|
18
|
+
"""Wraps the contents of a Sun lab project manifest .feather file and exposes methods for visualizing and
|
|
19
|
+
working with the data stored inside the file.
|
|
20
|
+
|
|
21
|
+
This class functions as a high-level API for working with Sun lab projects. It is used both to visualize the
|
|
22
|
+
current state of various projects and during automated data processing to determine which processing steps to
|
|
23
|
+
apply to different sessions.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
manifest_file: The path to the .feather manifest file that stores the target project's state data.
|
|
27
|
+
|
|
28
|
+
Attributes:
|
|
29
|
+
_data: Stores the manifest data as a Polars DataFrame.
|
|
30
|
+
_animal_string: Determines whether animal IDs are stored as strings or unsigned integers.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
_data: pl.DataFrame
|
|
34
|
+
_animal_string: bool
|
|
35
|
+
def __init__(self, manifest_file: Path) -> None: ...
|
|
36
|
+
def print_data(self) -> None:
|
|
37
|
+
"""Prints the entire contents of the manifest file to the terminal."""
|
|
38
|
+
def print_summary(self, animal: str | int | None = None) -> None:
|
|
39
|
+
"""Prints a summary view of the manifest file to the terminal, excluding the 'experimenter notes' data for
|
|
40
|
+
each session.
|
|
41
|
+
|
|
42
|
+
This data view is optimized for tracking which processing steps have been applied to each session inside the
|
|
43
|
+
project.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
animal: The ID of the animal for which to display the data. If an ID is provided, this method will only
|
|
47
|
+
display the data for that animal. Otherwise, it will display the data for all animals.
|
|
48
|
+
"""
|
|
49
|
+
def print_notes(self, animal: str | int | None = None) -> None:
|
|
50
|
+
"""Prints only animal, session, and notes data from the manifest file.
|
|
51
|
+
|
|
52
|
+
This data view is optimized for experimenters to check what sessions have been recorded for each animal in the
|
|
53
|
+
project and refresh their memory on the outcomes of each session using experimenter notes.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
animal: The ID of the animal for which to display the data. If an ID is provided, this method will only
|
|
57
|
+
display the data for that animal. Otherwise, it will display the data for all animals.
|
|
58
|
+
"""
|
|
59
|
+
@property
|
|
60
|
+
def animals(self) -> tuple[str, ...]:
|
|
61
|
+
"""Returns all unique animal IDs stored inside the manifest file.
|
|
62
|
+
|
|
63
|
+
This provides a tuple of all animal IDs participating in the target project.
|
|
64
|
+
"""
|
|
65
|
+
@property
|
|
66
|
+
def sessions(self) -> tuple[str, ...]:
|
|
67
|
+
"""Returns all session IDs stored inside the manifest file.
|
|
68
|
+
|
|
69
|
+
This provides a tuple of all sessions, independent of the participating animal, that were recorded as part
|
|
70
|
+
of the target project.
|
|
71
|
+
"""
|
|
72
|
+
def get_sessions_for_animal(self, animal: str | int, exclude_incomplete: bool = True) -> tuple[str, ...]:
|
|
73
|
+
"""Returns all session IDs for the target animal.
|
|
74
|
+
|
|
75
|
+
This provides a tuple of all sessions performed by the target animal as part of the target project.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
animal: The ID of the animal for which to get the session data.
|
|
79
|
+
exclude_incomplete: Determines whether to exclude sessions not marked as 'complete' from the output
|
|
80
|
+
list.
|
|
81
|
+
|
|
82
|
+
Raises:
|
|
83
|
+
ValueError: If the specified animal is not found in the manifest file.
|
|
84
|
+
"""
|
|
85
|
+
def get_session_info(self, session: str) -> pl.DataFrame:
|
|
86
|
+
"""Returns a Polars DataFrame that stores detailed information for the specified session.
|
|
87
|
+
|
|
88
|
+
Since session IDs are unique, it is expected that filtering by session ID is enough to get the requested
|
|
89
|
+
information.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
session: The ID of the session for which to retrieve the data.
|
|
93
|
+
|
|
94
|
+
Returns:
|
|
95
|
+
A Polars DataFrame with the following columns: 'animal', 'date', 'notes', 'session', 'type', 'complete',
|
|
96
|
+
'integrity_verification', 'suite2p_processing', 'behavior_processing', 'video_processing',
|
|
97
|
+
'dataset_formation'.
|
|
98
|
+
"""
|
|
99
|
+
|
|
9
100
|
def generate_project_manifest(
|
|
10
101
|
raw_project_directory: Path, output_directory: Path, processed_project_directory: Path | None = None
|
|
11
102
|
) -> None:
|
|
@@ -13,8 +104,8 @@ def generate_project_manifest(
|
|
|
13
104
|
|
|
14
105
|
This function evaluates the input project directory and builds the 'manifest' file for the project. The file
|
|
15
106
|
includes the descriptive information about every session stored inside the input project folder and the state of
|
|
16
|
-
session's data processing (which processing pipelines have been applied to each session). The file will be
|
|
17
|
-
under the 'output_path' directory and use the following name pattern: {ProjectName}}_manifest.feather.
|
|
107
|
+
the session's data processing (which processing pipelines have been applied to each session). The file will be
|
|
108
|
+
created under the 'output_directory' directory and use the following name pattern: {ProjectName}_manifest.feather.
|
|
18
109
|
|
|
19
110
|
Notes:
|
|
20
111
|
The manifest file is primarily used to capture and move project state information between machines, typically
|
|
@@ -41,8 +132,8 @@ def verify_session_checksum(
|
|
|
41
132
|
matches and to remove the 'telomere.bin' and 'verified.bin' marker files if it does not.
|
|
42
133
|
|
|
43
134
|
Notes:
|
|
44
|
-
Removing the telomere.bin marker file from session's raw_data folder marks the session as incomplete,
|
|
45
|
-
it from all further automatic processing.
|
|
135
|
+
Removing the telomere.bin marker file from the session's raw_data folder marks the session as incomplete,
|
|
136
|
+
excluding it from all further automatic processing.
|
|
46
137
|
|
|
47
138
|
This function is also used to create the processed data hierarchy on the BioHPC server, when it is called as
|
|
48
139
|
part of the data preprocessing runtime performed by a data acquisition system.
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sl-shared-assets
|
|
3
|
-
Version:
|
|
3
|
+
Version: 2.0.0
|
|
4
4
|
Summary: Stores assets shared between multiple Sun (NeuroAI) lab data pipelines.
|
|
5
5
|
Project-URL: Homepage, https://github.com/Sun-Lab-NBB/sl-shared-assets
|
|
6
6
|
Project-URL: Documentation, https://sl-shared-assets-api-docs.netlify.app/
|
|
7
|
-
Author: Ivan Kondratyev, Kushaan Gupta,
|
|
7
|
+
Author: Ivan Kondratyev, Kushaan Gupta, Natalie Yeung
|
|
8
8
|
Maintainer-email: Ivan Kondratyev <ik278@cornell.edu>
|
|
9
9
|
License: GNU GENERAL PUBLIC LICENSE
|
|
10
10
|
Version 3, 29 June 2007
|
|
@@ -697,9 +697,11 @@ Requires-Dist: ataraxis-time==3.0.0
|
|
|
697
697
|
Requires-Dist: click==8.2.1
|
|
698
698
|
Requires-Dist: filelock==3.18.0
|
|
699
699
|
Requires-Dist: natsort==8.4.0
|
|
700
|
+
Requires-Dist: numpy<2.3.0,>=2.0.2
|
|
700
701
|
Requires-Dist: paramiko==3.5.1
|
|
701
702
|
Requires-Dist: polars==1.31.0
|
|
702
703
|
Requires-Dist: pyarrow==20.0.0
|
|
704
|
+
Requires-Dist: pytz==2025.2
|
|
703
705
|
Requires-Dist: simple-slurm==0.3.6
|
|
704
706
|
Requires-Dist: tqdm==4.67.1
|
|
705
707
|
Requires-Dist: xxhash==3.5.0
|
|
@@ -723,9 +725,11 @@ Requires-Dist: appdirs==1.4.4; extra == 'condarun'
|
|
|
723
725
|
Requires-Dist: click==8.2.1; extra == 'condarun'
|
|
724
726
|
Requires-Dist: filelock==3.18.0; extra == 'condarun'
|
|
725
727
|
Requires-Dist: natsort==8.4.0; extra == 'condarun'
|
|
728
|
+
Requires-Dist: numpy<2.3.0,>=2.0.2; extra == 'condarun'
|
|
726
729
|
Requires-Dist: paramiko==3.5.1; extra == 'condarun'
|
|
727
730
|
Requires-Dist: polars==1.31.0; extra == 'condarun'
|
|
728
731
|
Requires-Dist: pyarrow==20.0.0; extra == 'condarun'
|
|
732
|
+
Requires-Dist: pytz==2025.2; extra == 'condarun'
|
|
729
733
|
Requires-Dist: tqdm==4.67.1; extra == 'condarun'
|
|
730
734
|
Provides-Extra: dev
|
|
731
735
|
Requires-Dist: ataraxis-automation<6,>=5; extra == 'dev'
|
|
@@ -746,6 +750,7 @@ Requires-Dist: twine<7,>=6; extra == 'dev'
|
|
|
746
750
|
Requires-Dist: types-appdirs<2,>=1; extra == 'dev'
|
|
747
751
|
Requires-Dist: types-filelock<4,>=3; extra == 'dev'
|
|
748
752
|
Requires-Dist: types-paramiko<4,>=3; extra == 'dev'
|
|
753
|
+
Requires-Dist: types-pytz<2026,>=2025; extra == 'dev'
|
|
749
754
|
Requires-Dist: types-tqdm<5,>=4; extra == 'dev'
|
|
750
755
|
Requires-Dist: uv<1,>=0; extra == 'dev'
|
|
751
756
|
Provides-Extra: noconda
|
|
@@ -754,6 +759,7 @@ Requires-Dist: build<2,>=1; extra == 'noconda'
|
|
|
754
759
|
Requires-Dist: sphinx-rtd-dark-mode<2,>=1; extra == 'noconda'
|
|
755
760
|
Requires-Dist: tox-uv<2,>=1; extra == 'noconda'
|
|
756
761
|
Requires-Dist: tox<5,>=4; extra == 'noconda'
|
|
762
|
+
Requires-Dist: types-pytz<2026,>=2025; extra == 'noconda'
|
|
757
763
|
Requires-Dist: uv<1,>=0; extra == 'noconda'
|
|
758
764
|
Description-Content-Type: text/markdown
|
|
759
765
|
|
|
@@ -1,35 +1,36 @@
|
|
|
1
1
|
sl_shared_assets/__init__.py,sha256=_AOpxu9K_0px_xS07H8mqZeYlBS9aD75XBS0dofJzqw,2280
|
|
2
2
|
sl_shared_assets/__init__.pyi,sha256=H1kPervb1A2BjG5EOLsLFQGUWFS_aHWy4cpL4_W71Fs,2525
|
|
3
|
-
sl_shared_assets/cli.py,sha256=
|
|
4
|
-
sl_shared_assets/cli.pyi,sha256=
|
|
3
|
+
sl_shared_assets/cli.py,sha256=Rk6Axt0v-274O9mvhPChRj6sQBbHOnkd1Lkc3adgvao,9090
|
|
4
|
+
sl_shared_assets/cli.pyi,sha256=2zTqKYIJNV5cJkzOHTU2h4Hj0uUm-CiP-27vujgUaFI,3342
|
|
5
5
|
sl_shared_assets/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
sl_shared_assets/data_classes/__init__.py,sha256=ixn972b-3URCinVLRPjMfDXpO2w24_NkEUUjdqByFrA,1890
|
|
7
7
|
sl_shared_assets/data_classes/__init__.pyi,sha256=bDBLkyhlosB4t09GxHBNKH0kaVBhHSY_j-i3MD2iKVo,2088
|
|
8
|
-
sl_shared_assets/data_classes/configuration_data.py,sha256=
|
|
9
|
-
sl_shared_assets/data_classes/configuration_data.pyi,sha256=
|
|
10
|
-
sl_shared_assets/data_classes/runtime_data.py,sha256=
|
|
11
|
-
sl_shared_assets/data_classes/runtime_data.pyi,sha256=
|
|
12
|
-
sl_shared_assets/data_classes/session_data.py,sha256=
|
|
8
|
+
sl_shared_assets/data_classes/configuration_data.py,sha256=eL8eGl1EF2Sl8J4W6qB78L5r092qnnbEjiApxyK6lCw,29402
|
|
9
|
+
sl_shared_assets/data_classes/configuration_data.pyi,sha256=U-snwWQqYT5-zcd8s3ZV8xX27BEpgy9vKlXvie3NKSE,9537
|
|
10
|
+
sl_shared_assets/data_classes/runtime_data.py,sha256=TpqT2lRNq6fJyOgwXEqqI1gfTinEpCVTp8RKlvfqgkk,15789
|
|
11
|
+
sl_shared_assets/data_classes/runtime_data.pyi,sha256=o_eLLHWoAp_w58C2YEIeSHCD36ZYsTsCllRUEIxusSo,6660
|
|
12
|
+
sl_shared_assets/data_classes/session_data.py,sha256=Ob-Lv7mcegHX38o_xIiF37OUBQ6eZ5LeI0weoOGX6mk,51073
|
|
13
|
+
sl_shared_assets/data_classes/session_data.pyi,sha256=ajVrNwGpk9TQj79WURVYpQ2Bhy-XZsau8VABBgtOzrY,16452
|
|
13
14
|
sl_shared_assets/data_classes/surgery_data.py,sha256=qsMj3NkjhylAT9b_wHBY-1XwTu2xsZcZatdECmkA7Bs,7437
|
|
14
15
|
sl_shared_assets/data_classes/surgery_data.pyi,sha256=rf59lJ3tGSYKHQlEGXg75MnjajBwl0DYhL4TClAO4SM,2605
|
|
15
16
|
sl_shared_assets/server/__init__.py,sha256=nyX6-9ACcrQeRQOCNvBVrWSTHGjRPANIG_u0aq7HPTg,426
|
|
16
17
|
sl_shared_assets/server/__init__.pyi,sha256=7o99f8uf6NuBjMZjNAM1FX69Qbu5uBluRSAyaUWbXOU,263
|
|
17
18
|
sl_shared_assets/server/job.py,sha256=GB31yYPEqXR6MgwNmnQrSQuHRJqUHFXcd6p7hb38q_c,7928
|
|
18
19
|
sl_shared_assets/server/job.pyi,sha256=cxgHMpuwHsJGf_ZcTSSa2tZNzeR_GxqlICOsYGV_oy0,5655
|
|
19
|
-
sl_shared_assets/server/server.py,sha256=
|
|
20
|
-
sl_shared_assets/server/server.pyi,sha256=
|
|
20
|
+
sl_shared_assets/server/server.py,sha256=DR0nEO1nZgiLzdG958xmQasRRJ5PCQP9JXdCtBE08iU,12700
|
|
21
|
+
sl_shared_assets/server/server.pyi,sha256=4ZpZXkpVC7Zqksq485HgWP8voCFx-Q4VK4mLalgpwvc,6481
|
|
21
22
|
sl_shared_assets/tools/__init__.py,sha256=N95ZPMz-_HdNPrbVieCFza-QSVS6BV2KRB4K1OLRttc,636
|
|
22
23
|
sl_shared_assets/tools/__init__.pyi,sha256=xeDF8itMc0JRgLrO_IN_9gW7cp_Ld-Gf-rjtrgWvQ2I,551
|
|
23
24
|
sl_shared_assets/tools/ascension_tools.py,sha256=kIqYGX9F8lRao_LaVOacIiT8J9SypTvarb9mgaI9ZPs,15387
|
|
24
25
|
sl_shared_assets/tools/ascension_tools.pyi,sha256=tQCDdWZ20ZjUjpMs8aGIN0yBg5ff3j6spi62b3Han4o,3755
|
|
25
26
|
sl_shared_assets/tools/packaging_tools.py,sha256=c9U0bKB6Btj7sfyeU7xx2Jiqv930qTnXbm0ZbNR-o2k,7594
|
|
26
27
|
sl_shared_assets/tools/packaging_tools.pyi,sha256=vgGbAQCExwg-0A5F72MzEhzHxu97Nqg1yuz-5P89ycU,3118
|
|
27
|
-
sl_shared_assets/tools/project_management_tools.py,sha256=
|
|
28
|
-
sl_shared_assets/tools/project_management_tools.pyi,sha256=
|
|
28
|
+
sl_shared_assets/tools/project_management_tools.py,sha256=HatI5QmzUbUMI6MmKiqzylEp_CzQ-JpEyO5V5TJ40Sk,22758
|
|
29
|
+
sl_shared_assets/tools/project_management_tools.pyi,sha256=kteTbOom4KVm9Eem1WhmUZw54MOheUTkFQzOPIvV0vw,7731
|
|
29
30
|
sl_shared_assets/tools/transfer_tools.py,sha256=J26kwOp_NpPSY0-xu5FTw9udte-rm_mW1FJyaTNoqQI,6606
|
|
30
31
|
sl_shared_assets/tools/transfer_tools.pyi,sha256=FoH7eYZe7guGHfPr0MK5ggO62uXKwD2aJ7h1Bu7PaEE,3294
|
|
31
|
-
sl_shared_assets-
|
|
32
|
-
sl_shared_assets-
|
|
33
|
-
sl_shared_assets-
|
|
34
|
-
sl_shared_assets-
|
|
35
|
-
sl_shared_assets-
|
|
32
|
+
sl_shared_assets-2.0.0.dist-info/METADATA,sha256=j83QXlX_oIZ7npkT9zpZUfrrGEGXnJui3R2RJvMz09o,49356
|
|
33
|
+
sl_shared_assets-2.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
34
|
+
sl_shared_assets-2.0.0.dist-info/entry_points.txt,sha256=356d5zNg2v3hil8K7VGen6nDHggdDP-LY7zKJmB8ExI,305
|
|
35
|
+
sl_shared_assets-2.0.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
36
|
+
sl_shared_assets-2.0.0.dist-info/RECORD,,
|
|
@@ -1,8 +1,5 @@
|
|
|
1
1
|
[console_scripts]
|
|
2
2
|
sl-ascend-tyche = sl_shared_assets.cli:ascend_tyche_directory
|
|
3
|
-
sl-create-experiment = sl_shared_assets.cli:generate_experiment_configuration_file
|
|
4
|
-
sl-create-project = sl_shared_assets.cli:generate_project_configuration_file
|
|
5
3
|
sl-create-server-credentials = sl_shared_assets.cli:generate_server_credentials_file
|
|
6
|
-
sl-create-system-config = sl_shared_assets.cli:generate_system_configuration_file
|
|
7
4
|
sl-project-manifest = sl_shared_assets.cli:generate_project_manifest_file
|
|
8
5
|
sl-verify-session = sl_shared_assets.cli:verify_session_integrity
|
|
File without changes
|
|
File without changes
|