sl-shared-assets 1.0.0rc20__py3-none-any.whl → 1.0.0rc21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sl-shared-assets might be problematic. Click here for more details.
- sl_shared_assets/__init__.py +27 -27
- sl_shared_assets/__init__.pyi +24 -22
- sl_shared_assets/cli.py +266 -40
- sl_shared_assets/cli.pyi +73 -14
- sl_shared_assets/data_classes/__init__.py +23 -20
- sl_shared_assets/data_classes/__init__.pyi +18 -18
- sl_shared_assets/data_classes/configuration_data.py +407 -26
- sl_shared_assets/data_classes/configuration_data.pyi +172 -15
- sl_shared_assets/data_classes/runtime_data.py +49 -43
- sl_shared_assets/data_classes/runtime_data.pyi +37 -40
- sl_shared_assets/data_classes/session_data.py +168 -914
- sl_shared_assets/data_classes/session_data.pyi +55 -350
- sl_shared_assets/data_classes/surgery_data.py +3 -3
- sl_shared_assets/data_classes/surgery_data.pyi +2 -2
- sl_shared_assets/tools/__init__.py +8 -1
- sl_shared_assets/tools/__init__.pyi +11 -1
- sl_shared_assets/tools/ascension_tools.py +27 -26
- sl_shared_assets/tools/ascension_tools.pyi +5 -5
- sl_shared_assets/tools/packaging_tools.py +14 -1
- sl_shared_assets/tools/packaging_tools.pyi +4 -0
- sl_shared_assets/tools/project_management_tools.py +164 -0
- sl_shared_assets/tools/project_management_tools.pyi +48 -0
- {sl_shared_assets-1.0.0rc20.dist-info → sl_shared_assets-1.0.0rc21.dist-info}/METADATA +21 -4
- sl_shared_assets-1.0.0rc21.dist-info/RECORD +36 -0
- sl_shared_assets-1.0.0rc21.dist-info/entry_points.txt +8 -0
- sl_shared_assets/suite2p/__init__.py +0 -8
- sl_shared_assets/suite2p/__init__.pyi +0 -4
- sl_shared_assets/suite2p/multi_day.py +0 -224
- sl_shared_assets/suite2p/multi_day.pyi +0 -104
- sl_shared_assets/suite2p/single_day.py +0 -564
- sl_shared_assets/suite2p/single_day.pyi +0 -220
- sl_shared_assets-1.0.0rc20.dist-info/RECORD +0 -40
- sl_shared_assets-1.0.0rc20.dist-info/entry_points.txt +0 -4
- {sl_shared_assets-1.0.0rc20.dist-info → sl_shared_assets-1.0.0rc21.dist-info}/WHEEL +0 -0
- {sl_shared_assets-1.0.0rc20.dist-info → sl_shared_assets-1.0.0rc21.dist-info}/licenses/LICENSE +0 -0
|
@@ -5,13 +5,12 @@ an example for how to convert other data formats to match use the Sun lab data s
|
|
|
5
5
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
import datetime
|
|
8
|
-
import tempfile
|
|
9
8
|
|
|
10
9
|
import numpy as np
|
|
11
|
-
from ataraxis_base_utilities import LogLevel, console
|
|
10
|
+
from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
|
|
12
11
|
from ataraxis_time.time_helpers import extract_timestamp_from_bytes
|
|
13
12
|
|
|
14
|
-
from ..data_classes import SessionData, ProjectConfiguration
|
|
13
|
+
from ..data_classes import SessionData, ProjectConfiguration, get_system_configuration_data
|
|
15
14
|
from .transfer_tools import transfer_directory
|
|
16
15
|
from .packaging_tools import calculate_directory_checksum
|
|
17
16
|
|
|
@@ -170,7 +169,7 @@ def _reorganize_data(session_data: SessionData, source_root: Path) -> bool:
|
|
|
170
169
|
return True
|
|
171
170
|
|
|
172
171
|
|
|
173
|
-
def ascend_tyche_data(root_directory: Path
|
|
172
|
+
def ascend_tyche_data(root_directory: Path) -> None:
|
|
174
173
|
"""Reformats the old Tyche data to use the modern Sun lab layout and metadata files.
|
|
175
174
|
|
|
176
175
|
This function is used to convert old Tyche data to the modern data management standard. This is used to make the
|
|
@@ -188,30 +187,24 @@ def ascend_tyche_data(root_directory: Path, output_root_directory: Path, server_
|
|
|
188
187
|
this function for a large number of sessions will result in a long processing time due to the network data
|
|
189
188
|
transfer.
|
|
190
189
|
|
|
190
|
+
Since SessionData can only be created on a PC that has a valid acquisition system config, this function will
|
|
191
|
+
only work on a machine that is part of an active Sun lab acquisition system.
|
|
192
|
+
|
|
191
193
|
Args:
|
|
192
194
|
root_directory: The directory that stores one or more Tyche animal folders. This can be conceptualized as the
|
|
193
195
|
root directory for the Tyche project.
|
|
194
|
-
output_root_directory: The path to the local directory where to generate the converted Tyche project hierarchy.
|
|
195
|
-
Typically, this is the 'root' directory where all other Sun lab projects are stored.
|
|
196
|
-
server_root_directory: The path to the local filesystem-mounted BioHPC server storage directory. Note, this
|
|
197
|
-
directory hs to be mapped to the local filesystem via the SMB or equivalent protocol.
|
|
198
196
|
"""
|
|
199
197
|
# Generates a (shared) project configuration file.
|
|
200
198
|
project_configuration = ProjectConfiguration()
|
|
201
199
|
|
|
202
|
-
#
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
temp_nas_dir = Path(tempfile.mkdtemp(prefix="nas_temp_"))
|
|
207
|
-
temp_mesoscope_dir = Path(tempfile.mkdtemp(prefix="mesoscope_temp_"))
|
|
200
|
+
# The acquisition system config resolves most paths and filesystem configuration arguments
|
|
201
|
+
acquisition_system = get_system_configuration_data()
|
|
202
|
+
output_root_directory = acquisition_system.paths.root_directory
|
|
203
|
+
server_root_directory = acquisition_system.paths.server_storage_directory
|
|
208
204
|
|
|
209
205
|
# Statically defines project name and local root paths
|
|
210
|
-
|
|
211
|
-
project_configuration.
|
|
212
|
-
project_configuration.local_server_directory = server_root_directory
|
|
213
|
-
project_configuration.local_nas_directory = temp_nas_dir
|
|
214
|
-
project_configuration.local_mesoscope_directory = temp_mesoscope_dir
|
|
206
|
+
project_name = "Tyche"
|
|
207
|
+
project_configuration.project_name = project_name
|
|
215
208
|
|
|
216
209
|
# Uses nonsensical google sheet IDs. Tyche project did not use Google Sheet processing like our modern projects do.
|
|
217
210
|
project_configuration.water_log_sheet_id = "1xFh9Q2zT7pL3mVkJdR8bN6yXoE4wS5aG0cHu2Kf7D3v"
|
|
@@ -219,13 +212,14 @@ def ascend_tyche_data(root_directory: Path, output_root_directory: Path, server_
|
|
|
219
212
|
|
|
220
213
|
# Dumps project configuration into the 'configuration' subfolder of the Tyche project.
|
|
221
214
|
configuration_path = output_root_directory.joinpath("Tyche", "configuration", "project_configuration.yaml")
|
|
215
|
+
ensure_directory_exists(configuration_path)
|
|
222
216
|
project_configuration.save(path=configuration_path)
|
|
223
217
|
|
|
224
218
|
# Assumes that root directory stores all animal folders to be processed
|
|
225
219
|
for animal_folder in root_directory.iterdir():
|
|
226
220
|
# Each animal folder is named to include project name and a static animal ID, e.g.: Tyche-A7. This extracts each
|
|
227
221
|
# animal ID.
|
|
228
|
-
animal_name = animal_folder.
|
|
222
|
+
animal_name = animal_folder.stem.split(sep="-")[1]
|
|
229
223
|
|
|
230
224
|
# Under each animal root folder, there are day folders that use YYYY-MM-DD timestamps
|
|
231
225
|
for session_folder in animal_folder.iterdir():
|
|
@@ -240,11 +234,11 @@ def ascend_tyche_data(root_directory: Path, output_root_directory: Path, server_
|
|
|
240
234
|
# session data hierarchy using the output root. This generates a 'standard' Sun lab directory structure
|
|
241
235
|
# for the Tyche data.
|
|
242
236
|
session_data = SessionData.create(
|
|
237
|
+
project_name=project_configuration.project_name,
|
|
243
238
|
session_name=session_name,
|
|
244
239
|
animal_id=animal_name,
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
experiment_name=None, # Has to be none, otherwise the system tries to copy a configuration file.
|
|
240
|
+
session_type="mesoscope experiment",
|
|
241
|
+
experiment_name=None,
|
|
248
242
|
)
|
|
249
243
|
|
|
250
244
|
# Moves the data from the old hierarchy to the new hierarchy. If the process runs as expected, and
|
|
@@ -259,15 +253,22 @@ def ascend_tyche_data(root_directory: Path, output_root_directory: Path, server_
|
|
|
259
253
|
# noinspection PyTypeChecker
|
|
260
254
|
console.echo(message=message, level=LogLevel.WARNING)
|
|
261
255
|
else:
|
|
262
|
-
#
|
|
256
|
+
# Generates the telomere.bin file to mark the session as 'complete'
|
|
257
|
+
session_data.raw_data.telomere_path.touch()
|
|
258
|
+
|
|
259
|
+
# If the local transfer process was successful, generates a new checksum for the moved data
|
|
263
260
|
calculate_directory_checksum(directory=Path(session_data.raw_data.raw_data_path))
|
|
261
|
+
|
|
264
262
|
# Next, copies the data to the BioHPC server for further processing
|
|
265
263
|
transfer_directory(
|
|
266
264
|
source=Path(session_data.raw_data.raw_data_path),
|
|
267
|
-
destination=Path(
|
|
265
|
+
destination=Path(
|
|
266
|
+
server_root_directory.joinpath(project_name, animal_name, session_name, "raw_data")
|
|
267
|
+
),
|
|
268
268
|
verify_integrity=False,
|
|
269
269
|
)
|
|
270
|
-
|
|
270
|
+
|
|
271
|
+
# Removes the now-empty old session data directory.
|
|
271
272
|
acquisition_folder.rmdir()
|
|
272
273
|
|
|
273
274
|
# If the loop above removed all acquisition folders, all data for that day has been successfully converted
|
|
@@ -3,6 +3,7 @@ from pathlib import Path
|
|
|
3
3
|
from ..data_classes import (
|
|
4
4
|
SessionData as SessionData,
|
|
5
5
|
ProjectConfiguration as ProjectConfiguration,
|
|
6
|
+
get_system_configuration_data as get_system_configuration_data,
|
|
6
7
|
)
|
|
7
8
|
from .transfer_tools import transfer_directory as transfer_directory
|
|
8
9
|
from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
|
|
@@ -40,7 +41,7 @@ def _reorganize_data(session_data: SessionData, source_root: Path) -> bool:
|
|
|
40
41
|
to finalize the process manually.
|
|
41
42
|
"""
|
|
42
43
|
|
|
43
|
-
def ascend_tyche_data(root_directory: Path
|
|
44
|
+
def ascend_tyche_data(root_directory: Path) -> None:
|
|
44
45
|
"""Reformats the old Tyche data to use the modern Sun lab layout and metadata files.
|
|
45
46
|
|
|
46
47
|
This function is used to convert old Tyche data to the modern data management standard. This is used to make the
|
|
@@ -58,11 +59,10 @@ def ascend_tyche_data(root_directory: Path, output_root_directory: Path, server_
|
|
|
58
59
|
this function for a large number of sessions will result in a long processing time due to the network data
|
|
59
60
|
transfer.
|
|
60
61
|
|
|
62
|
+
Since SessionData can only be created on a PC that has a valid acquisition system config, this function will
|
|
63
|
+
only work on a machine that is part of an active Sun lab acquisition system.
|
|
64
|
+
|
|
61
65
|
Args:
|
|
62
66
|
root_directory: The directory that stores one or more Tyche animal folders. This can be conceptualized as the
|
|
63
67
|
root directory for the Tyche project.
|
|
64
|
-
output_root_directory: The path to the local directory where to generate the converted Tyche project hierarchy.
|
|
65
|
-
Typically, this is the 'root' directory where all other Sun lab projects are stored.
|
|
66
|
-
server_root_directory: The path to the local filesystem-mounted BioHPC server storage directory. Note, this
|
|
67
|
-
directory hs to be mapped to the local filesystem via the SMB or equivalent protocol.
|
|
68
68
|
"""
|
|
@@ -10,6 +10,19 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
|
10
10
|
from tqdm import tqdm
|
|
11
11
|
import xxhash
|
|
12
12
|
|
|
13
|
+
# Defines a 'blacklist' set of files. Primarily, this list contains the service files that may change after the session
# data has been acquired. Therefore, it does not make sense to include them in the checksum, as they do not reflect the
# data that should remain permanently unchanged. Note, make sure all service files are added to this set!
_excluded_files = {
    "ax_checksum.txt",
    "ubiquitin.bin",
    "telomere.bin",
    "single_day_suite2p.bin",
    "multi_day_suite2p.bin",
    "behavior.bin",
    "dlc.bin",
}
|
|
25
|
+
|
|
13
26
|
|
|
14
27
|
def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str, bytes]:
|
|
15
28
|
"""Calculates xxHash3-128 checksum for a single file and its path relative to the base directory.
|
|
@@ -89,7 +102,7 @@ def calculate_directory_checksum(
|
|
|
89
102
|
files = sorted(
|
|
90
103
|
path
|
|
91
104
|
for path in directory.rglob("*")
|
|
92
|
-
if path.is_file() and path.stem
|
|
105
|
+
if path.is_file() and f"{path.stem}{path.suffix}" not in _excluded_files # Excludes service files
|
|
93
106
|
)
|
|
94
107
|
|
|
95
108
|
# Precreates the directory checksum
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
|
+
from _typeshed import Incomplete
|
|
4
|
+
|
|
5
|
+
_excluded_files: Incomplete
|
|
6
|
+
|
|
3
7
|
def _calculate_file_checksum(base_directory: Path, file_path: Path) -> tuple[str, bytes]:
|
|
4
8
|
"""Calculates xxHash3-128 checksum for a single file and its path relative to the base directory.
|
|
5
9
|
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""This module provides tools for managing the data of any Sun lab project. Tools from this module extend the
|
|
2
|
+
functionality of SessionData class via a convenient API that allows working with the data of multiple sessions making
|
|
3
|
+
up a given project."""
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import polars as pl
|
|
8
|
+
|
|
9
|
+
from ..data_classes import SessionData
|
|
10
|
+
from .packaging_tools import calculate_directory_checksum
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def generate_project_manifest(
    raw_project_directory: Path, output_directory: Path, processed_project_directory: Path | None = None
) -> None:
    """Builds and saves the project manifest .feather file under the specified output directory.

    This function evaluates the input project directory and builds the 'manifest' file for the project. The file
    includes the descriptive information about every session stored inside the input project folder and the state of
    session's data processing (which processing pipelines have been applied to each session). The file will be created
    under the 'output_directory' directory and use the following name pattern: {ProjectName}_manifest.feather, where
    ProjectName is the name of the 'raw_project_directory' folder.

    Notes:
        The manifest file is primarily used to capture and move project state information between machines, typically
        in the context of working with data stored on a remote compute server or cluster. However, it can also be used
        on a local machine, since an up-to-date manifest file is required to run most data processing pipelines in the
        lab regardless of the runtime context.

        Sessions that do not contain the telomere.bin marker are recorded as incomplete, and all of their processing
        step columns are set to False without checking the processing marker files.

    Args:
        raw_project_directory: The path to the root project directory used to store raw session data.
        output_directory: The path to the directory where to save the generated manifest file. The directory is
            created if it does not exist.
        processed_project_directory: The path to the root project directory used to store processed session data if it
            is different from the 'raw_project_directory'. Typically, this would be the case on remote compute server(s)
            and not on local machines.
    """
    # Finds all raw data directories. Only directories are considered, so that a stray file that happens to be named
    # 'raw_data' is not mistaken for a session.
    session_directories = [
        directory.parent for directory in raw_project_directory.rglob("raw_data") if directory.is_dir()
    ]

    # Precreates the 'manifest' dictionary structure
    manifest: dict[str, list[str | bool]] = {
        "animal": [],  # Animal IDs.
        "session": [],  # Session names.
        "type": [],  # Type of the session (e.g., Experiment, Training, etc.).
        "raw_data": [],  # Server-side raw_data folder path.
        "processed_data": [],  # Server-side processed_data folder path.
        "complete": [],  # Determines if the session data is complete. Incomplete sessions are excluded from processing.
        "single_day_suite2p": [],  # Determines whether the session has been processed with the single-day s2p pipeline.
        "multi_day_suite2p": [],  # Determines whether the session has been processed with the multi-day s2p pipeline.
        "behavior": [],  # Determines whether the session has been processed with the behavior extraction pipeline.
        "dlc": [],  # Determines whether the session has been processed with the DeepLabCut pipeline.
    }

    # Loops over each session of every animal in the project and extracts session ID information and information
    # about which processing steps have been successfully applied to the session.
    for directory in session_directories:
        # Instantiates the SessionData instance to resolve the paths to all session's data files and locations.
        session_data = SessionData.load(
            session_path=directory, processed_data_root=processed_project_directory, make_processed_data_directory=False
        )

        # Extracts ID and data path information from the SessionData instance
        manifest["animal"].append(session_data.animal_id)
        manifest["session"].append(session_data.session_name)
        manifest["type"].append(session_data.session_type)
        manifest["raw_data"].append(str(session_data.raw_data.raw_data_path))
        manifest["processed_data"].append(str(session_data.processed_data.processed_data_path))

        # If the session raw_data folder contains the telomere.bin file, marks the session as complete.
        complete = session_data.raw_data.telomere_path.exists()
        manifest["complete"].append(complete)

        # If the session is incomplete, marks all processing steps as FALSE, as automatic processing is disabled for
        # incomplete sessions.
        if not complete:
            for step in ("single_day_suite2p", "multi_day_suite2p", "behavior", "dlc"):
                manifest[step].append(False)
            continue  # Cycles to the next session

        # Each processing step is considered complete if its marker file exists under the session's processed_data
        # hierarchy.
        processed_data = session_data.processed_data
        manifest["single_day_suite2p"].append(processed_data.single_day_suite2p_bin_path.exists())
        manifest["multi_day_suite2p"].append(processed_data.multi_day_suite2p_bin_path.exists())
        manifest["behavior"].append(processed_data.behavior_data_path.exists())
        manifest["dlc"].append(processed_data.dlc_bin_path.exists())

    # Converts the manifest dictionary to a Polars Dataframe
    schema = {
        "animal": pl.String,
        "session": pl.String,
        "raw_data": pl.String,
        "processed_data": pl.String,
        "type": pl.String,
        "complete": pl.Boolean,
        "single_day_suite2p": pl.Boolean,
        "multi_day_suite2p": pl.Boolean,
        "behavior": pl.Boolean,
        "dlc": pl.Boolean,
    }
    df = pl.DataFrame(manifest, schema=schema)

    # Sorts the DataFrame by animal and then session. Since we assign animal IDs sequentially and 'name' sessions based
    # on acquisition timestamps, the sort order is chronological.
    sorted_df = df.sort(["animal", "session"])

    # Ensures the output directory exists, so that saving the manifest does not fail on a fresh machine.
    output_directory.mkdir(parents=True, exist_ok=True)

    # Saves the generated manifest to the project-specific manifest .feather file for further processing. Uses the
    # full directory name (not the stem), so that project names that contain a dot are not truncated.
    sorted_df.write_ipc(
        file=output_directory.joinpath(f"{raw_project_directory.name}_manifest.feather"), compression="lz4"
    )
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def verify_session_checksum(session_path: Path) -> bool:
    """Checks the session's raw data against the checksum recorded in its ax_checksum.txt file.

    The function re-calculates the checksum of the session's raw_data directory and compares it to the stored value.
    It is primarily used after moving the data from a local PC to the remote server for long-term storage. A matching
    checksum leaves the session untouched; a mismatch removes the 'telomere.bin' marker file (if it exists).

    Notes:
        Removing the telomere.bin marker file from session's raw_data folder marks the session as incomplete, excluding
        it from all further automatic processing.

    Args:
        session_path: The path to the session directory to be verified. Note, the input session directory must contain
            the 'raw_data' subdirectory.

    Returns:
        True if the checksum matches, False otherwise.
    """
    # Resolves the session's data layout
    raw_data = SessionData.load(session_path=session_path).raw_data

    # Computes a fresh checksum for the raw_data directory without overwriting the stored one
    recomputed = calculate_directory_checksum(directory=raw_data.raw_data_path, batch=False, save_checksum=False)

    # Reads the checksum recorded in the ax_checksum.txt file
    recorded = Path(raw_data.checksum_path).read_text().strip()

    # Matching checksums mean the data is intact, so there is nothing else to do.
    if recorded == recomputed:
        return True

    # A mismatch likely indicates data corruption. Removes the telomere.bin marker (if present), which marks the
    # session as incomplete for all other Sun lab runtimes.
    marker = raw_data.telomere_path
    if marker.exists():
        marker.unlink()
    return False
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from ..data_classes import SessionData as SessionData
|
|
4
|
+
from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
|
|
5
|
+
|
|
6
|
+
def generate_project_manifest(
    raw_project_directory: Path, output_directory: Path, processed_project_directory: Path | None = None
) -> None:
    """Builds and saves the project manifest .feather file under the specified output directory.

    This function evaluates the input project directory and builds the 'manifest' file for the project. The file
    includes the descriptive information about every session stored inside the input project folder and the state of
    session's data processing (which processing pipelines have been applied to each session). The file will be created
    under the 'output_directory' directory and use the following name pattern: {ProjectName}_manifest.feather.

    Notes:
        The manifest file is primarily used to capture and move project state information between machines, typically
        in the context of working with data stored on a remote compute server or cluster. However, it can also be used
        on a local machine, since an up-to-date manifest file is required to run most data processing pipelines in the
        lab regardless of the runtime context.

    Args:
        raw_project_directory: The path to the root project directory used to store raw session data.
        output_directory: The path to the directory where to save the generated manifest file.
        processed_project_directory: The path to the root project directory used to store processed session data if it
            is different from the 'raw_project_directory'. Typically, this would be the case on remote compute server(s)
            and not on local machines.
    """
|
|
29
|
+
|
|
30
|
+
def verify_session_checksum(session_path: Path) -> bool:
    """Verifies the integrity of the session's raw data by re-calculating the checksum of the raw_data directory and
    comparing it against the checksum stored in the ax_checksum.txt file.

    Primarily, this function is used to verify data integrity after transferring it from a local PC to the remote
    server for long-term storage. This function is designed to do nothing if the checksum matches and to remove the
    'telomere.bin' marker file if it does not.

    Notes:
        Removing the telomere.bin marker file from the session's raw_data folder marks the session as incomplete,
        excluding it from all further automatic processing.

    Args:
        session_path: The path to the session directory to be verified. Note, the input session directory must contain
            the 'raw_data' subdirectory.

    Returns:
        True if the checksum matches, False otherwise.
    """
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sl-shared-assets
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.0rc21
|
|
4
4
|
Summary: Stores assets shared between multiple Sun (NeuroAI) lab data pipelines.
|
|
5
5
|
Project-URL: Homepage, https://github.com/Sun-Lab-NBB/sl-shared-assets
|
|
6
6
|
Project-URL: Documentation, https://sl-shared-assets-api-docs.netlify.app/
|
|
7
|
-
Author: Ivan Kondratyev, Kushaan Gupta, Yuantao Deng
|
|
7
|
+
Author: Ivan Kondratyev, Kushaan Gupta, Yuantao Deng, Natalie Yeung
|
|
8
8
|
Maintainer-email: Ivan Kondratyev <ik278@cornell.edu>
|
|
9
9
|
License: GNU GENERAL PUBLIC LICENSE
|
|
10
10
|
Version 3, 29 June 2007
|
|
@@ -695,8 +695,10 @@ Requires-Dist: ataraxis-base-utilities<4,>=3
|
|
|
695
695
|
Requires-Dist: ataraxis-data-structures<4,>=3.1.1
|
|
696
696
|
Requires-Dist: ataraxis-time<4,>=3
|
|
697
697
|
Requires-Dist: click<9,>=8
|
|
698
|
-
Requires-Dist:
|
|
698
|
+
Requires-Dist: natsort<9,>=8
|
|
699
699
|
Requires-Dist: paramiko<4,>=3.5.1
|
|
700
|
+
Requires-Dist: polars<2,>=1
|
|
701
|
+
Requires-Dist: pyarrow<21,>=20
|
|
700
702
|
Requires-Dist: simple-slurm<1,>=0
|
|
701
703
|
Requires-Dist: tqdm<5,>=4
|
|
702
704
|
Requires-Dist: xxhash<4,>=3
|
|
@@ -717,8 +719,10 @@ Requires-Dist: types-tqdm<5,>=4; extra == 'conda'
|
|
|
717
719
|
Provides-Extra: condarun
|
|
718
720
|
Requires-Dist: appdirs<2,>=1; extra == 'condarun'
|
|
719
721
|
Requires-Dist: click<9,>=8; extra == 'condarun'
|
|
720
|
-
Requires-Dist:
|
|
722
|
+
Requires-Dist: natsort<9,>=8; extra == 'condarun'
|
|
721
723
|
Requires-Dist: paramiko<4,>=3.5.1; extra == 'condarun'
|
|
724
|
+
Requires-Dist: polars<2,>=1; extra == 'condarun'
|
|
725
|
+
Requires-Dist: pyarrow<21,>=20; extra == 'condarun'
|
|
722
726
|
Requires-Dist: tqdm<5,>=4; extra == 'condarun'
|
|
723
727
|
Provides-Extra: dev
|
|
724
728
|
Requires-Dist: ataraxis-automation<5,>=4; extra == 'dev'
|
|
@@ -781,6 +785,7 @@ acquisition and processing and provides the API for accessing the lab’s main c
|
|
|
781
785
|
|
|
782
786
|
- [Dependencies](#dependencies)
|
|
783
787
|
- [Installation](#installation)
|
|
788
|
+
- [Usage](#usage)
|
|
784
789
|
- [API Documentation](#api-documentation)
|
|
785
790
|
- [Versioning](#versioning)
|
|
786
791
|
- [Authors](#authors)
|
|
@@ -811,11 +816,22 @@ Use the following command to install the library using pip: ```pip install sl-sh
|
|
|
811
816
|
|
|
812
817
|
---
|
|
813
818
|
|
|
819
|
+
## Usage
|
|
820
|
+
|
|
821
|
+
All library components are intended to be used via other Sun lab libraries. Developers should study the API and CLI
|
|
822
|
+
documentation below to learn how to use library components in other Sun lab libraries.
|
|
823
|
+
|
|
824
|
+
---
|
|
825
|
+
|
|
814
826
|
## API Documentation
|
|
815
827
|
|
|
816
828
|
See the [API documentation](https://sl-shared-assets-api-docs.netlify.app/) for the
|
|
817
829
|
detailed description of the methods and classes exposed by components of this library.
|
|
818
830
|
|
|
831
|
+
**Note!** The API documentation includes important information about Command-Line Interfaces (CLIs) exposed by this
|
|
832
|
+
library as part of installation into a Python environment. All users are highly encouraged to study the CLI
|
|
833
|
+
documentation to learn how to use library components via the terminal.
|
|
834
|
+
|
|
819
835
|
___
|
|
820
836
|
|
|
821
837
|
## Versioning
|
|
@@ -830,6 +846,7 @@ We use [semantic versioning](https://semver.org/) for this project. For the vers
|
|
|
830
846
|
- Ivan Kondratyev ([Inkaros](https://github.com/Inkaros))
|
|
831
847
|
- Kushaan Gupta ([kushaangupta](https://github.com/kushaangupta))
|
|
832
848
|
- Yuantao Deng ([YuantaoDeng](https://github.com/YuantaoDeng))
|
|
849
|
+
- Natalie Yeung
|
|
833
850
|
|
|
834
851
|
___
|
|
835
852
|
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
sl_shared_assets/__init__.py,sha256=rscR353jiyUQ-wpguTLOM-A5Lqr1ftQtuTan1D0AtR0,2196
|
|
2
|
+
sl_shared_assets/__init__.pyi,sha256=Ye6eY_y_l9CTqwbCf-OxXOphxXKfn9UJnuw09DdLEtc,2405
|
|
3
|
+
sl_shared_assets/cli.py,sha256=FRc452bUQCDMtMLVxIRr178pbuCVTYWplJAzumy0Ruo,14971
|
|
4
|
+
sl_shared_assets/cli.pyi,sha256=dmkg30arGgA3pLipiuQFb8jq2IPy53srE6CdFAaCdwc,5620
|
|
5
|
+
sl_shared_assets/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
sl_shared_assets/data_classes/__init__.py,sha256=b0BwTAJCD1zbwjd2UdpXheq61q1sgBuYhtAL-GHA2h4,1835
|
|
7
|
+
sl_shared_assets/data_classes/__init__.pyi,sha256=e2082sm5pSw3bxNZGqwcRhuKLH8T7hcEo6kWtOZNDBU,1968
|
|
8
|
+
sl_shared_assets/data_classes/configuration_data.py,sha256=qCLQfG22qWQGX0Uc2WyWAsNsc4kkonMSmV0CRM7QfhQ,27826
|
|
9
|
+
sl_shared_assets/data_classes/configuration_data.pyi,sha256=dTgLSwgqmnIVpoDxN1mETX-cwwlBZ9PJ1jlFaktitbs,9413
|
|
10
|
+
sl_shared_assets/data_classes/runtime_data.py,sha256=Q7Ykf9hgrw1jYKXa53mn_LW8G2cPmLLuxgGkP6qQcc4,15483
|
|
11
|
+
sl_shared_assets/data_classes/runtime_data.pyi,sha256=PxaCbeF9COR4ri91pdzh7zVrqaz2KEDYB1EoLhZQC_c,6618
|
|
12
|
+
sl_shared_assets/data_classes/session_data.py,sha256=tQMt1i9Bloj5K9kYS1gkCdOtksd_8_nHFMZQHLaKCoE,36754
|
|
13
|
+
sl_shared_assets/data_classes/session_data.pyi,sha256=a3nPC42mQniUPk2HsAM1_DWUa-BfhzDljQfDQh2rSus,13071
|
|
14
|
+
sl_shared_assets/data_classes/surgery_data.py,sha256=qsMj3NkjhylAT9b_wHBY-1XwTu2xsZcZatdECmkA7Bs,7437
|
|
15
|
+
sl_shared_assets/data_classes/surgery_data.pyi,sha256=rf59lJ3tGSYKHQlEGXg75MnjajBwl0DYhL4TClAO4SM,2605
|
|
16
|
+
sl_shared_assets/server/__init__.py,sha256=nyX6-9ACcrQeRQOCNvBVrWSTHGjRPANIG_u0aq7HPTg,426
|
|
17
|
+
sl_shared_assets/server/__init__.pyi,sha256=7o99f8uf6NuBjMZjNAM1FX69Qbu5uBluRSAyaUWbXOU,263
|
|
18
|
+
sl_shared_assets/server/job.py,sha256=M7ZytqhluEV-YQPM9VQV3SqK-F9egQ3UcLc1SLBH4rA,7885
|
|
19
|
+
sl_shared_assets/server/job.pyi,sha256=cxgHMpuwHsJGf_ZcTSSa2tZNzeR_GxqlICOsYGV_oy0,5655
|
|
20
|
+
sl_shared_assets/server/server.py,sha256=NpqWVjvxdtxHZIooBVaKY2jEBDN3dGohBzrFA0TSKIU,9380
|
|
21
|
+
sl_shared_assets/server/server.pyi,sha256=uu-bs5LBuOF-AL7qroYFbGc1aWgBalm-uHUXzZNxheU,4144
|
|
22
|
+
sl_shared_assets/tools/__init__.py,sha256=N95ZPMz-_HdNPrbVieCFza-QSVS6BV2KRB4K1OLRttc,636
|
|
23
|
+
sl_shared_assets/tools/__init__.pyi,sha256=xeDF8itMc0JRgLrO_IN_9gW7cp_Ld-Gf-rjtrgWvQ2I,551
|
|
24
|
+
sl_shared_assets/tools/ascension_tools.py,sha256=kIqYGX9F8lRao_LaVOacIiT8J9SypTvarb9mgaI9ZPs,15387
|
|
25
|
+
sl_shared_assets/tools/ascension_tools.pyi,sha256=tQCDdWZ20ZjUjpMs8aGIN0yBg5ff3j6spi62b3Han4o,3755
|
|
26
|
+
sl_shared_assets/tools/packaging_tools.py,sha256=oY-EWuTiMfWobYllVZy0piGlVnTHCpPY1GF-WmqQdj4,7269
|
|
27
|
+
sl_shared_assets/tools/packaging_tools.pyi,sha256=vgGbAQCExwg-0A5F72MzEhzHxu97Nqg1yuz-5P89ycU,3118
|
|
28
|
+
sl_shared_assets/tools/project_management_tools.py,sha256=IV4xAfy3_SpV50Xvt7EdVPVLojp9di1rEmO4KoAgFtY,8740
|
|
29
|
+
sl_shared_assets/tools/project_management_tools.pyi,sha256=pQY2--Kn3pKSADsArDRmbwGR7JqHD_2qdB0LJBbW_xo,2735
|
|
30
|
+
sl_shared_assets/tools/transfer_tools.py,sha256=J26kwOp_NpPSY0-xu5FTw9udte-rm_mW1FJyaTNoqQI,6606
|
|
31
|
+
sl_shared_assets/tools/transfer_tools.pyi,sha256=FoH7eYZe7guGHfPr0MK5ggO62uXKwD2aJ7h1Bu7PaEE,3294
|
|
32
|
+
sl_shared_assets-1.0.0rc21.dist-info/METADATA,sha256=jPQkQ9cED7lsKrqOjYWL7iWzqDZbZ4PJz6IvJx9BoEM,48613
|
|
33
|
+
sl_shared_assets-1.0.0rc21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
34
|
+
sl_shared_assets-1.0.0rc21.dist-info/entry_points.txt,sha256=76c00fRS4IuXBP2xOBdvycT15Zen-lHiDg2FaSt-HB4,547
|
|
35
|
+
sl_shared_assets-1.0.0rc21.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
36
|
+
sl_shared_assets-1.0.0rc21.dist-info/RECORD,,
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
[console_scripts]
|
|
2
|
+
sl-ascend-tyche = sl_shared_assets.cli:ascend_tyche_directory
|
|
3
|
+
sl-create-experiment = sl_shared_assets.cli:generate_experiment_configuration_file
|
|
4
|
+
sl-create-project = sl_shared_assets.cli:generate_project_configuration_file
|
|
5
|
+
sl-create-server-credentials = sl_shared_assets.cli:generate_server_credentials_file
|
|
6
|
+
sl-create-system-config = sl_shared_assets.cli:generate_system_configuration_file
|
|
7
|
+
sl-project-manifest = sl_shared_assets.cli:generate_project_manifest_file
|
|
8
|
+
sl-verify-session = sl_shared_assets.cli:verify_session_integrity
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
"""This package provides the configuration classes used by the Sun lab maintained version of the suite2p library
|
|
2
|
-
(sl-suite2p package, https://github.com/Sun-Lab-NBB/suite2p) to process brain activity data within and across sessions
|
|
3
|
-
(days)."""
|
|
4
|
-
|
|
5
|
-
from .multi_day import MultiDayS2PConfiguration
|
|
6
|
-
from .single_day import SingleDayS2PConfiguration
|
|
7
|
-
|
|
8
|
-
__all__ = ["MultiDayS2PConfiguration", "SingleDayS2PConfiguration"]
|