sl-shared-assets 4.0.0__py3-none-any.whl → 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sl-shared-assets might be problematic. Click here for more details.
- sl_shared_assets/__init__.py +45 -42
- sl_shared_assets/command_line_interfaces/__init__.py +3 -0
- sl_shared_assets/command_line_interfaces/configure.py +173 -0
- sl_shared_assets/command_line_interfaces/manage.py +226 -0
- sl_shared_assets/data_classes/__init__.py +33 -32
- sl_shared_assets/data_classes/configuration_data.py +267 -79
- sl_shared_assets/data_classes/runtime_data.py +11 -11
- sl_shared_assets/data_classes/session_data.py +226 -289
- sl_shared_assets/data_classes/surgery_data.py +6 -6
- sl_shared_assets/server/__init__.py +24 -4
- sl_shared_assets/server/job.py +6 -7
- sl_shared_assets/server/pipeline.py +570 -0
- sl_shared_assets/server/server.py +57 -25
- sl_shared_assets/tools/__init__.py +9 -8
- sl_shared_assets/tools/packaging_tools.py +14 -25
- sl_shared_assets/tools/project_management_tools.py +602 -523
- sl_shared_assets/tools/transfer_tools.py +88 -23
- {sl_shared_assets-4.0.0.dist-info → sl_shared_assets-5.0.0.dist-info}/METADATA +46 -202
- sl_shared_assets-5.0.0.dist-info/RECORD +23 -0
- sl_shared_assets-5.0.0.dist-info/entry_points.txt +3 -0
- sl_shared_assets/__init__.pyi +0 -91
- sl_shared_assets/cli.py +0 -500
- sl_shared_assets/cli.pyi +0 -106
- sl_shared_assets/data_classes/__init__.pyi +0 -75
- sl_shared_assets/data_classes/configuration_data.pyi +0 -235
- sl_shared_assets/data_classes/runtime_data.pyi +0 -157
- sl_shared_assets/data_classes/session_data.pyi +0 -379
- sl_shared_assets/data_classes/surgery_data.pyi +0 -89
- sl_shared_assets/server/__init__.pyi +0 -11
- sl_shared_assets/server/job.pyi +0 -205
- sl_shared_assets/server/server.pyi +0 -298
- sl_shared_assets/tools/__init__.pyi +0 -19
- sl_shared_assets/tools/ascension_tools.py +0 -265
- sl_shared_assets/tools/ascension_tools.pyi +0 -68
- sl_shared_assets/tools/packaging_tools.pyi +0 -58
- sl_shared_assets/tools/project_management_tools.pyi +0 -239
- sl_shared_assets/tools/transfer_tools.pyi +0 -53
- sl_shared_assets-4.0.0.dist-info/RECORD +0 -36
- sl_shared_assets-4.0.0.dist-info/entry_points.txt +0 -7
- {sl_shared_assets-4.0.0.dist-info → sl_shared_assets-5.0.0.dist-info}/WHEEL +0 -0
- {sl_shared_assets-4.0.0.dist-info → sl_shared_assets-5.0.0.dist-info}/licenses/LICENSE +0 -0
sl_shared_assets/cli.py
DELETED
|
@@ -1,500 +0,0 @@
|
|
|
1
|
-
"""This module stores the Command-Line Interfaces (CLIs) exposed by the library as part of the installation process."""
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
|
|
5
|
-
import click
|
|
6
|
-
from ataraxis_base_utilities import LogLevel, console, ensure_directory_exists
|
|
7
|
-
|
|
8
|
-
from .tools import ascend_tyche_data, resolve_p53_marker, verify_session_checksum, generate_project_manifest
|
|
9
|
-
from .server import Server, JupyterJob, generate_server_credentials
|
|
10
|
-
from .data_classes import SessionData, TrackerFileNames, get_processing_tracker
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
# NOTE: click renders the function docstring below as the command's --help text, so it is written for end users and
# per-parameter documentation lives in the 'help' argument of each option.
@click.command()
@click.option(
    "-sp",
    "--session_path",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
    required=True,
    help="The absolute path to the session directory whose raw data needs to be verified for potential corruption.",
)
@click.option(
    "-id",
    "--manager_id",
    required=True,
    default=0,
    show_default=True,
    help=(
        "The xxHash-64 hash value that represents the unique identifier for the process that manages this runtime. "
        "This is primarily used when calling this CLI on remote compute servers to ensure that only a single process "
        "can execute the CLI at a time."
    ),
)
@click.option(
    "-c",
    "--create_processed_directories",
    is_flag=True,
    show_default=True,
    default=False,
    help=(
        "Determines whether to create the processed data hierarchy. This flag should be disabled for most runtimes. "
        "Primarily, it is used by acquisition systems to generate processed data directories on the remote "
        "compute servers as part of the data preprocessing pipeline."
    ),
)
@click.option(
    "-pdr",
    "--processed_data_root",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
    required=False,
    help=(
        "The absolute path to the directory where processed data from all projects is stored on the machine that runs "
        "this command. This argument is used when calling the CLI on the BioHPC server, which uses different data "
        "volumes for raw and processed data. Note, the input path must point to the root directory, as it will be "
        "automatically modified to include the project name, the animal id, and the session ID. This argument is only "
        "used if 'create_processed_directories' flag is True."
    ),
)
@click.option(
    "-um",
    "--update_manifest",
    is_flag=True,
    help=(
        "Determines whether to (re)generate the manifest file for the processed session's project. This flag "
        "should always be enabled when this CLI is executed on the remote compute server(s) to ensure that the "
        "manifest file always reflects the most actual state of each project."
    ),
)
def verify_session_integrity(
    session_path: Path,
    manager_id: int,
    create_processed_directories: bool,
    processed_data_root: Path | None,
    update_manifest: bool,
) -> None:
    """Checks the integrity of the target session's raw data (contents of the raw_data directory).

    This command assumes that the data has been checksummed during acquisition and contains an ax_checksum.txt file
    that stores the data checksum generated before transferring the data to the long-term storage destination. This
    function always verifies the integrity of the 'raw_data' directory. It does not work with 'processed_data' or any
    other directories. If the session data was corrupted, the command removes the 'telomere.bin' file, marking the
    session as 'incomplete' and automatically excluding it from all further automated processing runtimes. If the
    session data is intact, it generates a 'verified.bin' marker file inside the session's raw_data folder.

    The command is also used by Sun lab data acquisition systems to generate the processed data hierarchy for each
    processed session. This use case is fully automated and should not be triggered manually by the user.
    """
    session = Path(session_path)

    # The session data is loaded before the verification runs, preserving the original call order. It is only used
    # afterward, to locate the raw_data directory that stores the integrity tracker file.
    session_data = SessionData.load(session_path=session)

    # Runs the verification process
    verify_session_checksum(
        session_path=session,
        manager_id=manager_id,
        create_processed_data_directory=create_processed_directories,
        processed_data_root=processed_data_root,
        update_manifest=update_manifest,
    )

    # Checks the outcome of the verification process by reading the integrity tracker and reports it to the user. The
    # command only reports the outcome; it does not raise or change the exit code when the data is compromised.
    tracker = get_processing_tracker(root=session_data.raw_data.raw_data_path, file_name=TrackerFileNames.INTEGRITY)
    if tracker.is_complete:
        # noinspection PyTypeChecker
        console.echo(message=f"Session {session.stem} raw data integrity: Verified.", level=LogLevel.SUCCESS)
    else:
        # noinspection PyTypeChecker
        console.echo(message=f"Session {session.stem} raw data integrity: Compromised!", level=LogLevel.ERROR)
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
# NOTE: click renders the function docstring below as the command's --help text, so its wording is kept unchanged.
@click.command()
@click.option(
    "-pp",
    "--project_path",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
    required=True,
    help="The absolute path to the project directory where raw session data is stored.",
)
@click.option(
    "-od",
    "--output_directory",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
    required=True,
    help="The absolute path to the directory where to store the generated project manifest file.",
)
@click.option(
    "-pdr",
    "--processed_data_root",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
    required=False,
    help=(
        "The absolute path to the directory where processed data from all projects is stored on the machine that runs "
        "this command. This argument is used when calling the CLI on the BioHPC server, which uses different data "
        "volumes for raw and processed data. Note, the input path must point to the root directory, as it will be "
        "automatically modified to include the project name. Note, if the system cannot properly resolve the path to "
        "the processed data, the generated manifest will indicate that no data processing has been performed for the "
        "project."
    ),
)
def generate_project_manifest_file(
    project_path: Path,
    output_directory: Path,
    processed_data_root: Path | None,
) -> None:
    """Generates the manifest .feather file that provides information about the data-processing state of all available
    project sessions.

    The manifest file is typically used when batch-processing session data on the remote compute server. It contains the
    comprehensive snapshot of the available project's data in a table-compatible format that can also be transferred
    between machines (as it is cached in a file).
    """
    # Normalizes all inputs to Path instances up front. The processed data root is optional and is forwarded as None
    # when it was not provided.
    project_root = Path(project_path)
    manifest_directory = Path(output_directory)
    processed_root = Path(processed_data_root) if processed_data_root else None

    # Delegates the manifest generation to the library tool.
    generate_project_manifest(
        raw_project_directory=project_root,
        output_directory=manifest_directory,
        processed_data_root=processed_root,
    )

    # Notifies the user that the manifest has been generated.
    # noinspection PyTypeChecker
    console.echo(message=f"Project {project_root.stem} data manifest file: generated.", level=LogLevel.SUCCESS)
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
# NOTE: click renders the function docstring below as the command's --help text, so it is written for end users and
# per-parameter documentation lives in the 'help' argument of each option.
@click.command()
@click.option(
    "-od",
    "--output_directory",
    type=click.Path(exists=False, file_okay=False, dir_okay=True, path_type=Path),
    required=True,
    help="The absolute path to the directory where to store the generated server credentials file.",
)
@click.option(
    "-h",
    "--host",
    type=str,
    required=True,
    show_default=True,
    default="cbsuwsun.biohpc.cornell.edu",
    help="The host name or IP address of the server to connect to.",
)
@click.option(
    "-u",
    "--username",
    type=str,
    required=True,
    help="The username to use for server authentication.",
)
@click.option(
    "-p",
    "--password",
    type=str,
    required=True,
    help="The password to use for server authentication.",
)
@click.option(
    "-sr",
    "--storage_root",
    type=str,
    required=True,
    show_default=True,
    default="/local/storage",
    help=(
        "The absolute path to the root storage (slow) server directory. Typically, this is the path to the "
        "top-level (root) directory of the HDD RAID volume."
    ),
)
@click.option(
    "-wr",
    "--working_root",
    type=str,
    required=True,
    show_default=True,
    default="/local/workdir",
    help=(
        "The absolute path to the root working (fast) server directory. Typically, this is the path to the top-level "
        "(root) directory of the NVME RAID volume. If the server uses the same volume for both storage and working "
        "directories, enter the same path under both 'storage_root' and 'working_root'."
    ),
)
@click.option(
    "-sdn",
    "--shared_directory_name",
    type=str,
    required=True,
    show_default=True,
    default="sun_data",
    help=(
        "The name of the shared directory used to store all Sun lab project data on the storage and working server "
        "volumes."
    ),
)
def generate_server_credentials_file(
    output_directory: Path,
    host: str,
    username: str,
    password: str,
    storage_root: str,
    working_root: str,
    shared_directory_name: str,
) -> None:
    """Generates a new server_credentials.yaml file under the specified directory, using input information.

    This command is used to set up access to compute servers and clusters on new machines (PCs). The data stored inside
    the server_credentials.yaml file generated by this command is used by the Server and Job classes used in many Sun
    lab data processing libraries.
    """

    # If necessary, generates the output directory hierarchy before creating the credentials' file. The option is
    # declared with exists=False, so the target directory may not exist yet at this point.
    ensure_directory_exists(output_directory)

    # Generates the credentials' file
    generate_server_credentials(
        output_directory=Path(output_directory),
        username=username,
        password=password,
        host=host,
        storage_root=storage_root,
        working_root=working_root,
        shared_directory_name=shared_directory_name,
    )

    # Plain string literals: the message has no placeholders, so no f-string prefix is needed.
    message = (
        "Server access credentials file: generated. If necessary, remember to edit the data acquisition system "
        "configuration file to include the path to the credentials file generated via this CLI."
    )
    # noinspection PyTypeChecker
    console.echo(message=message, level=LogLevel.SUCCESS)

    # Reports the directory that now stores the credentials file.
    message = f"File location: {output_directory}"
    # noinspection PyTypeChecker
    console.echo(message=message, level=LogLevel.SUCCESS)
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
# NOTE: click renders the function docstring below as the command's --help text, so its wording is kept unchanged.
@click.command()
@click.option(
    "-id",
    "--input_directory",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
    required=True,
    help="The absolute path to the directory that stores original Tyche animal folders.",
)
def ascend_tyche_directory(input_directory: Path) -> None:
    """Restructures old Tyche project data to use the modern Sun lab data structure and uploads them to the processing
    server.

    This command is used to convert ('ascend') the old Tyche project data to the modern Sun lab structure. After
    ascension, the data can be processed and analyzed using all modern Sun lab (sl-) tools and libraries. Note, this
    process expects the input data to be preprocessed using an old Sun lab mesoscope data preprocessing pipeline. It
    will not work for any other project or data. Also, this command will only work on a machine (PC) that belongs to a
    valid Sun lab data acquisition system, such as VRPC of the Mesoscope-VR system.
    """
    # Normalizes the input to a Path instance and hands the whole conversion off to the library tool.
    tyche_root = Path(input_directory)
    ascend_tyche_data(root_directory=tyche_root)
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
# NOTE: click renders the function docstring below as the command's --help text, so it is written for end users and
# per-parameter documentation lives in the 'help' argument of each option.
@click.command()
@click.option(
    "-cp",
    "--credentials_path",
    type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path),
    required=True,
    help=(
        "The absolute path to the server_credentials.yaml file that stores access credentials for the target Sun lab "
        "server. If necessary, use the 'sl-create-server-credentials' command to generate the file."
    ),
)
@click.option(
    "-n",
    "--name",
    type=str,
    required=True,
    show_default=True,
    default="jupyter_server",
    help=(
        "The descriptive name to be given to the remote Jupyter server job. Primarily, this is used to identify the "
        "job inside the log files."
    ),
)
@click.option(
    "-e",
    "--environment",
    type=str,
    required=True,
    help=(
        "The name of the conda environment to use for running the Jupyter server. At a minimum, the target environment "
        "must contain the 'jupyterlab' and the 'notebook' Python packages. Note, the user whose credentials are used "
        "to connect to the server must have a configured conda / mamba shell that exposes the target environment for "
        "the job to run as expected."
    ),
)
@click.option(
    "-d",
    "--directory",
    type=click.Path(exists=False, file_okay=True, dir_okay=True, path_type=Path),
    required=False,
    help=(
        "The absolute path to the server directory to use as the root directory for the jupyter session. If not "
        "provided, this is automatically resolved to user's working directory. Note, during runtime, Jupyter will only "
        "have access to files stored in or under that root directory."
    ),
)
@click.option(
    "-c",
    "--cores",
    type=int,
    required=True,
    show_default=True,
    default=2,
    help=(
        "The number of CPU cores to allocate to the Jupyter server. Note, during the interactive Jupyter runtime, it "
        "is impossible to use more than this number of CPU cores."
    ),
)
@click.option(
    "-m",
    "--memory",
    type=int,
    required=True,
    show_default=True,
    default=32,
    help=(
        "The RAM, in Gigabytes, to allocate to the Jupyter server. Note, during the interactive Jupyter runtime, it "
        "is impossible to use more than this amount of RAM."
    ),
)
@click.option(
    "-t",
    "--time",
    type=int,
    required=True,
    show_default=True,
    default=240,
    help=(
        "The maximum runtime duration for this Jupyter server instance, in minutes. If the server job is still running "
        "at the end of this time limit, the job will be forcibly terminated by SLURM. Note, to prevent hogging the "
        "server, make sure this parameter is always set to the smallest feasible period of time you intend to interact "
        "with the server."
    ),
)
@click.option(
    "-p",
    "--port",
    type=int,
    required=True,
    show_default=True,
    default=0,
    help=(
        "The port to use for the Jupyter server communication on the remote server. Valid port values are from 8888 "
        "to 9999. Most runtimes should leave this set to the default value (0), which will randomly select one of the "
        "valid ports. Using random selection minimizes the chances of colliding with other interactive jupyter "
        "sessions."
    ),
)
def start_jupyter_server(
    credentials_path: Path,
    name: str,
    environment: str,
    directory: Path | None,
    cores: int,
    memory: int,
    time: int,
    port: int,
) -> None:
    """Starts an interactive Jupyter session on the remote Sun lab server.

    This command should be used to run Jupyter lab and notebooks sessions on the remote Sun lab server. Since all lab
    data is stored on the server, this allows running light interactive analysis sessions on the same node as the data,
    while leveraging considerable compute resources of the server.

    Calling this command initializes a SLURM session that runs the interactive Jupyter server. Since this server
    directly competes for resources with all other headless jobs running on the server, it is imperative that each
    jupyter runtime uses only the minimum amount of resources and run-time as necessary. Do not use this command to run
    heavy data processing pipelines! Instead, consult with library documentation and use the headless Job class.
    """
    # Initializes server connection
    server = Server(credentials_path)
    job: JupyterJob | None = None
    try:
        # If the caller did not provide an explicit notebook directory, defaults to the user's working directory.
        # The value is forwarded as-is: wrapping it in a tuple (as a stray trailing comma previously did) would hand
        # the server a 1-element tuple instead of a path.
        notebook_directory = directory if directory is not None else server.user_working_root

        # Launches the specified Jupyter server
        job = server.launch_jupyter_server(
            job_name=name,
            conda_environment=environment,
            notebook_directory=notebook_directory,
            cpus_to_use=cores,
            ram_gb=memory,
            port=port,
            time_limit=time,
        )

        # Displays the server connection details to the user via terminal
        job.print_connection_info()

        # Blocks in-place until the user shuts down the server. This allows terminating the jupyter job early if the
        # user is done working with the server
        input("Enter anything to shut down the server: ")

    # Ensures that the server created as part of this CLI is always terminated when the CLI terminates
    finally:
        try:
            # Terminates the server job, if it was successfully launched
            if job is not None:
                server.abort_job(job)
        finally:
            # Closes the server connection even if aborting the job fails, so the connection is never leaked
            server.close()
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
# NOTE: click renders the function docstring below as the command's --help text, so its wording is kept unchanged.
@click.command()
@click.option(
    "-sp",
    "--session_path",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
    required=True,
    help="The absolute path to the session directory for which to resolve the dataset integration readiness marker.",
)
@click.option(
    "-c",
    "--create_processed_directories",
    is_flag=True,
    show_default=True,
    default=False,
    help="Determines whether to create the processed data hierarchy. This flag should be disabled for most runtimes.",
)
@click.option(
    "-pdr",
    "--processed_data_root",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
    required=False,
    help=(
        "The absolute path to the directory where processed data from all projects is stored on the machine that runs "
        "this command. This argument is used when calling the CLI on the BioHPC server, which uses different data "
        "volumes for raw and processed data. Note, the input path must point to the root directory, as it will be "
        "automatically modified to include the project name."
    ),
)
@click.option(
    "-r",
    "--remove",
    is_flag=True,
    show_default=True,
    default=False,
    help="Determines whether the command should create or remove the dataset integration marker.",
)
@click.option(
    "-um",
    "--update_manifest",
    is_flag=True,
    help=(
        "Determines whether to (re)generate the manifest file for the processed session's project. This flag "
        "should always be enabled when this CLI is executed on the remote compute server(s) to ensure that the "
        "manifest file always reflects the most actual state of each project."
    ),
)
def resolve_dataset_marker(
    session_path: Path,
    create_processed_directories: bool,
    processed_data_root: Path | None,
    remove: bool,
    update_manifest: bool,
) -> None:
    """Depending on configuration, either creates or removes the p53.bin marker from the target session.

    The p53.bin marker determines whether the session is ready for dataset integration. When the marker exists,
    processing pipelines are not allowed to work with the session data, ensuring that all processed data remains
    unchanged. If the marker does not exist, dataset integration pipelines are not allowed to work with the session
    data, enabling processing pipelines to safely modify the data at any time.
    """
    # Collects the CLI inputs under the keyword names expected by the library tool. Note, the CLI flag
    # 'create_processed_directories' maps to the tool's 'create_processed_data_directory' argument.
    marker_options = {
        "session_path": session_path,
        "create_processed_data_directory": create_processed_directories,
        "processed_data_root": processed_data_root,
        "remove": remove,
        "update_manifest": update_manifest,
    }

    # Delegates marker creation or removal to the library tool.
    resolve_p53_marker(**marker_options)
|
sl_shared_assets/cli.pyi
DELETED
|
@@ -1,106 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
from .tools import (
|
|
4
|
-
ascend_tyche_data as ascend_tyche_data,
|
|
5
|
-
resolve_p53_marker as resolve_p53_marker,
|
|
6
|
-
verify_session_checksum as verify_session_checksum,
|
|
7
|
-
generate_project_manifest as generate_project_manifest,
|
|
8
|
-
)
|
|
9
|
-
from .server import (
|
|
10
|
-
Server as Server,
|
|
11
|
-
JupyterJob as JupyterJob,
|
|
12
|
-
generate_server_credentials as generate_server_credentials,
|
|
13
|
-
)
|
|
14
|
-
from .data_classes import (
|
|
15
|
-
SessionData as SessionData,
|
|
16
|
-
TrackerFileNames as TrackerFileNames,
|
|
17
|
-
get_processing_tracker as get_processing_tracker,
|
|
18
|
-
)
|
|
19
|
-
|
|
20
|
-
def verify_session_integrity(
    session_path: Path,
    manager_id: int,
    create_processed_directories: bool,
    processed_data_root: Path | None,
    update_manifest: bool,
) -> None:
    """Checks the integrity of the target session's raw data (contents of the raw_data directory).

    This command assumes that the data has been checksummed during acquisition and contains an ax_checksum.txt file
    that stores the data checksum generated before transferring the data to the long-term storage destination. This
    function always verifies the integrity of the 'raw_data' directory. It does not work with 'processed_data' or any
    other directories. If the session data was corrupted, the command removes the 'telomere.bin' file, marking the
    session as 'incomplete' and automatically excluding it from all further automated processing runtimes. If the
    session data is intact, it generates a 'verified.bin' marker file inside the session's raw_data folder.

    The command is also used by Sun lab data acquisition systems to generate the processed data hierarchy for each
    processed session. This use case is fully automated and should not be triggered manually by the user.

    Args:
        session_path: The absolute path to the session directory whose raw data needs to be verified.
        manager_id: The xxHash-64 hash value that identifies the process that manages this runtime.
        create_processed_directories: Determines whether to create the processed data hierarchy.
        processed_data_root: The absolute path to the root directory that stores processed data from all projects.
            Only used if 'create_processed_directories' is True.
        update_manifest: Determines whether to (re)generate the manifest file for the session's project.
    """
|
|
39
|
-
|
|
40
|
-
def generate_project_manifest_file(
    project_path: Path, output_directory: Path, processed_data_root: Path | None
) -> None:
    """Generates the manifest .feather file that provides information about the data-processing state of all available
    project sessions.

    The manifest file is typically used when batch-processing session data on the remote compute server. It contains the
    comprehensive snapshot of the available project's data in a table-compatible format that can also be transferred
    between machines (as it is cached in a file).

    Args:
        project_path: The absolute path to the project directory where raw session data is stored.
        output_directory: The absolute path to the directory where to store the generated project manifest file.
        processed_data_root: The absolute path to the root directory that stores processed data from all projects,
            or None if it was not provided.
    """
|
|
50
|
-
|
|
51
|
-
def generate_server_credentials_file(
    output_directory: Path,
    host: str,
    username: str,
    password: str,
    storage_root: str,
    working_root: str,
    shared_directory_name: str,
) -> None:
    """Generates a new server_credentials.yaml file under the specified directory, using input information.

    This command is used to set up access to compute servers and clusters on new machines (PCs). The data stored inside
    the server_credentials.yaml file generated by this command is used by the Server and Job classes used in many Sun
    lab data processing libraries.

    Args:
        output_directory: The absolute path to the directory where to store the generated server credentials file.
        host: The host name or IP address of the server to connect to.
        username: The username to use for server authentication.
        password: The password to use for server authentication.
        storage_root: The absolute path to the root storage (slow) server directory.
        working_root: The absolute path to the root working (fast) server directory.
        shared_directory_name: The name of the shared directory used to store all Sun lab project data on the
            storage and working server volumes.
    """
|
|
66
|
-
|
|
67
|
-
def ascend_tyche_directory(input_directory: Path) -> None:
    """Restructures old Tyche project data to use the modern Sun lab data structure and uploads them to the processing
    server.

    This command is used to convert ('ascend') the old Tyche project data to the modern Sun lab structure. After
    ascension, the data can be processed and analyzed using all modern Sun lab (sl-) tools and libraries. Note, this
    process expects the input data to be preprocessed using an old Sun lab mesoscope data preprocessing pipeline. It
    will not work for any other project or data. Also, this command will only work on a machine (PC) that belongs to a
    valid Sun lab data acquisition system, such as VRPC of the Mesoscope-VR system.

    Args:
        input_directory: The absolute path to the directory that stores original Tyche animal folders.
    """
|
|
77
|
-
|
|
78
|
-
def start_jupyter_server(
    credentials_path: Path,
    name: str,
    environment: str,
    directory: Path | None,
    cores: int,
    memory: int,
    time: int,
    port: int,
) -> None:
    """Starts an interactive Jupyter session on the remote Sun lab server.

    This command should be used to run Jupyter lab and notebooks sessions on the remote Sun lab server. Since all lab
    data is stored on the server, this allows running light interactive analysis sessions on the same node as the data,
    while leveraging considerable compute resources of the server.

    Calling this command initializes a SLURM session that runs the interactive Jupyter server. Since this server
    directly competes for resources with all other headless jobs running on the server, it is imperative that each
    jupyter runtime uses only the minimum amount of resources and run-time as necessary. Do not use this command to run
    heavy data processing pipelines! Instead, consult with library documentation and use the headless Job class.

    Args:
        credentials_path: The absolute path to the server_credentials.yaml file for the target Sun lab server.
        name: The descriptive name to be given to the remote Jupyter server job.
        environment: The name of the conda environment to use for running the Jupyter server.
        directory: The absolute path to the server directory to use as the root directory for the jupyter session,
            or None to use the user's working directory.
        cores: The number of CPU cores to allocate to the Jupyter server.
        memory: The RAM, in Gigabytes, to allocate to the Jupyter server.
        time: The maximum runtime duration for this Jupyter server instance, in minutes.
        port: The port to use for the Jupyter server communication on the remote server, or 0 to select a valid
            port at random.
    """
|
|
92
|
-
|
|
93
|
-
def resolve_dataset_marker(
    session_path: Path,
    create_processed_directories: bool,
    processed_data_root: Path | None,
    remove: bool,
    update_manifest: bool,
) -> None:
    """Depending on configuration, either creates or removes the p53.bin marker from the target session.

    The p53.bin marker determines whether the session is ready for dataset integration. When the marker exists,
    processing pipelines are not allowed to work with the session data, ensuring that all processed data remains
    unchanged. If the marker does not exist, dataset integration pipelines are not allowed to work with the session
    data, enabling processing pipelines to safely modify the data at any time.

    Args:
        session_path: The absolute path to the session directory for which to resolve the marker.
        create_processed_directories: Determines whether to create the processed data hierarchy.
        processed_data_root: The absolute path to the root directory that stores processed data from all projects,
            or None if it was not provided.
        remove: Determines whether the command should create or remove the dataset integration marker.
        update_manifest: Determines whether to (re)generate the manifest file for the session's project.
    """
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
from .runtime_data import (
|
|
2
|
-
ZaberPositions as ZaberPositions,
|
|
3
|
-
MesoscopePositions as MesoscopePositions,
|
|
4
|
-
RunTrainingDescriptor as RunTrainingDescriptor,
|
|
5
|
-
LickTrainingDescriptor as LickTrainingDescriptor,
|
|
6
|
-
MesoscopeHardwareState as MesoscopeHardwareState,
|
|
7
|
-
WindowCheckingDescriptor as WindowCheckingDescriptor,
|
|
8
|
-
MesoscopeExperimentDescriptor as MesoscopeExperimentDescriptor,
|
|
9
|
-
)
|
|
10
|
-
from .session_data import (
|
|
11
|
-
RawData as RawData,
|
|
12
|
-
SessionData as SessionData,
|
|
13
|
-
SessionTypes as SessionTypes,
|
|
14
|
-
ProcessedData as ProcessedData,
|
|
15
|
-
TrackerFileNames as TrackerFileNames,
|
|
16
|
-
ProcessingTracker as ProcessingTracker,
|
|
17
|
-
generate_manager_id as generate_manager_id,
|
|
18
|
-
get_processing_tracker as get_processing_tracker,
|
|
19
|
-
)
|
|
20
|
-
from .surgery_data import (
|
|
21
|
-
DrugData as DrugData,
|
|
22
|
-
ImplantData as ImplantData,
|
|
23
|
-
SubjectData as SubjectData,
|
|
24
|
-
SurgeryData as SurgeryData,
|
|
25
|
-
InjectionData as InjectionData,
|
|
26
|
-
ProcedureData as ProcedureData,
|
|
27
|
-
)
|
|
28
|
-
from .configuration_data import (
|
|
29
|
-
MesoscopePaths as MesoscopePaths,
|
|
30
|
-
ExperimentState as ExperimentState,
|
|
31
|
-
ExperimentTrial as ExperimentTrial,
|
|
32
|
-
MesoscopeCameras as MesoscopeCameras,
|
|
33
|
-
AcquisitionSystems as AcquisitionSystems,
|
|
34
|
-
MesoscopeMicroControllers as MesoscopeMicroControllers,
|
|
35
|
-
MesoscopeAdditionalFirmware as MesoscopeAdditionalFirmware,
|
|
36
|
-
MesoscopeSystemConfiguration as MesoscopeSystemConfiguration,
|
|
37
|
-
MesoscopeExperimentConfiguration as MesoscopeExperimentConfiguration,
|
|
38
|
-
get_system_configuration_data as get_system_configuration_data,
|
|
39
|
-
set_system_configuration_file as set_system_configuration_file,
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
__all__ = [
|
|
43
|
-
"DrugData",
|
|
44
|
-
"ImplantData",
|
|
45
|
-
"SessionData",
|
|
46
|
-
"RawData",
|
|
47
|
-
"ProcessedData",
|
|
48
|
-
"SubjectData",
|
|
49
|
-
"SurgeryData",
|
|
50
|
-
"InjectionData",
|
|
51
|
-
"ProcedureData",
|
|
52
|
-
"ZaberPositions",
|
|
53
|
-
"ExperimentState",
|
|
54
|
-
"MesoscopePositions",
|
|
55
|
-
"MesoscopeHardwareState",
|
|
56
|
-
"RunTrainingDescriptor",
|
|
57
|
-
"LickTrainingDescriptor",
|
|
58
|
-
"MesoscopeExperimentConfiguration",
|
|
59
|
-
"MesoscopeExperimentDescriptor",
|
|
60
|
-
"MesoscopeSystemConfiguration",
|
|
61
|
-
"set_system_configuration_file",
|
|
62
|
-
"get_system_configuration_data",
|
|
63
|
-
"MesoscopePaths",
|
|
64
|
-
"MesoscopeCameras",
|
|
65
|
-
"MesoscopeMicroControllers",
|
|
66
|
-
"MesoscopeAdditionalFirmware",
|
|
67
|
-
"ProcessingTracker",
|
|
68
|
-
"ExperimentTrial",
|
|
69
|
-
"AcquisitionSystems",
|
|
70
|
-
"SessionTypes",
|
|
71
|
-
"WindowCheckingDescriptor",
|
|
72
|
-
"get_processing_tracker",
|
|
73
|
-
"generate_manager_id",
|
|
74
|
-
"TrackerFileNames",
|
|
75
|
-
]
|