sl-shared-assets 3.1.0-py3-none-any.whl → 3.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sl-shared-assets might be problematic. Click here for more details.

sl_shared_assets/cli.py CHANGED
@@ -128,7 +128,7 @@ def generate_project_manifest_file(
128
128
  generate_project_manifest(
129
129
  raw_project_directory=Path(project_path),
130
130
  output_directory=Path(output_directory),
131
- processed_project_directory=Path(project_processed_path) if project_processed_path else None,
131
+ processed_data_root=Path(project_processed_path) if project_processed_path else None,
132
132
  )
133
133
  # noinspection PyTypeChecker
134
134
  console.echo(message=f"Project {Path(project_path).stem} data manifest file: generated.", level=LogLevel.SUCCESS)
@@ -7,6 +7,7 @@ from datetime import datetime
7
7
 
8
8
  import pytz
9
9
  import polars as pl
10
+ from filelock import FileLock
10
11
  from ataraxis_base_utilities import console
11
12
 
12
13
  from ..data_classes import (
@@ -229,7 +230,7 @@ class ProjectManifest:
229
230
 
230
231
 
231
232
  def generate_project_manifest(
232
- raw_project_directory: Path, output_directory: Path, processed_project_directory: Path | None = None
233
+ raw_project_directory: Path, output_directory: Path, processed_data_root: Path | None = None
233
234
  ) -> None:
234
235
  """Builds and saves the project manifest .feather file under the specified output directory.
235
236
 
@@ -247,9 +248,9 @@ def generate_project_manifest(
247
248
  Args:
248
249
  raw_project_directory: The path to the root project directory used to store raw session data.
249
250
  output_directory: The path to the directory where to save the generated manifest file.
250
- processed_project_directory: The path to the root project directory used to store processed session data if it
251
- is different from the 'raw_project_directory'. Typically, this would be the case on remote compute server(s)
252
- and not on local machines.
251
+ processed_data_root: The path to the root directory (volume) used to store processed data for all Sun lab
252
+ projects if it is different from the parent of the 'raw_project_directory'. Typically, this would be the
253
+ case on remote compute server(s) and not on local machines.
253
254
  """
254
255
 
255
256
  if not raw_project_directory.exists():
@@ -288,139 +289,146 @@ def generate_project_manifest(
288
289
  "dataset": [], # Determines whether the session's data is ready to be integrated into a dataset.
289
290
  }
290
291
 
291
- # Loops over each session of every animal in the project and extracts session ID information and information
292
- # about which processing steps have been successfully applied to the session.
293
- for directory in session_directories:
294
- # Skips processing directories without files (sessions with empty raw-data directories)
295
- if len([file for file in directory.joinpath("raw_data").glob("*")]) == 0:
296
- continue
297
-
298
- # Instantiates the SessionData instance to resolve the paths to all session's data files and locations.
299
- session_data = SessionData.load(
300
- session_path=directory,
301
- processed_data_root=processed_project_directory,
302
- make_processed_data_directory=False,
303
- )
292
+ # Resolves the path to the manifest .feather file to be created and the .lock file for the generated manifest
293
+ manifest_path = output_directory.joinpath(f"{raw_project_directory.stem}_manifest.feather")
294
+ manifest_lock = manifest_path.with_suffix(manifest_path.suffix + ".lock")
295
+
296
+ # Acquires the lock
297
+ lock = FileLock(str(manifest_lock))
298
+ with lock.acquire(timeout=20.0):
299
+ # Loops over each session of every animal in the project and extracts session ID information and information
300
+ # about which processing steps have been successfully applied to the session.
301
+ for directory in session_directories:
302
+ # Skips processing directories without files (sessions with empty raw-data directories)
303
+ if len([file for file in directory.joinpath("raw_data").glob("*")]) == 0:
304
+ continue
305
+
306
+ # Instantiates the SessionData instance to resolve the paths to all session's data files and locations.
307
+ session_data = SessionData.load(
308
+ session_path=directory,
309
+ processed_data_root=processed_data_root,
310
+ make_processed_data_directory=False,
311
+ )
304
312
 
305
- # Fills the manifest dictionary with data for the processed session:
306
-
307
- # Extracts ID and data path information from the SessionData instance
308
- manifest["animal"].append(session_data.animal_id)
309
- manifest["session"].append(session_data.session_name)
310
- manifest["type"].append(session_data.session_type)
311
-
312
- # Parses session name into the date-time object to simplify working with date-time data in the future
313
- date_time_components = session_data.session_name.split("-")
314
- date_time = datetime(
315
- year=int(date_time_components[0]),
316
- month=int(date_time_components[1]),
317
- day=int(date_time_components[2]),
318
- hour=int(date_time_components[3]),
319
- minute=int(date_time_components[4]),
320
- second=int(date_time_components[5]),
321
- microsecond=int(date_time_components[6]),
322
- tzinfo=pytz.UTC,
323
- )
313
+ # Fills the manifest dictionary with data for the processed session:
314
+
315
+ # Extracts ID and data path information from the SessionData instance
316
+ manifest["animal"].append(session_data.animal_id)
317
+ manifest["session"].append(session_data.session_name)
318
+ manifest["type"].append(session_data.session_type)
319
+
320
+ # Parses session name into the date-time object to simplify working with date-time data in the future
321
+ date_time_components = session_data.session_name.split("-")
322
+ date_time = datetime(
323
+ year=int(date_time_components[0]),
324
+ month=int(date_time_components[1]),
325
+ day=int(date_time_components[2]),
326
+ hour=int(date_time_components[3]),
327
+ minute=int(date_time_components[4]),
328
+ second=int(date_time_components[5]),
329
+ microsecond=int(date_time_components[6]),
330
+ tzinfo=pytz.UTC,
331
+ )
324
332
 
325
- # Converts from UTC to EST / EDT for user convenience
326
- eastern = pytz.timezone("America/New_York")
327
- date_time = date_time.astimezone(eastern)
328
- manifest["date"].append(date_time)
333
+ # Converts from UTC to EST / EDT for user convenience
334
+ eastern = pytz.timezone("America/New_York")
335
+ date_time = date_time.astimezone(eastern)
336
+ manifest["date"].append(date_time)
329
337
 
330
- # Depending on the session type, instantiates the appropriate descriptor instance and uses it to read the
331
- # experimenter notes
332
- if session_data.session_type == SessionTypes.LICK_TRAINING:
333
- descriptor: LickTrainingDescriptor = LickTrainingDescriptor.from_yaml( # type: ignore
334
- file_path=session_data.raw_data.session_descriptor_path
335
- )
336
- manifest["notes"].append(descriptor.experimenter_notes)
337
- elif session_data.session_type == SessionTypes.RUN_TRAINING:
338
- descriptor: RunTrainingDescriptor = RunTrainingDescriptor.from_yaml( # type: ignore
339
- file_path=session_data.raw_data.session_descriptor_path
340
- )
341
- manifest["notes"].append(descriptor.experimenter_notes)
342
- elif session_data.session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
343
- descriptor: MesoscopeExperimentDescriptor = MesoscopeExperimentDescriptor.from_yaml( # type: ignore
344
- file_path=session_data.raw_data.session_descriptor_path
345
- )
346
- manifest["notes"].append(descriptor.experimenter_notes)
347
- elif session_data.session_type == SessionTypes.WINDOW_CHECKING:
348
- # sl-experiment version 3.0.0 added session descriptors to Window Checking runtimes. Since the file does not
349
- # exist in prior versions, this section is written to statically handle the discrepancy.
350
- try:
351
- descriptor: WindowCheckingDescriptor = WindowCheckingDescriptor.from_yaml( # type: ignore
338
+ # Depending on the session type, instantiates the appropriate descriptor instance and uses it to read the
339
+ # experimenter notes
340
+ if session_data.session_type == SessionTypes.LICK_TRAINING:
341
+ descriptor: LickTrainingDescriptor = LickTrainingDescriptor.from_yaml( # type: ignore
352
342
  file_path=session_data.raw_data.session_descriptor_path
353
343
  )
354
344
  manifest["notes"].append(descriptor.experimenter_notes)
355
- except Exception:
356
- manifest["notes"].append("N/A")
357
-
358
- # If the session raw_data folder contains the telomere.bin file, marks the session as complete.
359
- manifest["complete"].append(session_data.raw_data.telomere_path.exists())
360
-
361
- # Data verification status
362
- tracker = ProcessingTracker(file_path=session_data.raw_data.integrity_verification_tracker_path)
363
- manifest["integrity"].append(tracker.is_complete)
364
-
365
- # If the session is incomplete or unverified, marks all processing steps as FALSE, as automatic processing is
366
- # disabled for incomplete sessions. If the session is unverified, the case is even more severe, as its data may
367
- # be corrupted.
368
- if not manifest["complete"][-1] or not manifest["integrity"][-1]:
369
- manifest["suite2p"].append(False)
370
- manifest["dataset"].append(False)
371
- manifest["behavior"].append(False)
372
- manifest["video"].append(False)
373
- continue # Cycles to the next session
374
-
375
- # Suite2p (single-day) processing status.
376
- tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
377
- manifest["suite2p"].append(tracker.is_complete)
378
-
379
- # Behavior data processing status.
380
- tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
381
- manifest["behavior"].append(tracker.is_complete)
382
-
383
- # DeepLabCut (video) processing status.
384
- tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
385
- manifest["video"].append(tracker.is_complete)
386
-
387
- # Tracks whether the session's data is currently in the processing or dataset integration mode.
388
- manifest["dataset"].append(session_data.processed_data.p53_path.exists())
389
-
390
- # If all animal IDs are integer-convertible, stores them as numbers to promote proper sorting. Otherwise, stores
391
- # them as strings. The latter options are primarily kept for compatibility with Tyche data
392
- animal_type: type[pl.UInt64] | type[pl.String]
393
- if all([str(animal).isdigit() for animal in manifest["animal"]]):
394
- # Converts all strings to integers
395
- manifest["animal"] = [int(animal) for animal in manifest["animal"]] # type: ignore
396
- animal_type = pl.UInt64 # Uint64 for future proofing
397
- else:
398
- animal_type = pl.String
399
-
400
- # Converts the manifest dictionary to a Polars Dataframe.
401
- schema = {
402
- "animal": animal_type,
403
- "date": pl.Datetime,
404
- "session": pl.String,
405
- "type": pl.String,
406
- "notes": pl.String,
407
- "complete": pl.UInt8,
408
- "integrity": pl.UInt8,
409
- "suite2p": pl.UInt8,
410
- "dataset": pl.UInt8,
411
- "behavior": pl.UInt8,
412
- "video": pl.UInt8,
413
- }
414
- df = pl.DataFrame(manifest, schema=schema, strict=False)
415
-
416
- # Sorts the DataFrame by animal and then session. Since we assign animal IDs sequentially and 'name' sessions based
417
- # on acquisition timestamps, the sort order is chronological.
418
- sorted_df = df.sort(["animal", "session"])
419
-
420
- # Saves the generated manifest to the project-specific manifest .feather file for further processing.
421
- sorted_df.write_ipc(
422
- file=output_directory.joinpath(f"{raw_project_directory.stem}_manifest.feather"), compression="lz4"
423
- )
345
+ elif session_data.session_type == SessionTypes.RUN_TRAINING:
346
+ descriptor: RunTrainingDescriptor = RunTrainingDescriptor.from_yaml( # type: ignore
347
+ file_path=session_data.raw_data.session_descriptor_path
348
+ )
349
+ manifest["notes"].append(descriptor.experimenter_notes)
350
+ elif session_data.session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
351
+ descriptor: MesoscopeExperimentDescriptor = MesoscopeExperimentDescriptor.from_yaml( # type: ignore
352
+ file_path=session_data.raw_data.session_descriptor_path
353
+ )
354
+ manifest["notes"].append(descriptor.experimenter_notes)
355
+ elif session_data.session_type == SessionTypes.WINDOW_CHECKING:
356
+ # sl-experiment version 3.0.0 added session descriptors to Window Checking runtimes. Since the file
357
+ # does not exist in prior versions, this section is written to statically handle the discrepancy.
358
+ try:
359
+ descriptor: WindowCheckingDescriptor = WindowCheckingDescriptor.from_yaml( # type: ignore
360
+ file_path=session_data.raw_data.session_descriptor_path
361
+ )
362
+ manifest["notes"].append(descriptor.experimenter_notes)
363
+ except Exception:
364
+ manifest["notes"].append("N/A")
365
+
366
+ # If the session raw_data folder contains the telomere.bin file, marks the session as complete.
367
+ manifest["complete"].append(session_data.raw_data.telomere_path.exists())
368
+
369
+ # Data verification status
370
+ tracker = ProcessingTracker(file_path=session_data.raw_data.integrity_verification_tracker_path)
371
+ manifest["integrity"].append(tracker.is_complete)
372
+
373
+ # If the session is incomplete or unverified, marks all processing steps as FALSE, as automatic processing
374
+ # is disabled for incomplete sessions. If the session is unverified, the case is even more severe, as its
375
+ # data may be corrupted.
376
+ if not manifest["complete"][-1] or not manifest["integrity"][-1]:
377
+ manifest["suite2p"].append(False)
378
+ manifest["dataset"].append(False)
379
+ manifest["behavior"].append(False)
380
+ manifest["video"].append(False)
381
+ continue # Cycles to the next session
382
+
383
+ # Suite2p (single-day) processing status.
384
+ tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
385
+ manifest["suite2p"].append(tracker.is_complete)
386
+
387
+ # Behavior data processing status.
388
+ tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
389
+ manifest["behavior"].append(tracker.is_complete)
390
+
391
+ # DeepLabCut (video) processing status.
392
+ tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
393
+ manifest["video"].append(tracker.is_complete)
394
+
395
+ # Tracks whether the session's data is currently in the processing or dataset integration mode.
396
+ manifest["dataset"].append(session_data.processed_data.p53_path.exists())
397
+
398
+ # If all animal IDs are integer-convertible, stores them as numbers to promote proper sorting. Otherwise, stores
399
+ # them as strings. The latter options are primarily kept for compatibility with Tyche data
400
+ animal_type: type[pl.UInt64] | type[pl.String]
401
+ if all([str(animal).isdigit() for animal in manifest["animal"]]):
402
+ # Converts all strings to integers
403
+ manifest["animal"] = [int(animal) for animal in manifest["animal"]] # type: ignore
404
+ animal_type = pl.UInt64 # Uint64 for future proofing
405
+ else:
406
+ animal_type = pl.String
407
+
408
+ # Converts the manifest dictionary to a Polars Dataframe.
409
+ schema = {
410
+ "animal": animal_type,
411
+ "date": pl.Datetime,
412
+ "session": pl.String,
413
+ "type": pl.String,
414
+ "notes": pl.String,
415
+ "complete": pl.UInt8,
416
+ "integrity": pl.UInt8,
417
+ "suite2p": pl.UInt8,
418
+ "dataset": pl.UInt8,
419
+ "behavior": pl.UInt8,
420
+ "video": pl.UInt8,
421
+ }
422
+ df = pl.DataFrame(manifest, schema=schema, strict=False)
423
+
424
+ # Sorts the DataFrame by animal and then session. Since we assign animal IDs sequentially and 'name' sessions
425
+ # based on acquisition timestamps, the sort order is chronological.
426
+ sorted_df = df.sort(["animal", "session"])
427
+
428
+ # Saves the generated manifest to the project-specific manifest .feather file for further processing.
429
+ sorted_df.write_ipc(
430
+ file=output_directory.joinpath(f"{raw_project_directory.stem}_manifest.feather"), compression="lz4"
431
+ )
424
432
 
425
433
 
426
434
  def verify_session_checksum(
@@ -508,16 +516,10 @@ def verify_session_checksum(
508
516
  # the raw project directory.
509
517
  raw_directory = session_path.parents[1]
510
518
 
511
- # Depending on the processed_data_root configuration, determines the path for the project's processed
512
- # data directory.
513
- processed_directory: Path | None = None
514
- if processed_data_root is not None:
515
- processed_directory = processed_data_root.joinpath(session_data.project_name)
516
-
517
519
  # Generates the manifest file inside the root raw data project directory
518
520
  generate_project_manifest(
519
521
  raw_project_directory=session_path.parents[1],
520
- processed_project_directory=processed_directory,
522
+ processed_data_root=processed_data_root,
521
523
  output_directory=raw_directory,
522
524
  )
523
525
 
@@ -618,15 +620,9 @@ def resolve_p53_marker(
618
620
  # the raw project directory.
619
621
  raw_directory = session_path.parents[1]
620
622
 
621
- # Depending on the processed_data_root configuration, determines the path for the project's processed
622
- # data directory.
623
- processed_directory: Path | None = None
624
- if processed_data_root is not None:
625
- processed_directory = processed_data_root.joinpath(session_data.project_name)
626
-
627
623
  # Generates the manifest file inside the root raw data project directory
628
624
  generate_project_manifest(
629
625
  raw_project_directory=session_path.parents[1],
630
- processed_project_directory=processed_directory,
626
+ processed_data_root=processed_data_root,
631
627
  output_directory=raw_directory,
632
628
  )
@@ -107,7 +107,7 @@ class ProjectManifest:
107
107
  """
108
108
 
109
109
  def generate_project_manifest(
110
- raw_project_directory: Path, output_directory: Path, processed_project_directory: Path | None = None
110
+ raw_project_directory: Path, output_directory: Path, processed_data_root: Path | None = None
111
111
  ) -> None:
112
112
  """Builds and saves the project manifest .feather file under the specified output directory.
113
113
 
@@ -125,9 +125,9 @@ def generate_project_manifest(
125
125
  Args:
126
126
  raw_project_directory: The path to the root project directory used to store raw session data.
127
127
  output_directory: The path to the directory where to save the generated manifest file.
128
- processed_project_directory: The path to the root project directory used to store processed session data if it
129
- is different from the 'raw_project_directory'. Typically, this would be the case on remote compute server(s)
130
- and not on local machines.
128
+ processed_data_root: The path to the root directory (volume) used to store processed data for all Sun lab
129
+ projects if it is different from the parent of the 'raw_project_directory'. Typically, this would be the
130
+ case on remote compute server(s) and not on local machines.
131
131
  """
132
132
 
133
133
  def verify_session_checksum(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sl-shared-assets
3
- Version: 3.1.0
3
+ Version: 3.1.2
4
4
  Summary: Provides data acquisition and processing assets shared between Sun (NeuroAI) lab libraries.
5
5
  Project-URL: Homepage, https://github.com/Sun-Lab-NBB/sl-shared-assets
6
6
  Project-URL: Documentation, https://sl-shared-assets-api-docs.netlify.app/
@@ -1,6 +1,6 @@
1
1
  sl_shared_assets/__init__.py,sha256=ybThh0XDtijjwahKkSEnnQ44rxrN2SVyjB5dHaXts0E,2391
2
2
  sl_shared_assets/__init__.pyi,sha256=Cb-umRqvnynk2udbgqAJ6h5_tiJyvVtWmx0kLKrL2Yg,2678
3
- sl_shared_assets/cli.py,sha256=OIwXf6pNPnzqzUPL7mSmEw17KIa3yAOpP0Mpo1Zpf88,19087
3
+ sl_shared_assets/cli.py,sha256=ERSU9iTLZxL0eIseQA-5d7eTBjlmfjHhEozWTg0dkJA,19079
4
4
  sl_shared_assets/cli.pyi,sha256=5hEbOnYaH4q5qdqJ-zhM9-ElzgcaBeMAX34tuHaUDos,5328
5
5
  sl_shared_assets/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  sl_shared_assets/data_classes/__init__.py,sha256=mP__bBIIjMf0EETM4PgQzKy1ZKsjp6paRPNDWWbPRV4,1962
@@ -25,12 +25,12 @@ sl_shared_assets/tools/ascension_tools.py,sha256=xI-hrkR9NIgb7lyhj-ntc8tCYQvDEv6
25
25
  sl_shared_assets/tools/ascension_tools.pyi,sha256=fs5j7nbnZ4WpgK8D75A7WJcvFMwK_MUO9ULIYo1YkGo,3739
26
26
  sl_shared_assets/tools/packaging_tools.py,sha256=VxQoluGPDUWjPj1ftEt2dvUcdmj0g7T1frGZhZPM8NE,7541
27
27
  sl_shared_assets/tools/packaging_tools.pyi,sha256=vgGbAQCExwg-0A5F72MzEhzHxu97Nqg1yuz-5P89ycU,3118
28
- sl_shared_assets/tools/project_management_tools.py,sha256=vutKi0pdQn5dxBk1OcxPB4XspzQyJwzerNhGi4Vg4iw,31935
29
- sl_shared_assets/tools/project_management_tools.pyi,sha256=hdn0U9e3_j9McJH75Dzoas-FxcB9nVCTHEFHPofdLtg,11361
28
+ sl_shared_assets/tools/project_management_tools.py,sha256=vGCysu-mvK1JebDOnmyb7IvseLwpHY4T3XhAxcA-bJI,32153
29
+ sl_shared_assets/tools/project_management_tools.pyi,sha256=r45nLPP51mrtn0ajm9iSVq-aR37CS71DGZuRXqd29Zc,11377
30
30
  sl_shared_assets/tools/transfer_tools.py,sha256=vqYO4sERZV0W1DFNFnTpJA6QBZ4QJA94a2TyUhZW2Qk,6605
31
31
  sl_shared_assets/tools/transfer_tools.pyi,sha256=WtUGfaKV9FP_CnhBg_UvclpuDvOlEESOSMlEDtWpOLg,3293
32
- sl_shared_assets-3.1.0.dist-info/METADATA,sha256=SbnWSGHffTfwIaQGGP04zsSZ-T2yFga1jL79eLLoib8,56944
33
- sl_shared_assets-3.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
34
- sl_shared_assets-3.1.0.dist-info/entry_points.txt,sha256=UmO1rl7ly9N7HWPwWyP9E0b5KBUStpBo4TRoqNtizDY,430
35
- sl_shared_assets-3.1.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
36
- sl_shared_assets-3.1.0.dist-info/RECORD,,
32
+ sl_shared_assets-3.1.2.dist-info/METADATA,sha256=TOn08frNpw_CDhAGpYrjtIrxdcJ4TwMloOT4kqllvLU,56944
33
+ sl_shared_assets-3.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
34
+ sl_shared_assets-3.1.2.dist-info/entry_points.txt,sha256=UmO1rl7ly9N7HWPwWyP9E0b5KBUStpBo4TRoqNtizDY,430
35
+ sl_shared_assets-3.1.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
36
+ sl_shared_assets-3.1.2.dist-info/RECORD,,