sl_shared_assets-3.0.1-py3-none-any.whl → sl_shared_assets-3.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of sl-shared-assets might be problematic.

sl_shared_assets/__init__.py CHANGED
@@ -8,6 +8,7 @@ Authors: Ivan Kondratyev (Inkaros), Kushaan Gupta, Natalie Yeung
 from ataraxis_base_utilities import console
 
 from .tools import (
+    ProjectManifest,
     resolve_p53_marker,
     transfer_directory,
     calculate_directory_checksum,
@@ -86,6 +87,7 @@ __all__ = [
     "AcquisitionSystems",
     "WindowCheckingDescriptor",
     # Tools package
+    "ProjectManifest",
    "resolve_p53_marker",
    "transfer_directory",
    "calculate_directory_checksum",
sl_shared_assets/__init__.pyi CHANGED
@@ -1,4 +1,5 @@
 from .tools import (
+    ProjectManifest as ProjectManifest,
     resolve_p53_marker as resolve_p53_marker,
     transfer_directory as transfer_directory,
     calculate_directory_checksum as calculate_directory_checksum,
@@ -75,6 +76,7 @@ __all__ = [
     "SessionTypes",
     "AcquisitionSystems",
     "WindowCheckingDescriptor",
+    "ProjectManifest",
     "resolve_p53_marker",
     "transfer_directory",
     "calculate_directory_checksum",
sl_shared_assets/cli.py CHANGED
@@ -43,8 +43,18 @@ from .data_classes import SessionData, ProcessingTracker
         "used if 'create_processed_directories' flag is True."
     ),
 )
+@click.option(
+    "-um",
+    "--update_manifest",
+    is_flag=True,
+    help=(
+        "Determines whether to (re)generate the manifest file for the processed session's project. This flag "
+        "should always be enabled when this CLI is executed on the remote compute server(s) to ensure that the "
+        "manifest file always reflects the most up-to-date state of each project."
+    ),
+)
 def verify_session_integrity(
-    session_path: Path, create_processed_directories: bool, processed_data_root: Path | None
+    session_path: Path, create_processed_directories: bool, processed_data_root: Path | None, update_manifest: bool
 ) -> None:
     """Checks the integrity of the target session's raw data (contents of the raw_data directory).
 
@@ -63,7 +73,10 @@ def verify_session_integrity(
 
     # Runs the verification process
     verify_session_checksum(
-        session, create_processed_data_directory=create_processed_directories, processed_data_root=processed_data_root
+        session,
+        create_processed_data_directory=create_processed_directories,
+        processed_data_root=processed_data_root,
+        update_manifest=update_manifest,
     )
 
     # Checks the outcome of the verification process
@@ -437,8 +450,22 @@ def start_jupyter_server(
         "being integrated into any datasets."
     ),
 )
+@click.option(
+    "-um",
+    "--update_manifest",
+    is_flag=True,
+    help=(
+        "Determines whether to (re)generate the manifest file for the processed session's project. This flag "
+        "should always be enabled when this CLI is executed on the remote compute server(s) to ensure that the "
+        "manifest file always reflects the most up-to-date state of each project."
+    ),
+)
 def resolve_dataset_marker(
-    session_path: Path, create_processed_directories: bool, project_processed_path: Path | None, remove: bool
+    session_path: Path,
+    create_processed_directories: bool,
+    project_processed_path: Path | None,
+    remove: bool,
+    update_manifest: bool,
 ) -> None:
     """Depending on configuration, either creates or removes the p53.bin marker from the target session.
 
@@ -452,4 +479,5 @@
         create_processed_data_directory=create_processed_directories,
         processed_data_root=project_processed_path,
         remove=remove,
+        update_manifest=update_manifest,
     )
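
Both commands gain the same -um/--update_manifest flag. A minimal invocation sketch using click's test harness; the --session_path option name and the path are assumed for illustration, only -um/--update_manifest is confirmed by this diff:

from click.testing import CliRunner

from sl_shared_assets.cli import verify_session_integrity

runner = CliRunner()
# Verifies the session's raw data and regenerates the project manifest afterwards.
result = runner.invoke(verify_session_integrity, ["--session_path", "/server/raw/project/animal/session", "-um"])
print(result.exit_code, result.output)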
sl_shared_assets/cli.pyi CHANGED
@@ -17,7 +17,7 @@ from .data_classes import (
 )
 
 def verify_session_integrity(
-    session_path: Path, create_processed_directories: bool, processed_data_root: Path | None
+    session_path: Path, create_processed_directories: bool, processed_data_root: Path | None, update_manifest: bool
 ) -> None:
     """Checks the integrity of the target session's raw data (contents of the raw_data directory).
 
@@ -86,7 +86,11 @@ def start_jupyter_server(
     """
 
 def resolve_dataset_marker(
-    session_path: Path, create_processed_directories: bool, project_processed_path: Path | None, remove: bool
+    session_path: Path,
+    create_processed_directories: bool,
+    project_processed_path: Path | None,
+    remove: bool,
+    update_manifest: bool,
 ) -> None:
     """Depending on configuration, either creates or removes the p53.bin marker from the target session.
 
sl_shared_assets/data_classes/session_data.pyi CHANGED
@@ -249,7 +249,7 @@ class ProcessingTracker(YamlConfig):
    def __post_init__(self) -> None: ...
    def __del__(self) -> None:
        """If the instance as used to start a runtime, ensures that the instance properly marks the runtime as completed
-        or erred before beign garbage-collected.
+        or erred before being garbage-collected.
 
        This is a security mechanism to prevent deadlocking the processed session and pipeline for future runtimes.
        """
sl_shared_assets/tools/__init__.py CHANGED
@@ -4,9 +4,15 @@ integrity of the data. The tools from this package are used by most other data p
 from .transfer_tools import transfer_directory
 from .ascension_tools import ascend_tyche_data
 from .packaging_tools import calculate_directory_checksum
-from .project_management_tools import resolve_p53_marker, verify_session_checksum, generate_project_manifest
+from .project_management_tools import (
+    ProjectManifest,
+    resolve_p53_marker,
+    verify_session_checksum,
+    generate_project_manifest,
+)
 
 __all__ = [
+    "ProjectManifest",
     "transfer_directory",
     "calculate_directory_checksum",
     "ascend_tyche_data",
sl_shared_assets/tools/__init__.pyi CHANGED
@@ -2,12 +2,14 @@ from .transfer_tools import transfer_directory as transfer_directory
 from .ascension_tools import ascend_tyche_data as ascend_tyche_data
 from .packaging_tools import calculate_directory_checksum as calculate_directory_checksum
 from .project_management_tools import (
+    ProjectManifest as ProjectManifest,
     resolve_p53_marker as resolve_p53_marker,
     verify_session_checksum as verify_session_checksum,
     generate_project_manifest as generate_project_manifest,
 )
 
 __all__ = [
+    "ProjectManifest",
     "transfer_directory",
     "calculate_directory_checksum",
     "ascend_tyche_data",
sl_shared_assets/tools/project_management_tools.py CHANGED
@@ -7,6 +7,7 @@ from datetime import datetime
 
 import pytz
 import polars as pl
+from filelock import FileLock
 from ataraxis_base_utilities import console
 
 from ..data_classes import (
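
The new filelock import exists to serialize concurrent manifest writes, as the next hunk shows. A minimal sketch of the pattern, assuming illustrative file names; the real code derives them from the project directory:

from filelock import FileLock, Timeout

lock = FileLock("my_project_manifest.feather.lock")  # a sibling .lock file guards the manifest
try:
    with lock.acquire(timeout=20.0):
        ...  # regenerate and overwrite the manifest while no other process can write it
except Timeout:
    print("another process is holding the manifest lock")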
@@ -288,143 +289,153 @@ def generate_project_manifest(
         "dataset": [],  # Determines whether the session's data is ready to be integrated into a dataset.
     }
 
-    # Loops over each session of every animal in the project and extracts session ID information and information
-    # about which processing steps have been successfully applied to the session.
-    for directory in session_directories:
-        # Skips processing directories without files (sessions with empty raw-data directories)
-        if len([file for file in directory.joinpath("raw_data").glob("*")]) == 0:
-            continue
-
-        # Instantiates the SessionData instance to resolve the paths to all session's data files and locations.
-        session_data = SessionData.load(
-            session_path=directory,
-            processed_data_root=processed_project_directory,
-            make_processed_data_directory=False,
-        )
+    # Resolves the path to the manifest .feather file to be created and the .lock file for the generated manifest
+    manifest_path = output_directory.joinpath(f"{raw_project_directory.stem}_manifest.feather")
+    manifest_lock = manifest_path.with_suffix(manifest_path.suffix + ".lock")
+
+    # Acquires the lock
+    lock = FileLock(str(manifest_lock))
+    with lock.acquire(timeout=20.0):
+        # Loops over each session of every animal in the project and extracts session ID information and information
+        # about which processing steps have been successfully applied to the session.
+        for directory in session_directories:
+            # Skips processing directories without files (sessions with empty raw-data directories)
+            if len([file for file in directory.joinpath("raw_data").glob("*")]) == 0:
+                continue
+
+            # Instantiates the SessionData instance to resolve the paths to all session's data files and locations.
+            session_data = SessionData.load(
+                session_path=directory,
+                processed_data_root=processed_project_directory,
+                make_processed_data_directory=False,
+            )
 
-        # Fills the manifest dictionary with data for the processed session:
-
-        # Extracts ID and data path information from the SessionData instance
-        manifest["animal"].append(session_data.animal_id)
-        manifest["session"].append(session_data.session_name)
-        manifest["type"].append(session_data.session_type)
-
-        # Parses session name into the date-time object to simplify working with date-time data in the future
-        date_time_components = session_data.session_name.split("-")
-        date_time = datetime(
-            year=int(date_time_components[0]),
-            month=int(date_time_components[1]),
-            day=int(date_time_components[2]),
-            hour=int(date_time_components[3]),
-            minute=int(date_time_components[4]),
-            second=int(date_time_components[5]),
-            microsecond=int(date_time_components[6]),
-            tzinfo=pytz.UTC,
-        )
+            # Fills the manifest dictionary with data for the processed session:
+
+            # Extracts ID and data path information from the SessionData instance
+            manifest["animal"].append(session_data.animal_id)
+            manifest["session"].append(session_data.session_name)
+            manifest["type"].append(session_data.session_type)
+
+            # Parses session name into the date-time object to simplify working with date-time data in the future
+            date_time_components = session_data.session_name.split("-")
+            date_time = datetime(
+                year=int(date_time_components[0]),
+                month=int(date_time_components[1]),
+                day=int(date_time_components[2]),
+                hour=int(date_time_components[3]),
+                minute=int(date_time_components[4]),
+                second=int(date_time_components[5]),
+                microsecond=int(date_time_components[6]),
+                tzinfo=pytz.UTC,
+            )
 
-        # Converts from UTC to EST / EDT for user convenience
-        eastern = pytz.timezone("America/New_York")
-        date_time = date_time.astimezone(eastern)
-        manifest["date"].append(date_time)
+            # Converts from UTC to EST / EDT for user convenience
+            eastern = pytz.timezone("America/New_York")
+            date_time = date_time.astimezone(eastern)
+            manifest["date"].append(date_time)
 
-        # Depending on the session type, instantiates the appropriate descriptor instance and uses it to read the
-        # experimenter notes
-        if session_data.session_type == SessionTypes.LICK_TRAINING:
-            descriptor: LickTrainingDescriptor = LickTrainingDescriptor.from_yaml(  # type: ignore
-                file_path=session_data.raw_data.session_descriptor_path
-            )
-            manifest["notes"].append(descriptor.experimenter_notes)
-        elif session_data.session_type == SessionTypes.RUN_TRAINING:
-            descriptor: RunTrainingDescriptor = RunTrainingDescriptor.from_yaml(  # type: ignore
-                file_path=session_data.raw_data.session_descriptor_path
-            )
-            manifest["notes"].append(descriptor.experimenter_notes)
-        elif session_data.session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
-            descriptor: MesoscopeExperimentDescriptor = MesoscopeExperimentDescriptor.from_yaml(  # type: ignore
-                file_path=session_data.raw_data.session_descriptor_path
-            )
-            manifest["notes"].append(descriptor.experimenter_notes)
-        elif session_data.session_type == SessionTypes.WINDOW_CHECKING:
-            # sl-experiment version 3.0.0 added session descriptors to Window Checking runtimes. Since the file does not
-            # exist in prior versions, this section is written to statically handle the discrepancy.
-            try:
-                descriptor: WindowCheckingDescriptor = WindowCheckingDescriptor.from_yaml(  # type: ignore
+            # Depending on the session type, instantiates the appropriate descriptor instance and uses it to read the
+            # experimenter notes
+            if session_data.session_type == SessionTypes.LICK_TRAINING:
+                descriptor: LickTrainingDescriptor = LickTrainingDescriptor.from_yaml(  # type: ignore
                     file_path=session_data.raw_data.session_descriptor_path
                 )
                 manifest["notes"].append(descriptor.experimenter_notes)
-            except Exception:
-                manifest["notes"].append("N/A")
-
-        # If the session raw_data folder contains the telomere.bin file, marks the session as complete.
-        manifest["complete"].append(session_data.raw_data.telomere_path.exists())
-
-        # Data verification status
-        tracker = ProcessingTracker(file_path=session_data.raw_data.integrity_verification_tracker_path)
-        manifest["integrity"].append(tracker.is_complete)
-
-        # If the session is incomplete or unverified, marks all processing steps as FALSE, as automatic processing is
-        # disabled for incomplete sessions. If the session is unverified, the case is even more severe, as its data may
-        # be corrupted.
-        if not manifest["complete"][-1] or not manifest["integrity"][-1]:
-            manifest["suite2p"].append(False)
-            manifest["dataset"].append(False)
-            manifest["behavior"].append(False)
-            manifest["video"].append(False)
-            continue  # Cycles to the next session
-
-        # Suite2p (single-day) processing status.
-        tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
-        manifest["suite2p"].append(tracker.is_complete)
-
-        # Behavior data processing status.
-        tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
-        manifest["behavior"].append(tracker.is_complete)
-
-        # DeepLabCut (video) processing status.
-        tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
-        manifest["video"].append(tracker.is_complete)
-
-        # Tracks whether the session's data is currently in the processing or dataset integration mode.
-        manifest["dataset"].append(session_data.processed_data.p53_path.exists())
-
-    # If all animal IDs are integer-convertible, stores them as numbers to promote proper sorting. Otherwise, stores
-    # them as strings. The latter options are primarily kept for compatibility with Tyche data
-    animal_type: type[pl.UInt64] | type[pl.String]
-    if all([str(animal).isdigit() for animal in manifest["animal"]]):
-        # Converts all strings to integers
-        manifest["animal"] = [int(animal) for animal in manifest["animal"]]  # type: ignore
-        animal_type = pl.UInt64  # Uint64 for future proofing
-    else:
-        animal_type = pl.String
-
-    # Converts the manifest dictionary to a Polars Dataframe.
-    schema = {
-        "animal": animal_type,
-        "date": pl.Datetime,
-        "session": pl.String,
-        "type": pl.String,
-        "notes": pl.String,
-        "complete": pl.UInt8,
-        "integrity": pl.UInt8,
-        "suite2p": pl.UInt8,
-        "dataset": pl.UInt8,
-        "behavior": pl.UInt8,
-        "video": pl.UInt8,
-    }
-    df = pl.DataFrame(manifest, schema=schema, strict=False)
-
-    # Sorts the DataFrame by animal and then session. Since we assign animal IDs sequentially and 'name' sessions based
-    # on acquisition timestamps, the sort order is chronological.
-    sorted_df = df.sort(["animal", "session"])
-
-    # Saves the generated manifest to the project-specific manifest .feather file for further processing.
-    sorted_df.write_ipc(
-        file=output_directory.joinpath(f"{raw_project_directory.stem}_manifest.feather"), compression="lz4"
-    )
+            elif session_data.session_type == SessionTypes.RUN_TRAINING:
+                descriptor: RunTrainingDescriptor = RunTrainingDescriptor.from_yaml(  # type: ignore
+                    file_path=session_data.raw_data.session_descriptor_path
+                )
+                manifest["notes"].append(descriptor.experimenter_notes)
+            elif session_data.session_type == SessionTypes.MESOSCOPE_EXPERIMENT:
+                descriptor: MesoscopeExperimentDescriptor = MesoscopeExperimentDescriptor.from_yaml(  # type: ignore
+                    file_path=session_data.raw_data.session_descriptor_path
+                )
+                manifest["notes"].append(descriptor.experimenter_notes)
+            elif session_data.session_type == SessionTypes.WINDOW_CHECKING:
+                # sl-experiment version 3.0.0 added session descriptors to Window Checking runtimes. Since the file
+                # does not exist in prior versions, this section is written to statically handle the discrepancy.
+                try:
+                    descriptor: WindowCheckingDescriptor = WindowCheckingDescriptor.from_yaml(  # type: ignore
+                        file_path=session_data.raw_data.session_descriptor_path
+                    )
+                    manifest["notes"].append(descriptor.experimenter_notes)
+                except Exception:
+                    manifest["notes"].append("N/A")
+
+            # If the session raw_data folder contains the telomere.bin file, marks the session as complete.
+            manifest["complete"].append(session_data.raw_data.telomere_path.exists())
+
+            # Data verification status
+            tracker = ProcessingTracker(file_path=session_data.raw_data.integrity_verification_tracker_path)
+            manifest["integrity"].append(tracker.is_complete)
+
+            # If the session is incomplete or unverified, marks all processing steps as FALSE, as automatic processing
+            # is disabled for incomplete sessions. If the session is unverified, the case is even more severe, as its
+            # data may be corrupted.
+            if not manifest["complete"][-1] or not manifest["integrity"][-1]:
+                manifest["suite2p"].append(False)
+                manifest["dataset"].append(False)
+                manifest["behavior"].append(False)
+                manifest["video"].append(False)
+                continue  # Cycles to the next session
+
+            # Suite2p (single-day) processing status.
+            tracker = ProcessingTracker(file_path=session_data.processed_data.suite2p_processing_tracker_path)
+            manifest["suite2p"].append(tracker.is_complete)
+
+            # Behavior data processing status.
+            tracker = ProcessingTracker(file_path=session_data.processed_data.behavior_processing_tracker_path)
+            manifest["behavior"].append(tracker.is_complete)
+
+            # DeepLabCut (video) processing status.
+            tracker = ProcessingTracker(file_path=session_data.processed_data.video_processing_tracker_path)
+            manifest["video"].append(tracker.is_complete)
+
+            # Tracks whether the session's data is currently in the processing or dataset integration mode.
+            manifest["dataset"].append(session_data.processed_data.p53_path.exists())
+
+        # If all animal IDs are integer-convertible, stores them as numbers to promote proper sorting. Otherwise, stores
+        # them as strings. The latter options are primarily kept for compatibility with Tyche data
+        animal_type: type[pl.UInt64] | type[pl.String]
+        if all([str(animal).isdigit() for animal in manifest["animal"]]):
+            # Converts all strings to integers
+            manifest["animal"] = [int(animal) for animal in manifest["animal"]]  # type: ignore
+            animal_type = pl.UInt64  # Uint64 for future proofing
+        else:
+            animal_type = pl.String
+
+        # Converts the manifest dictionary to a Polars Dataframe.
+        schema = {
+            "animal": animal_type,
+            "date": pl.Datetime,
+            "session": pl.String,
+            "type": pl.String,
+            "notes": pl.String,
+            "complete": pl.UInt8,
+            "integrity": pl.UInt8,
+            "suite2p": pl.UInt8,
+            "dataset": pl.UInt8,
+            "behavior": pl.UInt8,
+            "video": pl.UInt8,
+        }
+        df = pl.DataFrame(manifest, schema=schema, strict=False)
+
+        # Sorts the DataFrame by animal and then session. Since we assign animal IDs sequentially and 'name' sessions
+        # based on acquisition timestamps, the sort order is chronological.
+        sorted_df = df.sort(["animal", "session"])
+
+        # Saves the generated manifest to the project-specific manifest .feather file for further processing.
+        sorted_df.write_ipc(
+            file=output_directory.joinpath(f"{raw_project_directory.stem}_manifest.feather"), compression="lz4"
+        )
 
 
 def verify_session_checksum(
-    session_path: Path, create_processed_data_directory: bool = True, processed_data_root: None | Path = None
+    session_path: Path,
+    create_processed_data_directory: bool = True,
+    processed_data_root: None | Path = None,
+    update_manifest: bool = False,
 ) -> None:
     """Verifies the integrity of the session's raw data by generating the checksum of the raw_data directory and
     comparing it against the checksum stored in the ax_checksum.txt file.
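
The manifest is written with polars' Arrow IPC (Feather) writer, so downstream pipelines can read it back directly. A minimal consumer sketch; the file name follows the f"{project}_manifest.feather" pattern shown above, and the filter is illustrative:

import polars as pl

manifest = pl.read_ipc("my_project_manifest.feather")
# Status columns are stored as UInt8 flags, so 0 means False.
incomplete = manifest.filter(pl.col("complete") == 0)
print(incomplete.select(["animal", "session", "notes"]))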
@@ -440,6 +451,9 @@ def verify_session_checksum(
     This function is also used to create the processed data hierarchy on the BioHPC server, when it is called as
     part of the data preprocessing runtime performed by a data acquisition system.
 
+    Since version 3.1.0, this function also supports (re)generating the processed session's project manifest file,
+    which is used to support downstream Sun lab data processing pipelines.
+
     Args:
         session_path: The path to the session directory to be verified. Note, the input session directory must contain
             the 'raw_data' subdirectory.
@@ -447,6 +461,9 @@ def verify_session_checksum(
         processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
             the root directory where to store the processed data from all projects, and it will be automatically
             modified to include the project name, the animal name, and the session ID.
+        update_manifest: Determines whether to update (regenerate) the project manifest file for the processed
+            session's project. This should always be enabled when working with remote compute server(s) to ensure
+            that the project manifest file contains the most up-to-date snapshot of the project's state.
     """
 
     # Loads session data layout. If configured to do so, also creates the processed data hierarchy
@@ -492,12 +509,27 @@ def verify_session_checksum(
     if tracker.is_running:
         tracker.error()
 
+    # If the runtime is configured to generate the project manifest file, attempts to generate and overwrite the
+    # existing manifest file for the target project.
+    if update_manifest:
+        # All sessions are stored under root/project/animal/session. Therefore, the grandparent of the session is
+        # the raw project directory.
+        raw_directory = session_path.parents[1]
+
+        # Generates the manifest file inside the root raw data project directory
+        generate_project_manifest(
+            raw_project_directory=raw_directory,
+            processed_project_directory=processed_data_root,
+            output_directory=raw_directory,
+        )
+
 
 def resolve_p53_marker(
     session_path: Path,
     create_processed_data_directory: bool = True,
     processed_data_root: None | Path = None,
     remove: bool = False,
+    update_manifest: bool = False,
 ) -> None:
     """Depending on configuration, either creates or removes the p53.bin marker file for the target session.
 
@@ -511,7 +543,10 @@ def resolve_p53_marker(
 
     For the p53.bin marker to be created, the session must currently not undergo any processing. Removing the
     p53.bin marker does not have any dependencies and will be executed even if the session is currently undergoing
-    dataset integration. This is due to data access hierarchy limitations of the Sun lab BioHPC server.
+    dataset integration. This is due to data access hierarchy limitations of the Sun lab compute server.
+
+    Since version 3.1.0, this function also supports (re)generating the processed session's project manifest file,
+    which is used to support downstream Sun lab data processing pipelines.
 
     Args:
         session_path: The path to the session directory for which the p53.bin marker needs to be resolved. Note, the
@@ -521,6 +556,9 @@ def resolve_p53_marker(
             the root directory where to store the processed data from all projects, and it will be automatically
             modified to include the project name, the animal name, and the session ID.
         remove: Determines whether this function is called to create or remove the p53.bin marker.
+        update_manifest: Determines whether to update (regenerate) the project manifest file for the processed
+            session's project. This should always be enabled when working with remote compute server(s) to ensure
+            that the project manifest file contains the most up-to-date snapshot of the project's state.
     """
 
     # Loads session data layout. If configured to do so, also creates the processed data hierarchy
@@ -574,3 +612,17 @@ def resolve_p53_marker(
     # If the runtime reached this point, the session is eligible for dataset integration. Creates the p53.bin marker
     # file, preventing the session from being processed again as long as the marker exists.
     session_data.processed_data.p53_path.touch()
+
+    # If the runtime is configured to generate the project manifest file, attempts to generate and overwrite the
+    # existing manifest file for the target project.
+    if update_manifest:
+        # All sessions are stored under root/project/animal/session. Therefore, the grandparent of the session is
+        # the raw project directory.
+        raw_directory = session_path.parents[1]
+
+        # Generates the manifest file inside the root raw data project directory
+        generate_project_manifest(
+            raw_project_directory=raw_directory,
+            processed_project_directory=processed_data_root,
+            output_directory=raw_directory,
+        )
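
Both library functions expose the same opt-in behavior as the CLI flag. A minimal sketch of driving them directly from Python; the session path and directory layout are hypothetical, while the keyword arguments match the signatures shown in this diff:

from pathlib import Path

from sl_shared_assets.tools import resolve_p53_marker, verify_session_checksum

session = Path("/server/raw/my_project/my_animal/2024-11-01-12-30-00-000000")
# Verifies raw data integrity, then regenerates the project manifest.
verify_session_checksum(session, update_manifest=True)
# Marks the session as ready for dataset integration and refreshes the manifest again.
resolve_p53_marker(session, remove=False, update_manifest=True)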
sl_shared_assets/tools/project_management_tools.pyi CHANGED
@@ -131,7 +131,10 @@ def generate_project_manifest(
     """
 
 def verify_session_checksum(
-    session_path: Path, create_processed_data_directory: bool = True, processed_data_root: None | Path = None
+    session_path: Path,
+    create_processed_data_directory: bool = True,
+    processed_data_root: None | Path = None,
+    update_manifest: bool = False,
 ) -> None:
     """Verifies the integrity of the session's raw data by generating the checksum of the raw_data directory and
     comparing it against the checksum stored in the ax_checksum.txt file.
@@ -147,6 +150,9 @@ def verify_session_checksum(
     This function is also used to create the processed data hierarchy on the BioHPC server, when it is called as
     part of the data preprocessing runtime performed by a data acquisition system.
 
+    Since version 3.1.0, this function also supports (re)generating the processed session's project manifest file,
+    which is used to support downstream Sun lab data processing pipelines.
+
     Args:
         session_path: The path to the session directory to be verified. Note, the input session directory must contain
             the 'raw_data' subdirectory.
@@ -154,6 +160,9 @@ def verify_session_checksum(
         processed_data_root: The root directory where to store the processed data hierarchy. This path has to point to
             the root directory where to store the processed data from all projects, and it will be automatically
             modified to include the project name, the animal name, and the session ID.
+        update_manifest: Determines whether to update (regenerate) the project manifest file for the processed
+            session's project. This should always be enabled when working with remote compute server(s) to ensure
+            that the project manifest file contains the most up-to-date snapshot of the project's state.
     """
 
 def resolve_p53_marker(
@@ -161,6 +170,7 @@ def resolve_p53_marker(
     create_processed_data_directory: bool = True,
     processed_data_root: None | Path = None,
     remove: bool = False,
+    update_manifest: bool = False,
 ) -> None:
     """Depending on configuration, either creates or removes the p53.bin marker file for the target session.
 
@@ -174,7 +184,10 @@ def resolve_p53_marker(
 
     For the p53.bin marker to be created, the session must currently not undergo any processing. Removing the
     p53.bin marker does not have any dependencies and will be executed even if the session is currently undergoing
-    dataset integration. This is due to data access hierarchy limitations of the Sun lab BioHPC server.
+    dataset integration. This is due to data access hierarchy limitations of the Sun lab compute server.
+
+    Since version 3.1.0, this function also supports (re)generating the processed session's project manifest file,
+    which is used to support downstream Sun lab data processing pipelines.
 
     Args:
         session_path: The path to the session directory for which the p53.bin marker needs to be resolved. Note, the
@@ -184,4 +197,7 @@ def resolve_p53_marker(
             the root directory where to store the processed data from all projects, and it will be automatically
             modified to include the project name, the animal name, and the session ID.
         remove: Determines whether this function is called to create or remove the p53.bin marker.
+        update_manifest: Determines whether to update (regenerate) the project manifest file for the processed
+            session's project. This should always be enabled when working with remote compute server(s) to ensure
+            that the project manifest file contains the most up-to-date snapshot of the project's state.
     """
sl_shared_assets-3.0.1.dist-info/METADATA → sl_shared_assets-3.1.1.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sl-shared-assets
-Version: 3.0.1
+Version: 3.1.1
 Summary: Provides data acquisition and processing assets shared between Sun (NeuroAI) lab libraries.
 Project-URL: Homepage, https://github.com/Sun-Lab-NBB/sl-shared-assets
 Project-URL: Documentation, https://sl-shared-assets-api-docs.netlify.app/
sl_shared_assets-3.0.1.dist-info/RECORD → sl_shared_assets-3.1.1.dist-info/RECORD RENAMED
@@ -1,7 +1,7 @@
-sl_shared_assets/__init__.py,sha256=rCu1VYs2Lc1l0jqHO3UtfuymU0uY2ccxEn4UyscIut8,2347
-sl_shared_assets/__init__.pyi,sha256=WCWIS-I3ToP4XybNZAi3fA7j2CZ48dl9D-fmd7oZKCo,2615
-sl_shared_assets/cli.py,sha256=Af2yRujqZ01IzmXBqsMQmuz2yvRm4raKZ-y58j49fUI,18141
-sl_shared_assets/cli.pyi,sha256=hVxU1YlhWDz41GHaaH7rkbZz2slTMBmG83-7rg_geYk,5265
+sl_shared_assets/__init__.py,sha256=ybThh0XDtijjwahKkSEnnQ44rxrN2SVyjB5dHaXts0E,2391
+sl_shared_assets/__init__.pyi,sha256=Cb-umRqvnynk2udbgqAJ6h5_tiJyvVtWmx0kLKrL2Yg,2678
+sl_shared_assets/cli.py,sha256=OIwXf6pNPnzqzUPL7mSmEw17KIa3yAOpP0Mpo1Zpf88,19087
+sl_shared_assets/cli.pyi,sha256=5hEbOnYaH4q5qdqJ-zhM9-ElzgcaBeMAX34tuHaUDos,5328
 sl_shared_assets/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sl_shared_assets/data_classes/__init__.py,sha256=mP__bBIIjMf0EETM4PgQzKy1ZKsjp6paRPNDWWbPRV4,1962
 sl_shared_assets/data_classes/__init__.pyi,sha256=J7ZCH9qQ4qz-3Wq9ILdihlmK9zFR3iU1cpLcSaN45Y8,2238
@@ -10,7 +10,7 @@ sl_shared_assets/data_classes/configuration_data.pyi,sha256=1_kmBDPGkHmVwXEGYR_3
 sl_shared_assets/data_classes/runtime_data.py,sha256=kmXTUk5rDAJBN3XrYLYgusJRfVJ5WiBBk0RPNiSk2pE,16725
 sl_shared_assets/data_classes/runtime_data.pyi,sha256=Hyc-dBePM0xIGgkSIoKmwwUUmdOokm1LUwy1OHHalyU,6771
 sl_shared_assets/data_classes/session_data.py,sha256=YKXako1sNB87LDkGEXx9WZFs6lG3aD619lga5g4L4Ks,49172
-sl_shared_assets/data_classes/session_data.pyi,sha256=F_6UAzEiPJjxfHQaOt_pttjSUvSggq1FT48GAHmmcbA,15959
+sl_shared_assets/data_classes/session_data.pyi,sha256=CgB4nIDBl4bY1JvcIILfFTlos3ukl3WK4AOaku4CL3Y,15959
 sl_shared_assets/data_classes/surgery_data.py,sha256=5B1OPKFq4bnzbAoe-_c5dFV3kbSD5YFzXbX2zXmfGs8,7485
 sl_shared_assets/data_classes/surgery_data.pyi,sha256=rf59lJ3tGSYKHQlEGXg75MnjajBwl0DYhL4TClAO4SM,2605
 sl_shared_assets/server/__init__.py,sha256=GOQ7wWjiS5Xg_WgTqeEqCTRF9ms9GXx0nffCr-BmKsA,453
@@ -19,18 +19,18 @@ sl_shared_assets/server/job.py,sha256=wZbppMrv6fqch79bKLjOGQ9AYfjiDKDnTyUe7xgAT4
 sl_shared_assets/server/job.pyi,sha256=wop4ulVY2u6eb3twajeA9MS0EAtNb89aA56pPoGF1Xc,11673
 sl_shared_assets/server/server.py,sha256=oEwdXisyel72Hdk7ZpEwTPq3Lu64UbQWfGHArV8Y6nI,32978
 sl_shared_assets/server/server.pyi,sha256=84XFtqU9fYbxu6Ldf-OMB2nFe6wdGneZM1MFtR9rz4s,15133
-sl_shared_assets/tools/__init__.py,sha256=NktXk62E_HHOrO_93z_MVmSd6-Oir3mE4xE9Yr8Qa7U,682
-sl_shared_assets/tools/__init__.pyi,sha256=0UXorfCXXmHQOP5z7hODpsqEX0DAkOta5VZqN6FSS-w,623
+sl_shared_assets/tools/__init__.py,sha256=i-oUVw_un3lzyyII4Sc75s4BnUfZh_aUbQe6dP2Vrbc,743
+sl_shared_assets/tools/__init__.pyi,sha256=pi-5AJyQYeuqIFGWpJ_HhUpXLq6P_nItIqDhsdaIJFU,686
 sl_shared_assets/tools/ascension_tools.py,sha256=xI-hrkR9NIgb7lyhj-ntc8tCYQvDEv6YgYJXl1yvxCs,14639
 sl_shared_assets/tools/ascension_tools.pyi,sha256=fs5j7nbnZ4WpgK8D75A7WJcvFMwK_MUO9ULIYo1YkGo,3739
 sl_shared_assets/tools/packaging_tools.py,sha256=VxQoluGPDUWjPj1ftEt2dvUcdmj0g7T1frGZhZPM8NE,7541
 sl_shared_assets/tools/packaging_tools.pyi,sha256=vgGbAQCExwg-0A5F72MzEhzHxu97Nqg1yuz-5P89ycU,3118
-sl_shared_assets/tools/project_management_tools.py,sha256=_AQFLKqK22pqzMDPqNY5Wde0bRVvZLcTO2gYrhKD89M,28744
-sl_shared_assets/tools/project_management_tools.pyi,sha256=AeBG-8XUygiJndfsBCKACKIZdnvk0avQRibWO24ahtM,10238
+sl_shared_assets/tools/project_management_tools.py,sha256=9G9jdeUxs80yLkcFJsWUdVcAcuXDB8reqnTfxVa-FAw,32161
+sl_shared_assets/tools/project_management_tools.pyi,sha256=hdn0U9e3_j9McJH75Dzoas-FxcB9nVCTHEFHPofdLtg,11361
 sl_shared_assets/tools/transfer_tools.py,sha256=vqYO4sERZV0W1DFNFnTpJA6QBZ4QJA94a2TyUhZW2Qk,6605
 sl_shared_assets/tools/transfer_tools.pyi,sha256=WtUGfaKV9FP_CnhBg_UvclpuDvOlEESOSMlEDtWpOLg,3293
-sl_shared_assets-3.0.1.dist-info/METADATA,sha256=78zdy2sXUtOmroilVGfI7k2LmVnFKjkyYpVXP0LesMA,56944
-sl_shared_assets-3.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-sl_shared_assets-3.0.1.dist-info/entry_points.txt,sha256=UmO1rl7ly9N7HWPwWyP9E0b5KBUStpBo4TRoqNtizDY,430
-sl_shared_assets-3.0.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-sl_shared_assets-3.0.1.dist-info/RECORD,,
+sl_shared_assets-3.1.1.dist-info/METADATA,sha256=eBMBDc_ahWT4kEFQH0X_i3U1cthEtn5O23x5VBr3EOQ,56944
+sl_shared_assets-3.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+sl_shared_assets-3.1.1.dist-info/entry_points.txt,sha256=UmO1rl7ly9N7HWPwWyP9E0b5KBUStpBo4TRoqNtizDY,430
+sl_shared_assets-3.1.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+sl_shared_assets-3.1.1.dist-info/RECORD,,