lsst-pipe-base 29.2025.3900__py3-none-any.whl → 29.2025.4100__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_task_metadata.py +15 -0
- lsst/pipe/base/dot_tools.py +14 -152
- lsst/pipe/base/exec_fixup_data_id.py +17 -44
- lsst/pipe/base/execution_graph_fixup.py +49 -18
- lsst/pipe/base/graph/_versionDeserializers.py +6 -5
- lsst/pipe/base/graph/graph.py +30 -10
- lsst/pipe/base/graph/graphSummary.py +30 -0
- lsst/pipe/base/graph_walker.py +119 -0
- lsst/pipe/base/log_capture.py +5 -2
- lsst/pipe/base/mermaid_tools.py +11 -64
- lsst/pipe/base/mp_graph_executor.py +298 -236
- lsst/pipe/base/pipeline_graph/io.py +1 -1
- lsst/pipe/base/quantum_graph/__init__.py +32 -0
- lsst/pipe/base/quantum_graph/_common.py +632 -0
- lsst/pipe/base/quantum_graph/_multiblock.py +808 -0
- lsst/pipe/base/quantum_graph/_predicted.py +1950 -0
- lsst/pipe/base/quantum_graph/visualization.py +302 -0
- lsst/pipe/base/quantum_graph_builder.py +292 -34
- lsst/pipe/base/quantum_graph_executor.py +2 -1
- lsst/pipe/base/quantum_provenance_graph.py +16 -7
- lsst/pipe/base/quantum_reports.py +45 -0
- lsst/pipe/base/separable_pipeline_executor.py +126 -15
- lsst/pipe/base/simple_pipeline_executor.py +44 -43
- lsst/pipe/base/single_quantum_executor.py +1 -40
- lsst/pipe/base/tests/mocks/__init__.py +1 -1
- lsst/pipe/base/tests/mocks/_pipeline_task.py +16 -1
- lsst/pipe/base/tests/mocks/{_in_memory_repo.py → _repo.py} +324 -45
- lsst/pipe/base/tests/mocks/_storage_class.py +51 -0
- lsst/pipe/base/tests/simpleQGraph.py +11 -5
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/METADATA +2 -1
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/RECORD +40 -34
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/zip-safe +0 -0
|
@@ -40,9 +40,11 @@ __all__ = (
|
|
|
40
40
|
)
|
|
41
41
|
|
|
42
42
|
import dataclasses
|
|
43
|
+
import operator
|
|
43
44
|
from abc import ABC, abstractmethod
|
|
45
|
+
from collections import defaultdict
|
|
44
46
|
from collections.abc import Iterable, Mapping, Sequence
|
|
45
|
-
from typing import TYPE_CHECKING, Any, final
|
|
47
|
+
from typing import TYPE_CHECKING, Any, cast, final
|
|
46
48
|
|
|
47
49
|
from lsst.daf.butler import (
|
|
48
50
|
Butler,
|
|
@@ -50,11 +52,13 @@ from lsst.daf.butler import (
|
|
|
50
52
|
DataCoordinate,
|
|
51
53
|
DatasetRef,
|
|
52
54
|
DatasetType,
|
|
55
|
+
DimensionDataAttacher,
|
|
53
56
|
DimensionUniverse,
|
|
54
57
|
NamedKeyDict,
|
|
55
58
|
NamedKeyMapping,
|
|
56
59
|
Quantum,
|
|
57
60
|
)
|
|
61
|
+
from lsst.daf.butler._rubin import generate_uuidv7
|
|
58
62
|
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
|
|
59
63
|
from lsst.daf.butler.registry import MissingCollectionError, MissingDatasetTypeError
|
|
60
64
|
from lsst.utils.logging import LsstLogAdapter, getLogger
|
|
@@ -64,8 +68,7 @@ from . import automatic_connection_constants as acc
|
|
|
64
68
|
from ._status import NoWorkFound
|
|
65
69
|
from ._task_metadata import TaskMetadata
|
|
66
70
|
from .connections import AdjustQuantumHelper, QuantaAdjuster
|
|
67
|
-
from .
|
|
68
|
-
from .pipeline_graph import PipelineGraph, TaskNode
|
|
71
|
+
from .pipeline_graph import Edge, PipelineGraph, TaskNode
|
|
69
72
|
from .prerequisite_helpers import PrerequisiteInfo, SkyPixBoundsBuilder, TimespanBuilder
|
|
70
73
|
from .quantum_graph_skeleton import (
|
|
71
74
|
DatasetKey,
|
|
@@ -76,7 +79,9 @@ from .quantum_graph_skeleton import (
|
|
|
76
79
|
)
|
|
77
80
|
|
|
78
81
|
if TYPE_CHECKING:
|
|
82
|
+
from .graph import QuantumGraph
|
|
79
83
|
from .pipeline import TaskDef
|
|
84
|
+
from .quantum_graph import PredictedDatasetModel, PredictedQuantumGraphComponents
|
|
80
85
|
|
|
81
86
|
|
|
82
87
|
class QuantumGraphBuilderError(Exception):
|
|
@@ -310,7 +315,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
310
315
|
def build(
|
|
311
316
|
self, metadata: Mapping[str, Any] | None = None, attach_datastore_records: bool = True
|
|
312
317
|
) -> QuantumGraph:
|
|
313
|
-
"""Build the quantum graph.
|
|
318
|
+
"""Build the quantum graph, returning an old `QuantumGraph` instance.
|
|
314
319
|
|
|
315
320
|
Parameters
|
|
316
321
|
----------
|
|
@@ -331,6 +336,61 @@ class QuantumGraphBuilder(ABC):
|
|
|
331
336
|
call this method exactly once. See class documentation for details on
|
|
332
337
|
what it does.
|
|
333
338
|
"""
|
|
339
|
+
skeleton = self._build_skeleton(attach_datastore_records=attach_datastore_records)
|
|
340
|
+
if metadata is None:
|
|
341
|
+
metadata = {
|
|
342
|
+
"input": list(self.input_collections),
|
|
343
|
+
"output_run": self.output_run,
|
|
344
|
+
}
|
|
345
|
+
return self._construct_quantum_graph(skeleton, metadata)
|
|
346
|
+
|
|
347
|
+
def finish(
|
|
348
|
+
self,
|
|
349
|
+
output: str | None = None,
|
|
350
|
+
metadata: Mapping[str, Any] | None = None,
|
|
351
|
+
attach_datastore_records: bool = True,
|
|
352
|
+
) -> PredictedQuantumGraphComponents:
|
|
353
|
+
"""Return quantum graph components that can be used to save or
|
|
354
|
+
construct a `PredictedQuantumGraph` instance.
|
|
355
|
+
|
|
356
|
+
Parameters
|
|
357
|
+
----------
|
|
358
|
+
output : `str` or `None`, optional
|
|
359
|
+
Output `~lsst.daf.butler.CollectionType.CHAINED` collection that
|
|
360
|
+
combines the input and output collections.
|
|
361
|
+
metadata : `~collections.abc.Mapping`, optional
|
|
362
|
+
Mapping of JSON-friendly metadata. Collection information, the
|
|
363
|
+
current user, and the current timestamp are automatically
|
|
364
|
+
included.
|
|
365
|
+
attach_datastore_records : `bool`, optional
|
|
366
|
+
Whether to include datastore records for overall inputs for
|
|
367
|
+
`~lsst.daf.butler.QuantumBackedButler`.
|
|
368
|
+
|
|
369
|
+
Returns
|
|
370
|
+
-------
|
|
371
|
+
components : `.quantum_graph.PredictedQuantumGraphComponents`
|
|
372
|
+
Components that can be used to construct a graph object and/or save
|
|
373
|
+
it to disk.
|
|
374
|
+
"""
|
|
375
|
+
skeleton = self._build_skeleton(attach_datastore_records=attach_datastore_records)
|
|
376
|
+
return self._construct_components(skeleton, output=output, metadata=metadata)
|
|
377
|
+
|
|
378
|
+
def _build_skeleton(self, attach_datastore_records: bool = True) -> QuantumGraphSkeleton:
|
|
379
|
+
"""Build a complete skeleton for the quantum graph.
|
|
380
|
+
|
|
381
|
+
Parameters
|
|
382
|
+
----------
|
|
383
|
+
metadata : `~collections.abc.Mapping`, optional
|
|
384
|
+
Flexible metadata to add to the quantum graph.
|
|
385
|
+
attach_datastore_records : `bool`, optional
|
|
386
|
+
Whether to include datastore records in the graph. Required for
|
|
387
|
+
`lsst.daf.butler.QuantumBackedButler` execution.
|
|
388
|
+
|
|
389
|
+
Returns
|
|
390
|
+
-------
|
|
391
|
+
quantum_graph_skeleton : `QuantumGraphSkeleton`
|
|
392
|
+
DAG describing processing to be performed.
|
|
393
|
+
"""
|
|
334
394
|
with self.butler.registry.caching_context():
|
|
335
395
|
full_skeleton = QuantumGraphSkeleton(self._pipeline_graph.tasks)
|
|
336
396
|
subgraphs = list(self._pipeline_graph.split_independent())
|
|
@@ -344,11 +404,19 @@ class QuantumGraphBuilder(ABC):
|
|
|
344
404
|
self.log.verbose("Subgraph tasks: [%s]", ", ".join(label for label in subgraph.tasks))
|
|
345
405
|
subgraph_skeleton = self.process_subgraph(subgraph)
|
|
346
406
|
full_skeleton.update(subgraph_skeleton)
|
|
347
|
-
# Loop over tasks
|
|
348
|
-
#
|
|
349
|
-
#
|
|
407
|
+
# Loop over tasks to apply skip-existing logic and add missing
|
|
408
|
+
# prerequisites. The pipeline graph must be topologically sorted,
|
|
409
|
+
# so a quantum is only processed after any quantum that provides
|
|
410
|
+
# its inputs has been processed.
|
|
411
|
+
skipped_quanta: dict[str, list[QuantumKey]] = {}
|
|
412
|
+
for task_node in self._pipeline_graph.tasks.values():
|
|
413
|
+
skipped_quanta[task_node.label] = self._resolve_task_quanta(task_node, full_skeleton)
|
|
414
|
+
# Add any dimension records not handled by the subclass, and
|
|
415
|
+
# aggregate any that were added directly to data IDs.
|
|
416
|
+
full_skeleton.attach_dimension_records(self.butler, self._pipeline_graph.get_all_dimensions())
|
|
417
|
+
# Loop over tasks again to run the adjust hooks.
|
|
350
418
|
for task_node in self._pipeline_graph.tasks.values():
|
|
351
|
-
self.
|
|
419
|
+
self._adjust_task_quanta(task_node, full_skeleton, skipped_quanta[task_node.label])
|
|
352
420
|
# Add global init-outputs to the skeleton.
|
|
353
421
|
for dataset_type in self._global_init_output_types.values():
|
|
354
422
|
dataset_key = full_skeleton.add_dataset_node(
|
|
@@ -364,15 +432,9 @@ class QuantumGraphBuilder(ABC):
|
|
|
364
432
|
# with the quanta because no quantum knows if its the only
|
|
365
433
|
# consumer).
|
|
366
434
|
full_skeleton.remove_orphan_datasets()
|
|
367
|
-
# Add any dimension records not handled by the subclass, and
|
|
368
|
-
# aggregate any that were added directly to data IDs.
|
|
369
|
-
full_skeleton.attach_dimension_records(self.butler, self._pipeline_graph.get_all_dimensions())
|
|
370
435
|
if attach_datastore_records:
|
|
371
436
|
self._attach_datastore_records(full_skeleton)
|
|
372
|
-
|
|
373
|
-
if metadata is None:
|
|
374
|
-
metadata = {}
|
|
375
|
-
return self._construct_quantum_graph(full_skeleton, metadata)
|
|
437
|
+
return full_skeleton
|
|
376
438
|
|
|
377
439
|
@abstractmethod
|
|
378
440
|
def process_subgraph(self, subgraph: PipelineGraph) -> QuantumGraphSkeleton:
|
|
@@ -430,9 +492,9 @@ class QuantumGraphBuilder(ABC):
|
|
|
430
492
|
|
|
431
493
|
@final
|
|
432
494
|
@timeMethod
|
|
433
|
-
def _resolve_task_quanta(self, task_node: TaskNode, skeleton: QuantumGraphSkeleton) ->
|
|
495
|
+
def _resolve_task_quanta(self, task_node: TaskNode, skeleton: QuantumGraphSkeleton) -> list[QuantumKey]:
|
|
434
496
|
"""Process the quanta for one task in a skeleton graph to skip those
|
|
435
|
-
that have already completed and
|
|
497
|
+
that have already completed and add missing prerequisite inputs.
|
|
436
498
|
|
|
437
499
|
Parameters
|
|
438
500
|
----------
|
|
@@ -441,6 +503,12 @@ class QuantumGraphBuilder(ABC):
|
|
|
441
503
|
skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
442
504
|
Preliminary quantum graph, to be modified in-place.
|
|
443
505
|
|
|
506
|
+
Returns
|
|
507
|
+
-------
|
|
508
|
+
skipped_quanta : `list` [ `.quantum_skeleton_graph.QuantumKey` ]
|
|
509
|
+
Keys of quanta that were already skipped because their metadata
|
|
510
|
+
already exists in a ``skip_existing_in`` collections.
|
|
511
|
+
|
|
444
512
|
Notes
|
|
445
513
|
-----
|
|
446
514
|
This method modifies ``skeleton`` in-place in several ways:
|
|
@@ -449,26 +517,11 @@ class QuantumGraphBuilder(ABC):
|
|
|
449
517
|
and drops input dataset nodes that do not have a
|
|
450
518
|
`lsst.daf.butler.DatasetRef` already. This ensures producing and
|
|
451
519
|
consuming tasks start from the same `lsst.daf.butler.DatasetRef`.
|
|
452
|
-
- It adds "inputs", "outputs", and "init_inputs" attributes to the
|
|
453
|
-
quantum nodes, holding the same `NamedValueMapping` objects needed to
|
|
454
|
-
construct an actual `Quantum` instances.
|
|
455
520
|
- It removes quantum nodes that are to be skipped because their outputs
|
|
456
521
|
already exist in `skip_existing_in`. It also marks their outputs
|
|
457
522
|
as no longer in the way.
|
|
458
523
|
- It adds prerequisite dataset nodes and edges that connect them to the
|
|
459
524
|
quanta that consume them.
|
|
460
|
-
- It removes quantum nodes whose
|
|
461
|
-
`~PipelineTaskConnections.adjustQuantum` calls raise `NoWorkFound` or
|
|
462
|
-
predict no outputs;
|
|
463
|
-
- It removes the nodes of output datasets that are "adjusted away".
|
|
464
|
-
- It removes the edges of input datasets that are "adjusted away".
|
|
465
|
-
|
|
466
|
-
The difference between how adjusted inputs and outputs are handled
|
|
467
|
-
reflects the fact that many quanta can share the same input, but only
|
|
468
|
-
one produces each output. This can lead to the graph having
|
|
469
|
-
superfluous isolated nodes after processing is complete, but these
|
|
470
|
-
should only be removed after all the quanta from all tasks have been
|
|
471
|
-
processed.
|
|
472
525
|
"""
|
|
473
526
|
# Extract the helper object for the prerequisite inputs of this task,
|
|
474
527
|
# and tell it to prepare to construct skypix bounds and timespans for
|
|
@@ -495,6 +548,46 @@ class QuantumGraphBuilder(ABC):
|
|
|
495
548
|
)
|
|
496
549
|
for skipped_quantum in skipped_quanta:
|
|
497
550
|
skeleton.remove_quantum_node(skipped_quantum, remove_outputs=False)
|
|
551
|
+
return skipped_quanta
|
|
552
|
+
|
|
553
|
+
@final
|
|
554
|
+
@timeMethod
|
|
555
|
+
def _adjust_task_quanta(
|
|
556
|
+
self, task_node: TaskNode, skeleton: QuantumGraphSkeleton, skipped_quanta: list[QuantumKey]
|
|
557
|
+
) -> None:
|
|
558
|
+
"""Process the quanta for one task in a skeleton graph by calling the
|
|
559
|
+
``adjust_all_quanta`` and ``adjustQuantum`` hooks.
|
|
560
|
+
|
|
561
|
+
Parameters
|
|
562
|
+
----------
|
|
563
|
+
task_node : `pipeline_graph.TaskNode`
|
|
564
|
+
Node for this task in the pipeline graph.
|
|
565
|
+
skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
566
|
+
Preliminary quantum graph, to be modified in-place.
|
|
567
|
+
skipped_quanta : `list` [ `.quantum_skeleton_graph.QuantumKey` ]
|
|
568
|
+
Keys of quanta that were already skipped because their metadata
|
|
569
|
+
already exists in a ``skip_existing_in`` collections.
|
|
570
|
+
|
|
571
|
+
Notes
|
|
572
|
+
-----
|
|
573
|
+
This method modifies ``skeleton`` in-place in several ways:
|
|
574
|
+
|
|
575
|
+
- It adds "inputs", "outputs", and "init_inputs" attributes to the
|
|
576
|
+
quantum nodes, holding the same `NamedValueMapping` objects needed to
|
|
577
|
+
construct an actual `Quantum` instances.
|
|
578
|
+
- It removes quantum nodes whose
|
|
579
|
+
`~PipelineTaskConnections.adjustQuantum` calls raise `NoWorkFound` or
|
|
580
|
+
predict no outputs;
|
|
581
|
+
- It removes the nodes of output datasets that are "adjusted away".
|
|
582
|
+
- It removes the edges of input datasets that are "adjusted away".
|
|
583
|
+
|
|
584
|
+
The difference between how adjusted inputs and outputs are handled
|
|
585
|
+
reflects the fact that many quanta can share the same input, but only
|
|
586
|
+
one produces each output. This can lead to the graph having
|
|
587
|
+
superfluous isolated nodes after processing is complete, but these
|
|
588
|
+
should only be removed after all the quanta from all tasks have been
|
|
589
|
+
processed.
|
|
590
|
+
"""
|
|
498
591
|
# Give the task a chance to adjust all quanta together. This
|
|
499
592
|
# operates directly on the skeleton (via a the 'adjuster', which
|
|
500
593
|
# is just an interface adapter).
|
|
@@ -688,7 +781,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
688
781
|
"""
|
|
689
782
|
dataset_key: DatasetKey | PrerequisiteDatasetKey
|
|
690
783
|
for dataset_key in skeleton.iter_outputs_of(quantum_key):
|
|
691
|
-
dataset_data_id = skeleton
|
|
784
|
+
dataset_data_id = skeleton.get_data_id(dataset_key)
|
|
692
785
|
dataset_type_node = self._pipeline_graph.dataset_types[dataset_key.parent_dataset_type_name]
|
|
693
786
|
if (ref := skeleton.get_output_in_the_way(dataset_key)) is None:
|
|
694
787
|
ref = DatasetRef(dataset_type_node.dataset_type, dataset_data_id, run=self.output_run)
|
|
@@ -704,7 +797,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
704
797
|
skypix_bounds_builder.handle_dataset(dataset_key.parent_dataset_type_name, dataset_data_id)
|
|
705
798
|
timespan_builder.handle_dataset(dataset_key.parent_dataset_type_name, dataset_data_id)
|
|
706
799
|
skeleton.set_dataset_ref(ref, dataset_key)
|
|
707
|
-
quantum_data_id = skeleton
|
|
800
|
+
quantum_data_id = skeleton.get_data_id(quantum_key)
|
|
708
801
|
# Process inputs already present in the skeleton - this should include
|
|
709
802
|
# all regular inputs (including intermediates) and may include some
|
|
710
803
|
# prerequisites.
|
|
@@ -1057,6 +1150,8 @@ class QuantumGraphBuilder(ABC):
|
|
|
1057
1150
|
quantum_graph : `.QuantumGraph`
|
|
1058
1151
|
DAG describing processing to be performed.
|
|
1059
1152
|
"""
|
|
1153
|
+
from .graph import QuantumGraph
|
|
1154
|
+
|
|
1060
1155
|
quanta: dict[TaskDef, set[Quantum]] = {}
|
|
1061
1156
|
init_inputs: dict[TaskDef, Iterable[DatasetRef]] = {}
|
|
1062
1157
|
init_outputs: dict[TaskDef, Iterable[DatasetRef]] = {}
|
|
@@ -1113,6 +1208,169 @@ class QuantumGraphBuilder(ABC):
|
|
|
1113
1208
|
registryDatasetTypes=registry_dataset_types,
|
|
1114
1209
|
)
|
|
1115
1210
|
|
|
1211
|
+
@final
|
|
1212
|
+
@timeMethod
|
|
1213
|
+
def _construct_components(
|
|
1214
|
+
self,
|
|
1215
|
+
skeleton: QuantumGraphSkeleton,
|
|
1216
|
+
output: str | None,
|
|
1217
|
+
metadata: Mapping[str, Any] | None,
|
|
1218
|
+
) -> PredictedQuantumGraphComponents:
|
|
1219
|
+
"""Return quantum graph components from a completed skeleton.
|
|
1220
|
+
|
|
1221
|
+
Parameters
|
|
1222
|
+
----------
|
|
1223
|
+
skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
1224
|
+
Temporary data structure used by the builder to represent the
|
|
1225
|
+
graph.
|
|
1226
|
+
output : `str` or `None`, optional
|
|
1227
|
+
Output `~lsst.daf.butler.CollectionType.CHAINED` collection that
|
|
1228
|
+
combines the input and output collections.
|
|
1229
|
+
metadata : `~collections.abc.Mapping`, optional
|
|
1230
|
+
Mapping of JSON-friendly metadata. Collection information, the
|
|
1231
|
+
current user, and the current timestamp are automatically
|
|
1232
|
+
included.
|
|
1233
|
+
|
|
1234
|
+
Returns
|
|
1235
|
+
-------
|
|
1236
|
+
components : `.quantum_graph.PredictedQuantumGraphComponents`
|
|
1237
|
+
Components that can be used to construct a graph object and/or save
|
|
1238
|
+
it to disk.
|
|
1239
|
+
"""
|
|
1240
|
+
from .quantum_graph import (
|
|
1241
|
+
PredictedDatasetModel,
|
|
1242
|
+
PredictedQuantumDatasetsModel,
|
|
1243
|
+
PredictedQuantumGraphComponents,
|
|
1244
|
+
)
|
|
1245
|
+
|
|
1246
|
+
components = PredictedQuantumGraphComponents(pipeline_graph=self._pipeline_graph)
|
|
1247
|
+
components.header.inputs = list(self.input_collections)
|
|
1248
|
+
components.header.output_run = self.output_run
|
|
1249
|
+
components.header.output = output
|
|
1250
|
+
if metadata is not None:
|
|
1251
|
+
components.header.metadata.update(metadata)
|
|
1252
|
+
components.dimension_data = DimensionDataAttacher(
|
|
1253
|
+
records=skeleton.get_dimension_data(),
|
|
1254
|
+
dimensions=self._pipeline_graph.get_all_dimensions(),
|
|
1255
|
+
)
|
|
1256
|
+
components.init_quanta.root = [
|
|
1257
|
+
PredictedQuantumDatasetsModel.model_construct(
|
|
1258
|
+
quantum_id=generate_uuidv7(),
|
|
1259
|
+
task_label="",
|
|
1260
|
+
outputs={
|
|
1261
|
+
dataset_key.parent_dataset_type_name: [
|
|
1262
|
+
PredictedDatasetModel.from_dataset_ref(
|
|
1263
|
+
cast(DatasetRef, skeleton.get_dataset_ref(dataset_key))
|
|
1264
|
+
)
|
|
1265
|
+
]
|
|
1266
|
+
for dataset_key in skeleton.global_init_outputs
|
|
1267
|
+
},
|
|
1268
|
+
)
|
|
1269
|
+
]
|
|
1270
|
+
for task_node in self._pipeline_graph.tasks.values():
|
|
1271
|
+
if not skeleton.has_task(task_node.label):
|
|
1272
|
+
continue
|
|
1273
|
+
task_init_key = TaskInitKey(task_node.label)
|
|
1274
|
+
init_quantum_datasets = PredictedQuantumDatasetsModel.model_construct(
|
|
1275
|
+
quantum_id=generate_uuidv7(),
|
|
1276
|
+
task_label=task_node.label,
|
|
1277
|
+
inputs=self._make_predicted_datasets(
|
|
1278
|
+
skeleton,
|
|
1279
|
+
task_node.init.iter_all_inputs(),
|
|
1280
|
+
skeleton.iter_inputs_of(task_init_key),
|
|
1281
|
+
),
|
|
1282
|
+
outputs=self._make_predicted_datasets(
|
|
1283
|
+
skeleton,
|
|
1284
|
+
task_node.init.iter_all_outputs(),
|
|
1285
|
+
skeleton.iter_outputs_of(task_init_key),
|
|
1286
|
+
),
|
|
1287
|
+
datastore_records={
|
|
1288
|
+
datastore_name: records.to_simple()
|
|
1289
|
+
for datastore_name, records in skeleton[task_init_key]
|
|
1290
|
+
.get("datastore_records", {})
|
|
1291
|
+
.items()
|
|
1292
|
+
},
|
|
1293
|
+
)
|
|
1294
|
+
components.init_quanta.root.append(init_quantum_datasets)
|
|
1295
|
+
for quantum_key in skeleton.get_quanta(task_node.label):
|
|
1296
|
+
quantum_datasets = PredictedQuantumDatasetsModel.model_construct(
|
|
1297
|
+
quantum_id=generate_uuidv7(),
|
|
1298
|
+
task_label=task_node.label,
|
|
1299
|
+
data_coordinate=list(skeleton.get_data_id(quantum_key).full_values),
|
|
1300
|
+
inputs=self._make_predicted_datasets(
|
|
1301
|
+
skeleton,
|
|
1302
|
+
task_node.iter_all_inputs(),
|
|
1303
|
+
skeleton.iter_inputs_of(quantum_key),
|
|
1304
|
+
),
|
|
1305
|
+
outputs=self._make_predicted_datasets(
|
|
1306
|
+
skeleton,
|
|
1307
|
+
task_node.iter_all_outputs(),
|
|
1308
|
+
skeleton.iter_outputs_of(quantum_key),
|
|
1309
|
+
),
|
|
1310
|
+
datastore_records={
|
|
1311
|
+
datastore_name: records.to_simple()
|
|
1312
|
+
for datastore_name, records in skeleton[quantum_key]
|
|
1313
|
+
.get("datastore_records", {})
|
|
1314
|
+
.items()
|
|
1315
|
+
},
|
|
1316
|
+
)
|
|
1317
|
+
components.quantum_datasets[quantum_datasets.quantum_id] = quantum_datasets
|
|
1318
|
+
components.set_quantum_indices()
|
|
1319
|
+
components.set_thin_graph()
|
|
1320
|
+
components.set_header_counts()
|
|
1321
|
+
return components
|
|
1322
|
+
|
|
1323
|
+
@staticmethod
|
|
1324
|
+
def _make_predicted_datasets(
|
|
1325
|
+
skeleton: QuantumGraphSkeleton,
|
|
1326
|
+
edges: Iterable[Edge],
|
|
1327
|
+
dataset_keys: Iterable[DatasetKey | PrerequisiteDatasetKey],
|
|
1328
|
+
) -> dict[str, list[PredictedDatasetModel]]:
|
|
1329
|
+
"""Make the predicted quantum graph model objects that represent the
|
|
1330
|
+
datasets from an iterable of pipeline graph edges.
|
|
1331
|
+
|
|
1332
|
+
Parameters
|
|
1333
|
+
----------
|
|
1334
|
+
skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
1335
|
+
Temporary data structure used by the builder to represent the
|
|
1336
|
+
graph.
|
|
1337
|
+
edges : `~collections.abc.Iterable` [ `.pipeline_graph.Edge` ]
|
|
1338
|
+
Pipeline graph edges.
|
|
1339
|
+
dataset_keys : `~collections.abc.Iterable` [ \
|
|
1340
|
+
`.quantum_graph_skeleton.DatasetKey` or\
|
|
1341
|
+
`.quantum_graph_skeleton.PrerequisiteDatasetKey` ]
|
|
1342
|
+
All nodes in the skeleton that correspond to any of the given
|
|
1343
|
+
pipeline graph edges.
|
|
1344
|
+
|
|
1345
|
+
Returns
|
|
1346
|
+
-------
|
|
1347
|
+
predicted_datasets : `dict` [ `str`, \
|
|
1348
|
+
`list` [ `.quantum_graph.PredictedDatasetModel` ] ]
|
|
1349
|
+
Mapping of dataset models, keyed by connection name.
|
|
1350
|
+
"""
|
|
1351
|
+
from .quantum_graph import PredictedDatasetModel
|
|
1352
|
+
|
|
1353
|
+
connection_names_by_dataset_type: defaultdict[str, list[str]] = defaultdict(list)
|
|
1354
|
+
result: dict[str, list[PredictedDatasetModel]] = {}
|
|
1355
|
+
for edge in edges:
|
|
1356
|
+
connection_names_by_dataset_type[edge.parent_dataset_type_name].append(edge.connection_name)
|
|
1357
|
+
result[edge.connection_name] = []
|
|
1358
|
+
|
|
1359
|
+
for dataset_key in dataset_keys:
|
|
1360
|
+
connection_names = connection_names_by_dataset_type.get(dataset_key.parent_dataset_type_name)
|
|
1361
|
+
if connection_names is None:
|
|
1362
|
+
# Ignore if this isn't one of the connections we're processing
|
|
1363
|
+
# (probably an init-input), which would also be predecessor to
|
|
1364
|
+
# a quantum node, but should be handled separately.
|
|
1365
|
+
continue
|
|
1366
|
+
ref = skeleton.get_dataset_ref(dataset_key)
|
|
1367
|
+
assert ref is not None, "DatasetRefs should have already been added to skeleton."
|
|
1368
|
+
for connection_name in connection_names:
|
|
1369
|
+
result[connection_name].append(PredictedDatasetModel.from_dataset_ref(ref))
|
|
1370
|
+
for refs in result.values():
|
|
1371
|
+
refs.sort(key=operator.attrgetter("data_coordinate"))
|
|
1372
|
+
return result
|
|
1373
|
+
|
|
1116
1374
|
@staticmethod
|
|
1117
1375
|
@final
|
|
1118
1376
|
def _find_removed(
|
|
@@ -41,6 +41,7 @@ if TYPE_CHECKING:
|
|
|
41
41
|
|
|
42
42
|
from .graph import QuantumGraph
|
|
43
43
|
from .pipeline_graph import TaskNode
|
|
44
|
+
from .quantum_graph import PredictedQuantumGraph
|
|
44
45
|
|
|
45
46
|
|
|
46
47
|
class QuantumExecutor(ABC):
|
|
@@ -92,7 +93,7 @@ class QuantumGraphExecutor(ABC):
|
|
|
92
93
|
"""
|
|
93
94
|
|
|
94
95
|
@abstractmethod
|
|
95
|
-
def execute(self, graph: QuantumGraph) -> None:
|
|
96
|
+
def execute(self, graph: QuantumGraph | PredictedQuantumGraph) -> None:
|
|
96
97
|
"""Execute whole graph.
|
|
97
98
|
|
|
98
99
|
Implementation of this method depends on particular execution model
|
|
@@ -73,7 +73,13 @@ from lsst.resources import ResourcePathExpression
|
|
|
73
73
|
from lsst.utils.logging import PeriodicLogger, getLogger
|
|
74
74
|
|
|
75
75
|
from ._status import QuantumSuccessCaveats
|
|
76
|
-
from .automatic_connection_constants import
|
|
76
|
+
from .automatic_connection_constants import (
|
|
77
|
+
LOG_OUTPUT_CONNECTION_NAME,
|
|
78
|
+
LOG_OUTPUT_TEMPLATE,
|
|
79
|
+
METADATA_OUTPUT_CONNECTION_NAME,
|
|
80
|
+
METADATA_OUTPUT_STORAGE_CLASS,
|
|
81
|
+
METADATA_OUTPUT_TEMPLATE,
|
|
82
|
+
)
|
|
77
83
|
from .graph import QuantumGraph, QuantumNode
|
|
78
84
|
|
|
79
85
|
if TYPE_CHECKING:
|
|
@@ -1612,8 +1618,8 @@ class QuantumProvenanceGraph:
|
|
|
1612
1618
|
quantum_info.setdefault("status", QuantumInfoStatus.UNKNOWN)
|
|
1613
1619
|
quantum_info.setdefault("recovered", False)
|
|
1614
1620
|
self._quanta.setdefault(quantum_key.task_label, set()).add(quantum_key)
|
|
1615
|
-
metadata_ref = node.quantum.outputs[
|
|
1616
|
-
log_ref = node.quantum.outputs[
|
|
1621
|
+
metadata_ref = node.quantum.outputs[METADATA_OUTPUT_TEMPLATE.format(label=node.taskDef.label)][0]
|
|
1622
|
+
log_ref = node.quantum.outputs[LOG_OUTPUT_TEMPLATE.format(label=node.taskDef.label)][0]
|
|
1617
1623
|
# associate run collections with specific quanta. this is important
|
|
1618
1624
|
# if the same quanta are processed in multiple runs as in recovery
|
|
1619
1625
|
# workflows.
|
|
@@ -1640,10 +1646,10 @@ class QuantumProvenanceGraph:
|
|
|
1640
1646
|
# collection combination.
|
|
1641
1647
|
dataset_runs[output_run] = DatasetRun(id=ref.id)
|
|
1642
1648
|
# save metadata and logs for easier status interpretation later
|
|
1643
|
-
if dataset_key.dataset_type_name.endswith(
|
|
1649
|
+
if dataset_key.dataset_type_name.endswith(METADATA_OUTPUT_CONNECTION_NAME):
|
|
1644
1650
|
quantum_info["metadata"] = dataset_key
|
|
1645
1651
|
quantum_runs[output_run].metadata_ref = ref
|
|
1646
|
-
if dataset_key.dataset_type_name.endswith(
|
|
1652
|
+
if dataset_key.dataset_type_name.endswith(LOG_OUTPUT_CONNECTION_NAME):
|
|
1647
1653
|
quantum_info["log"] = dataset_key
|
|
1648
1654
|
quantum_runs[output_run].log_ref = ref
|
|
1649
1655
|
for ref in itertools.chain.from_iterable(node.quantum.inputs.values()):
|
|
@@ -1826,7 +1832,7 @@ class QuantumProvenanceGraph:
|
|
|
1826
1832
|
quantum_run = quantum_info["runs"][output_run]
|
|
1827
1833
|
|
|
1828
1834
|
def read_metadata() -> None:
|
|
1829
|
-
md = self._butler_get(quantum_run.metadata_ref, storageClass=
|
|
1835
|
+
md = self._butler_get(quantum_run.metadata_ref, storageClass=METADATA_OUTPUT_STORAGE_CLASS)
|
|
1830
1836
|
try:
|
|
1831
1837
|
# Int conversion guards against spurious conversion to
|
|
1832
1838
|
# float that can apparently sometimes happen in
|
|
@@ -1964,7 +1970,10 @@ class QuantumProvenanceGraph:
|
|
|
1964
1970
|
# Avoiding publishing failed logs is difficult
|
|
1965
1971
|
# without using tagged collections, so flag them as
|
|
1966
1972
|
# merely unsuccessful unless the user requests it.
|
|
1967
|
-
if
|
|
1973
|
+
if (
|
|
1974
|
+
dataset_type_name.endswith(LOG_OUTPUT_CONNECTION_NAME)
|
|
1975
|
+
and not curse_failed_logs
|
|
1976
|
+
):
|
|
1968
1977
|
dataset_info["status"] = DatasetInfoStatus.UNSUCCESSFUL
|
|
1969
1978
|
else:
|
|
1970
1979
|
dataset_info["status"] = DatasetInfoStatus.CURSED
|
|
@@ -121,6 +121,21 @@ class ExceptionInfo(pydantic.BaseModel):
|
|
|
121
121
|
"""See `pydantic.BaseModel.model_json_schema`."""
|
|
122
122
|
return super().model_json_schema(*args, **kwargs)
|
|
123
123
|
|
|
124
|
+
@classmethod
|
|
125
|
+
def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
|
|
126
|
+
"""See `pydantic.BaseModel.model_validate`."""
|
|
127
|
+
return super().model_validate(*args, **kwargs)
|
|
128
|
+
|
|
129
|
+
@classmethod
|
|
130
|
+
def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
|
|
131
|
+
"""See `pydantic.BaseModel.model_validate_json`."""
|
|
132
|
+
return super().model_validate_json(*args, **kwargs)
|
|
133
|
+
|
|
134
|
+
@classmethod
|
|
135
|
+
def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
|
|
136
|
+
"""See `pydantic.BaseModel.model_validate_strings`."""
|
|
137
|
+
return super().model_validate_strings(*args, **kwargs)
|
|
138
|
+
|
|
124
139
|
|
|
125
140
|
class QuantumReport(pydantic.BaseModel):
|
|
126
141
|
"""Task execution report for a single Quantum.
|
|
@@ -276,6 +291,21 @@ class QuantumReport(pydantic.BaseModel):
|
|
|
276
291
|
"""See `pydantic.BaseModel.model_json_schema`."""
|
|
277
292
|
return super().model_json_schema(*args, **kwargs)
|
|
278
293
|
|
|
294
|
+
@classmethod
|
|
295
|
+
def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
|
|
296
|
+
"""See `pydantic.BaseModel.model_validate`."""
|
|
297
|
+
return super().model_validate(*args, **kwargs)
|
|
298
|
+
|
|
299
|
+
@classmethod
|
|
300
|
+
def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
|
|
301
|
+
"""See `pydantic.BaseModel.model_validate_json`."""
|
|
302
|
+
return super().model_validate_json(*args, **kwargs)
|
|
303
|
+
|
|
304
|
+
@classmethod
|
|
305
|
+
def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
|
|
306
|
+
"""See `pydantic.BaseModel.model_validate_strings`."""
|
|
307
|
+
return super().model_validate_strings(*args, **kwargs)
|
|
308
|
+
|
|
279
309
|
|
|
280
310
|
class Report(pydantic.BaseModel):
|
|
281
311
|
"""Execution report for the whole job with one or few quanta."""
|
|
@@ -348,3 +378,18 @@ class Report(pydantic.BaseModel):
|
|
|
348
378
|
def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
|
|
349
379
|
"""See `pydantic.BaseModel.model_json_schema`."""
|
|
350
380
|
return super().model_json_schema(*args, **kwargs)
|
|
381
|
+
|
|
382
|
+
@classmethod
|
|
383
|
+
def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
|
|
384
|
+
"""See `pydantic.BaseModel.model_validate`."""
|
|
385
|
+
return super().model_validate(*args, **kwargs)
|
|
386
|
+
|
|
387
|
+
@classmethod
|
|
388
|
+
def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
|
|
389
|
+
"""See `pydantic.BaseModel.model_validate_json`."""
|
|
390
|
+
return super().model_validate_json(*args, **kwargs)
|
|
391
|
+
|
|
392
|
+
@classmethod
|
|
393
|
+
def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
|
|
394
|
+
"""See `pydantic.BaseModel.model_validate_strings`."""
|
|
395
|
+
return super().model_validate_strings(*args, **kwargs)
|