lsst-pipe-base 30.0.1rc1__py3-none-any.whl → 30.2025.5100__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +20 -31
- lsst/pipe/base/_quantumContext.py +3 -3
- lsst/pipe/base/_status.py +10 -43
- lsst/pipe/base/_task_metadata.py +2 -2
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +3 -8
- lsst/pipe/base/automatic_connection_constants.py +1 -20
- lsst/pipe/base/cli/cmd/__init__.py +2 -18
- lsst/pipe/base/cli/cmd/commands.py +4 -149
- lsst/pipe/base/connectionTypes.py +160 -72
- lsst/pipe/base/connections.py +9 -6
- lsst/pipe/base/execution_reports.py +5 -0
- lsst/pipe/base/graph/graph.py +10 -11
- lsst/pipe/base/graph/quantumNode.py +4 -4
- lsst/pipe/base/graph_walker.py +10 -8
- lsst/pipe/base/log_capture.py +80 -40
- lsst/pipe/base/mp_graph_executor.py +15 -51
- lsst/pipe/base/pipeline.py +6 -5
- lsst/pipe/base/pipelineIR.py +8 -2
- lsst/pipe/base/pipelineTask.py +7 -5
- lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
- lsst/pipe/base/pipeline_graph/_edges.py +22 -32
- lsst/pipe/base/pipeline_graph/_mapping_views.py +7 -4
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +7 -14
- lsst/pipe/base/pipeline_graph/expressions.py +2 -2
- lsst/pipe/base/pipeline_graph/io.py +10 -7
- lsst/pipe/base/pipeline_graph/visualization/_dot.py +12 -13
- lsst/pipe/base/pipeline_graph/visualization/_layout.py +18 -16
- lsst/pipe/base/pipeline_graph/visualization/_merge.py +7 -4
- lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
- lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +0 -7
- lsst/pipe/base/prerequisite_helpers.py +1 -2
- lsst/pipe/base/quantum_graph/_common.py +20 -19
- lsst/pipe/base/quantum_graph/_multiblock.py +31 -37
- lsst/pipe/base/quantum_graph/_predicted.py +13 -111
- lsst/pipe/base/quantum_graph/_provenance.py +45 -1136
- lsst/pipe/base/quantum_graph/aggregator/__init__.py +1 -0
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +289 -204
- lsst/pipe/base/quantum_graph/aggregator/_config.py +9 -87
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -13
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +235 -49
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +116 -6
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +39 -29
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +351 -34
- lsst/pipe/base/quantum_graph/visualization.py +1 -5
- lsst/pipe/base/quantum_graph_builder.py +8 -21
- lsst/pipe/base/quantum_graph_executor.py +13 -116
- lsst/pipe/base/quantum_graph_skeleton.py +29 -31
- lsst/pipe/base/quantum_provenance_graph.py +12 -29
- lsst/pipe/base/separable_pipeline_executor.py +3 -19
- lsst/pipe/base/single_quantum_executor.py +42 -67
- lsst/pipe/base/struct.py +0 -4
- lsst/pipe/base/testUtils.py +3 -3
- lsst/pipe/base/tests/mocks/_storage_class.py +1 -2
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/METADATA +3 -3
- lsst_pipe_base-30.2025.5100.dist-info/RECORD +125 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/WHEEL +1 -1
- lsst/pipe/base/log_on_close.py +0 -76
- lsst/pipe/base/quantum_graph/aggregator/_workers.py +0 -303
- lsst/pipe/base/quantum_graph/formatter.py +0 -171
- lsst/pipe/base/quantum_graph/ingest_graph.py +0 -413
- lsst_pipe_base-30.0.1rc1.dist-info/RECORD +0 -129
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/zip-safe +0 -0
lsst/pipe/base/log_capture.py
CHANGED
@@ -31,15 +31,17 @@ __all__ = ["LogCapture"]
 
 import dataclasses
 import logging
+import os
+import shutil
+import tempfile
 import uuid
 from collections.abc import Iterator
-from contextlib import contextmanager
+from contextlib import contextmanager, suppress
 from logging import FileHandler
 
 import pydantic
 
-from lsst.daf.butler import Butler, LimitedButler, Quantum
-from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest
+from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
 from lsst.daf.butler.logging import (
     ButlerLogRecord,
     ButlerLogRecordHandler,
@@ -103,7 +105,7 @@ class _ExecutionLogRecordsExtra(pydantic.BaseModel):
 
         Parameters
        ----------
-        log_records : `
+        log_records : `ButlerLogRecords`
            Logs from a past attempt to run a quantum.
        """
        previous = self.model_validate(log_records.extra)
@@ -163,9 +165,7 @@ class LogCapture:
         return cls(butler, butler)
 
     @contextmanager
-    def capture_logging(
-        self, task_node: TaskNode, /, quantum: Quantum, records: ButlerLogRecords | None = None
-    ) -> Iterator[_LogCaptureContext]:
+    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[_LogCaptureContext]:
        """Configure logging system to capture logs for execution of this task.
 
        Parameters
@@ -174,9 +174,6 @@ class LogCapture:
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
-        records : `lsst.daf.butler.logging.ButlerLogRecords`, optional
-            Log record container to append to and save. If provided, streaming
-            mode is disabled (since we'll be saving logs in memory anyway).
 
        Notes
        -----
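The hunks above drop the optional `records` parameter and reduce `capture_logging` to a plain two-argument context manager. As a reference point, here is a minimal stdlib-only sketch (not the LogCapture code itself) of the install-handler/yield/always-remove shape that `capture_logging` follows:

    import logging
    from collections.abc import Iterator
    from contextlib import contextmanager


    @contextmanager
    def capture_to_handler(handler: logging.Handler) -> Iterator[logging.Handler]:
        root = logging.getLogger()
        root.addHandler(handler)
        try:
            yield handler
        finally:
            # Removal must be unconditional so a failed quantum cannot
            # leak handlers into subsequent executions.
            root.removeHandler(handler)
            handler.close()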
@@ -208,40 +205,44 @@ class LogCapture:
 
         # Add a handler to the root logger to capture execution log output.
         if log_dataset_name is not None:
-            try:
-                [ref] = quantum.outputs[log_dataset_name]
-            except LookupError as exc:
-                raise InvalidQuantumError(
-                    f"Quantum outputs is missing log output dataset type {log_dataset_name};"
-                    " this could happen due to inconsistent options between QuantumGraph generation"
-                    " and execution"
-                ) from exc
             # Either accumulate into ButlerLogRecords or stream JSON records to
             # file and ingest that (ingest is possible only with full butler).
-            if self.stream_json_logs and self.full_butler is not None
-                ...
+            if self.stream_json_logs and self.full_butler is not None:
+                # Create the log file in a temporary directory rather than
+                # creating a temporary file. This is necessary because
+                # temporary files are created with restrictive permissions
+                # and during file ingest these permissions persist in the
+                # datastore. Using a temp directory allows us to create
+                # a file with umask default permissions.
+                tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")
+
+                # Construct a file to receive the log records and "touch" it.
+                log_file = os.path.join(tmpdir, f"butler-log-{task_node.label}.json")
+                with open(log_file, "w"):
+                    pass
+                log_handler_file = FileHandler(log_file)
+                log_handler_file.setFormatter(JsonLogFormatter())
+                logging.getLogger().addHandler(log_handler_file)
+
+                try:
+                    with ButlerMDC.set_mdc(mdc):
+                        yield ctx
+                finally:
+                    # Ensure that the logs are stored in butler.
+                    logging.getLogger().removeHandler(log_handler_file)
+                    log_handler_file.close()
+                    if ctx.extra:
+                        with open(log_file, "a") as log_stream:
+                            ButlerLogRecords.write_streaming_extra(
+                                log_stream,
+                                ctx.extra.model_dump_json(exclude_unset=True, exclude_defaults=True),
+                            )
+                    if ctx.store:
+                        self._ingest_log_records(quantum, log_dataset_name, log_file)
+                    shutil.rmtree(tmpdir, ignore_errors=True)
 
             else:
-                log_handler_memory = ButlerLogRecordHandler(
+                log_handler_memory = ButlerLogRecordHandler()
                 logging.getLogger().addHandler(log_handler_memory)
 
                 try:
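The comment in the added block explains the `mkdtemp` choice: `mkstemp`/`NamedTemporaryFile` create files with mode 0o600, and an ingest that moves the file carries that restrictive mode into the datastore, whereas a plain `open()` inside a fresh directory honours the process umask. A small stdlib-only demonstration of that difference (all names here are illustrative, not part of the package):

    import os
    import shutil
    import stat
    import tempfile

    # mkstemp (the machinery behind NamedTemporaryFile) creates 0o600 files.
    fd, private_path = tempfile.mkstemp()
    os.close(fd)
    private_mode = stat.S_IMODE(os.stat(private_path).st_mode)
    os.remove(private_path)

    # A plain open() inside a private mkdtemp() directory honours the umask,
    # e.g. 0o644 with the common umask of 022.
    tmpdir = tempfile.mkdtemp(prefix="demo-logs-")
    shared_path = os.path.join(tmpdir, "log.json")
    with open(shared_path, "w"):
        pass
    shared_mode = stat.S_IMODE(os.stat(shared_path).st_mode)
    shutil.rmtree(tmpdir)

    print(oct(private_mode), oct(shared_mode))  # typically 0o600 vs 0o644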
@@ -260,6 +261,7 @@ class LogCapture:
                     logging.getLogger().removeHandler(log_handler_memory)
                     if ctx.store:
                         self._store_log_records(quantum, log_dataset_name, log_handler_memory)
+                        log_handler_memory.records.clear()
 
         else:
             with ButlerMDC.set_mdc(mdc):
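The new `log_handler_memory.records.clear()` call releases the buffered records once they have been stored. A hedged sketch of the same accumulate-store-clear pattern, with a plain stdlib handler standing in for `ButlerLogRecordHandler`:

    import logging


    class BufferingHandler(logging.Handler):
        """Collect records in memory, standing in for ButlerLogRecordHandler."""

        def __init__(self) -> None:
            super().__init__()
            self.records: list[logging.LogRecord] = []

        def emit(self, record: logging.LogRecord) -> None:
            self.records.append(record)


    handler = BufferingHandler()
    logging.getLogger().addHandler(handler)
    logging.getLogger().warning("one quantum's worth of logs")
    logging.getLogger().removeHandler(handler)

    persisted = list(handler.records)  # stand-in for storing the records
    handler.records.clear()  # free the buffer in a long-lived executor process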
@@ -279,3 +281,41 @@ class LogCapture:
                 ) from exc
 
         self.butler.put(log_handler.records, ref)
+
+    def _ingest_log_records(self, quantum: Quantum, dataset_type: str, filename: str) -> None:
+        # If we are logging to an external file we must always try to
+        # close it.
+        assert self.full_butler is not None, "Expected to have full butler for ingest"
+        ingested = False
+        try:
+            # DatasetRef has to be in the Quantum outputs, can lookup by name.
+            try:
+                [ref] = quantum.outputs[dataset_type]
+            except LookupError as exc:
+                raise InvalidQuantumError(
+                    f"Quantum outputs is missing log output dataset type {dataset_type};"
+                    " this could happen due to inconsistent options between QuantumGraph generation"
+                    " and execution"
+                ) from exc
+
+            # Need to ingest this file directly into butler.
+            dataset = FileDataset(path=filename, refs=ref)
+            try:
+                self.full_butler.ingest(dataset, transfer="move")
+                ingested = True
+            except NotImplementedError:
+                # Some datastores can't receive files (e.g. in-memory datastore
+                # when testing), we store empty list for those just to have a
+                # dataset. Alternative is to read the file as a
+                # ButlerLogRecords object and put it.
+                _LOG.info(
+                    "Log records could not be stored in this butler because the"
+                    " datastore can not ingest files, empty record list is stored instead."
+                )
+                records = ButlerLogRecords.from_records([])
+                self.full_butler.put(records, ref)
+        finally:
+            # remove file if it is not ingested
+            if not ingested:
+                with suppress(OSError):
+                    os.remove(filename)
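`suppress` is newly imported above for exactly this kind of best-effort cleanup. The two forms below are equivalent; the path is hypothetical:

    import os
    from contextlib import suppress

    filename = "/tmp/example-log.json"  # hypothetical path

    with suppress(OSError):
        os.remove(filename)

    # ...is the same as:
    try:
        os.remove(filename)
    except OSError:
        pass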
lsst/pipe/base/mp_graph_executor.py
CHANGED
@@ -39,24 +39,20 @@ import sys
 import threading
 import time
 import uuid
-from contextlib import ExitStack
 from typing import Literal, cast
 
 import networkx
 
 from lsst.daf.butler import DataCoordinate, Quantum
 from lsst.daf.butler.cli.cliLog import CliLog
-from lsst.daf.butler.logging import ButlerLogRecords
 from lsst.utils.threads import disable_implicit_threading
 
 from ._status import InvalidQuantumError, RepeatableQuantumError
-from ._task_metadata import TaskMetadata
 from .execution_graph_fixup import ExecutionGraphFixup
 from .graph import QuantumGraph
 from .graph_walker import GraphWalker
-from .log_on_close import LogOnClose
 from .pipeline_graph import TaskNode
-from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo
+from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo
 from .quantum_graph_executor import QuantumExecutor, QuantumGraphExecutor
 from .quantum_reports import ExecutionStatus, QuantumReport, Report
 
@@ -519,9 +515,7 @@ class MPGraphExecutor(QuantumGraphExecutor):
             start_method = "spawn"
         self._start_method = start_method
 
-    def execute(
-        self, graph: QuantumGraph | PredictedQuantumGraph, *, provenance_graph_file: str | None = None
-    ) -> None:
+    def execute(self, graph: QuantumGraph | PredictedQuantumGraph) -> None:
         # Docstring inherited from QuantumGraphExecutor.execute
         old_graph: QuantumGraph | None = None
         if isinstance(graph, QuantumGraph):
@@ -531,31 +525,14 @@ class MPGraphExecutor(QuantumGraphExecutor):
             new_graph = graph
         xgraph = self._make_xgraph(new_graph, old_graph)
         self._report = Report(qgraphSummary=new_graph._make_summary())
-        ...
-                provenance_graph_file,
-                exit_stack=exit_stack,
-                log_on_close=LogOnClose(_LOG.log),
-                predicted=new_graph,
-            )
-            try:
-                if self._num_proc > 1:
-                    self._execute_quanta_mp(xgraph, self._report)
-                else:
-                    self._execute_quanta_in_process(xgraph, self._report, provenance_writer)
-            except Exception as exc:
-                self._report.set_exception(exc)
-                raise
-            if provenance_writer is not None:
-                provenance_writer.write_overall_inputs()
-                provenance_writer.write_packages()
-                provenance_writer.write_init_outputs(assume_existence=True)
+        try:
+            if self._num_proc > 1:
+                self._execute_quanta_mp(xgraph, self._report)
+            else:
+                self._execute_quanta_in_process(xgraph, self._report)
+        except Exception as exc:
+            self._report.set_exception(exc)
+            raise
 
     def _make_xgraph(
         self, new_graph: PredictedQuantumGraph, old_graph: QuantumGraph | None
@@ -599,9 +576,7 @@ class MPGraphExecutor(QuantumGraphExecutor):
             raise MPGraphExecutorError("Updated execution graph has dependency cycle.")
         return xgraph
 
-    def _execute_quanta_in_process(
-        self, xgraph: networkx.DiGraph, report: Report, provenance_writer: ProvenanceQuantumGraphWriter | None
-    ) -> None:
+    def _execute_quanta_in_process(self, xgraph: networkx.DiGraph, report: Report) -> None:
        """Execute all Quanta in current process.
 
        Parameters
@@ -614,9 +589,6 @@ class MPGraphExecutor(QuantumGraphExecutor):
             `.quantum_graph.PredictedQuantumGraph.quantum_only_xgraph`.
         report : `Report`
             Object for reporting execution status.
-        provenance_writer : `.quantum_graph.ProvenanceQuantumGraphWriter` or \
-            `None`
-            Object for recording provenance.
         """
 
         def tiebreaker_sort_key(quantum_id: uuid.UUID) -> tuple:
@@ -634,19 +606,16 @@ class MPGraphExecutor(QuantumGraphExecutor):
 
             _LOG.debug("Executing %s (%s@%s)", quantum_id, task_node.label, data_id)
             fail_exit_code: int | None = None
-            task_metadata: TaskMetadata | None = None
-            task_logs = ButlerLogRecords([])
             try:
                 # For some exception types we want to exit immediately with
                 # exception-specific exit code, but we still want to start
                 # debugger before exiting if debugging is enabled.
                 try:
-                    ...
-                        task_node, quantum, quantum_id=quantum_id
+                    _, quantum_report = self._quantum_executor.execute(
+                        task_node, quantum, quantum_id=quantum_id
                     )
-                    if
-                    report.quantaReports.append(
-                    task_metadata = execution_result.task_metadata
+                    if quantum_report:
+                        report.quantaReports.append(quantum_report)
                     success_count += 1
                     walker.finish(quantum_id)
                 except RepeatableQuantumError as exc:
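The loop above pulls ready quanta from a `GraphWalker` and calls `walker.finish(...)` as each one completes. `GraphWalker` itself is internal to the package; the following is only a rough stand-in showing the underlying ready-set idea on a plain networkx DAG:

    import networkx


    def walk(xgraph: networkx.DiGraph) -> None:
        done: set[str] = set()
        remaining = set(xgraph.nodes)
        while remaining:
            ready = sorted(
                n for n in remaining if all(p in done for p in xgraph.predecessors(n))
            )  # sorted() plays the role of tiebreaker_sort_key above
            if not ready:
                raise RuntimeError("graph has a dependency cycle")
            for node in ready:
                print("executing", node)  # stand-in for executing one quantum
                done.add(node)            # the walker.finish(...) step
                remaining.discard(node)


    walk(networkx.DiGraph([("a", "b"), ("a", "c"), ("b", "d"), ("c", "d")]))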
@@ -732,11 +701,6 @@ class MPGraphExecutor(QuantumGraphExecutor):
                 )
                 failed_count += 1
 
-            if provenance_writer is not None:
-                provenance_writer.write_quantum_provenance(
-                    quantum_id, metadata=task_metadata, logs=task_logs
-                )
-
         _LOG.info(
             "Executed %d quanta successfully, %d failed and %d remain out of total %d quanta.",
             success_count,
lsst/pipe/base/pipeline.py
CHANGED
@@ -54,12 +54,13 @@ from lsst.utils.introspection import get_full_type_name
 
 from . import automatic_connection_constants as acc
 from . import pipeline_graph, pipelineIR
-from ._instrument import Instrument as
+from ._instrument import Instrument as PipeBaseInstrument
 from .config import PipelineTaskConfig
 from .connections import PipelineTaskConnections
 from .pipelineTask import PipelineTask
 
 if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
+    from lsst.obs.base import Instrument
     from lsst.pex.config import Config
 
 # ----------------------------------
@@ -495,7 +496,7 @@ class Pipeline:
         Returns
         -------
         pipeline: `Pipeline`
-
+            The new pipeline.
         """
         return cls.fromIR(copy.deepcopy(pipeline._pipelineIR))
 
@@ -605,7 +606,7 @@ class Pipeline:
 
     @property
     def subsets(self) -> MappingProxyType[str, set]:
-        """Returns a `
+        """Returns a `MappingProxyType` where the keys are the labels of
        labeled subsets in the `Pipeline` and the values are the set of task
        labels contained within that subset.
        """
@@ -701,7 +702,7 @@ class Pipeline:
         """
         instrument_class_name = self._pipelineIR.instrument
         if instrument_class_name is not None:
-            instrument_class = cast(
+            instrument_class = cast(PipeBaseInstrument, doImportType(instrument_class_name))
             if instrument_class is not None:
                 return DataCoordinate.standardize(instrument=instrument_class.getName(), universe=universe)
         return DataCoordinate.make_empty(universe)
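`doImportType` (from `lsst.utils`) resolves the dotted instrument class name from the pipeline definition into the class object; the `cast` only informs the type checker. A minimal importlib-based equivalent of that lookup (a sketch, not the lsst.utils implementation):

    import importlib


    def import_type(dotted: str) -> type:
        module_name, _, attr = dotted.rpartition(".")
        cls = getattr(importlib.import_module(module_name), attr)
        if not isinstance(cls, type):
            raise TypeError(f"{dotted} is not a type")
        return cls


    # e.g. resolves the stdlib class from its dotted name
    print(import_type("collections.OrderedDict"))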
@@ -892,7 +893,7 @@ class Pipeline:
             raise NameError(f"Label {label} does not appear in this pipeline")
         taskClass: type[PipelineTask] = doImportType(taskIR.klass)
         config = taskClass.ConfigClass()
-        instrument:
+        instrument: PipeBaseInstrument | None = None
         if (instrumentName := self._pipelineIR.instrument) is not None:
             instrument_cls: type = doImportType(instrumentName)
             instrument = instrument_cls()
lsst/pipe/base/pipelineIR.py
CHANGED
@@ -220,6 +220,12 @@ class LabeledSubset:
 class ParametersIR:
     """Intermediate representation of parameters that are global to a pipeline.
 
+    Attributes
+    ----------
+    mapping : `dict` [`str`, `str`]
+        A mutable mapping of identifiers as keys, and shared configuration
+        as values.
+
     Notes
     -----
     These parameters are specified under a top level key named ``parameters``
@@ -337,7 +343,7 @@ class ConfigIR:
         )
         return new_config
 
-    def maybe_merge(self, other_config: ConfigIR) -> Generator[ConfigIR]:
+    def maybe_merge(self, other_config: ConfigIR) -> Generator[ConfigIR, None, None]:
        """Merge another instance of a `ConfigIR` into this instance if
        possible. This function returns a generator that is either self
        if the configs were merged, or self, and other_config if that could
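The `maybe_merge` change is annotation-only: bare `Generator[ConfigIR]` relies on type-parameter defaults (PEP 696) that older type checkers and Python versions do not accept, while the three-argument form spells out SendType and ReturnType explicitly and works everywhere. A self-contained illustration:

    from collections.abc import Generator


    def merged(values: list[int]) -> Generator[int, None, None]:
        # yields int, accepts nothing via send(), returns None
        yield from values


    print(list(merged([1, 2, 3])))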
@@ -700,7 +706,7 @@ class PipelineIR:
 
         Parameters
         ----------
-        loaded_yaml
+        loaded_yaml: `dict`
            A dictionary which matches the structure that would be produced
            by a yaml reader which parses a pipeline definition document
        """
lsst/pipe/base/pipelineTask.py
CHANGED
@@ -55,7 +55,7 @@ class PipelineTask(Task):
     resulting data is also stored in a data butler.
 
     PipelineTask inherits from a `~lsst.pipe.base.Task` and uses the same
-    configuration mechanism based on
+    configuration mechanism based on :ref:`lsst.pex.config`. `PipelineTask`
     classes also have a `PipelineTaskConnections` class associated with their
     config which defines all of the IO a `PipelineTask` will need to do.
     PipelineTask sub-class typically implements `run()` method which receives
@@ -75,6 +75,12 @@ class PipelineTask(Task):
     PipelineTask base class constructor, but may support other signatures as
     well.
 
+    Attributes
+    ----------
+    canMultiprocess : bool, True by default (class attribute)
+        This class attribute is checked by execution framework, sub-classes
+        can set it to ``False`` in case task does not support multiprocessing.
+
     Parameters
     ----------
     config : `~lsst.pex.config.Config`, optional
@@ -96,11 +102,7 @@ class PipelineTask(Task):
     """
 
     ConfigClass: ClassVar[type[PipelineTaskConfig]]
-
     canMultiprocess: ClassVar[bool] = True
-    """Whether this task can be run by an executor that uses subprocesses for
-    parallelism.
-    """
 
     def __init__(
         self,
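With the attribute docstring folded into the class-level `Attributes` section above, `canMultiprocess` keeps its meaning: executors consult it before using subprocess parallelism. A hedged sketch of such a check (illustrative names, not the real `MPGraphExecutor` logic):

    def choose_num_proc(task_classes: list[type], requested: int) -> int:
        # Fall back to in-process execution if any task opts out.
        if requested > 1 and not all(
            getattr(t, "canMultiprocess", True) for t in task_classes
        ):
            return 1
        return requested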
lsst/pipe/base/pipeline_graph/_dataset_types.py
CHANGED
@@ -106,8 +106,8 @@ class DatasetTypeNode:
         The internal networkx graph.
     get_registered : `~collections.abc.Callable` or `None`
         Callable that takes a dataset type name and returns the
-        ...
+        `DatasetType` registered in the data repository, or `None` if it is
+        not registered.
     dimensions : `lsst.daf.butler.DimensionUniverse`
         Definitions of all dimensions.
     previous : `DatasetTypeNode` or `None`
lsst/pipe/base/pipeline_graph/_edges.py
CHANGED
@@ -30,7 +30,7 @@ __all__ = ("Edge", "ReadEdge", "WriteEdge")
 
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Mapping, Sequence
-from typing import Any, ClassVar, Self
+from typing import Any, ClassVar, Self, TypeVar
 
 from lsst.daf.butler import DatasetRef, DatasetType, DimensionUniverse, StorageClassFactory
 from lsst.daf.butler.registry import MissingDatasetTypeError
@@ -40,6 +40,8 @@ from ..connectionTypes import BaseConnection
 from ._exceptions import ConnectionTypeConsistencyError, IncompatibleDatasetTypeError
 from ._nodes import NodeKey, NodeType
 
+_S = TypeVar("_S", bound="Edge")
+
 
 @immutable
 class Edge(ABC):
@@ -170,7 +172,7 @@ class Edge(ABC):
         """
         return self.parent_dataset_type_name
 
-    def diff
+    def diff(self: _S, other: _S, connection_type: str = "connection") -> list[str]:
        """Compare this edge to another one from a possibly-different
        configuration of the same task label.
 
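The new `_S` TypeVar (bound to `Edge`) is the pre-PEP 695 spelling of a method-level self type: annotating both `self` and `other` as `_S` tells the checker that `diff` compares two edges of the same subclass, which newer Pythons could express as `def diff[S: Edge](...)`. The same pattern in miniature:

    from typing import TypeVar

    _B = TypeVar("_B", bound="Base")


    class Base:
        def diff(self: _B, other: _B) -> list[str]:
            # both arguments are statically required to be the *same* subclass
            return []


    class Derived(Base):
        pass


    print(Derived().diff(Derived()))  # OK: both sides are Derived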
@@ -478,11 +480,11 @@ class ReadEdge(Edge):
         Parameters
         ----------
         current : `lsst.daf.butler.DatasetType` or `None`
-            The current graph-wide
-            ...
+            The current graph-wide `DatasetType`, or `None`. This will always
+            be the registry's definition of the parent dataset type, if one
+            exists. If not, it will be the dataset type definition from the
+            task in the graph that writes it, if there is one. If there is no
+            such task, this will be `None`.
         is_initial_query_constraint : `bool`
             Whether this dataset type is currently marked as a constraint on
             the initial data ID query in QuantumGraph generation.
@@ -494,7 +496,7 @@ class ReadEdge(Edge):
         producer : `str` or `None`
             The label of the task that produces this dataset type in the
             pipeline, or `None` if it is an overall input.
-        consumers :
+        consumers : `Sequence` [ `str` ]
             Labels for other consuming tasks that have already participated in
             this dataset type's resolution.
         is_registered : `bool`
@@ -510,7 +512,7 @@ class ReadEdge(Edge):
 
         Returns
         -------
-        dataset_type :
+        dataset_type : `DatasetType`
             The updated graph-wide dataset type. If ``current`` was provided,
             this must be equal to it.
         is_initial_query_constraint : `bool`
@@ -657,25 +659,13 @@ class ReadEdge(Edge):
                 # compatible), since neither connection should take
                 # precedence.
                 if dataset_type != current:
-                    ...
-                        current.dimensions,
-                        "/".join(sorted(all_storage_classes)),
-                    )
-                    else:
-                        raise MissingDatasetTypeError(
-                            f"Definitions differ for input dataset type "
-                            f"{self.parent_dataset_type_name!r}; task {self.task_label!r} has "
-                            f"{dataset_type}, but the definition from {report_current_origin()} is "
-                            f"{current}. If the storage classes are compatible but different, "
-                            "registering the dataset type in the data repository in advance will avoid "
-                            "this error."
-                        )
+                    raise MissingDatasetTypeError(
+                        f"Definitions differ for input dataset type {self.parent_dataset_type_name!r}; "
+                        f"task {self.task_label!r} has {dataset_type}, but the definition "
+                        f"from {report_current_origin()} is {current}. If the storage classes are "
+                        "compatible but different, registering the dataset type in the data repository "
+                        "in advance will avoid this error."
+                    )
                 elif not visualization_only and not dataset_type.is_compatible_with(current):
                     raise IncompatibleDatasetTypeError(
                         f"Incompatible definition for input dataset type {self.parent_dataset_type_name!r}; "
@@ -798,15 +788,15 @@ class WriteEdge(Edge):
         Parameters
         ----------
         current : `lsst.daf.butler.DatasetType` or `None`
-            The current graph-wide
-            ...
+            The current graph-wide `DatasetType`, or `None`. This will always
+            be the registry's definition of the parent dataset type, if one
+            exists.
         universe : `lsst.daf.butler.DimensionUniverse`
             Object that holds all dimension definitions.
 
         Returns
         -------
-        dataset_type :
+        dataset_type : `DatasetType`
             A dataset type compatible with this edge. If ``current`` was
             provided, this must be equal to it.
 
lsst/pipe/base/pipeline_graph/_mapping_views.py
CHANGED
@@ -27,7 +27,7 @@
 from __future__ import annotations
 
 from collections.abc import Iterable, Iterator, Mapping, Sequence
-from typing import Any, ClassVar, cast, overload
+from typing import Any, ClassVar, TypeVar, cast, overload
 
 import networkx
 
@@ -36,8 +36,11 @@ from ._exceptions import UnresolvedGraphError
 from ._nodes import NodeKey, NodeType
 from ._tasks import TaskInitNode, TaskNode
 
+_N = TypeVar("_N", covariant=True)
+_T = TypeVar("_T")
 
-class MappingView[N](Mapping[str, N]):
+
+class MappingView(Mapping[str, _N]):
     """Base class for mapping views into nodes of certain types in a
     `PipelineGraph`.
 
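Here the PEP 695 declaration `class MappingView[N](Mapping[str, N])` is rewritten with explicit TypeVars for older Pythons; `_N` is covariant because the view only produces values. A self-contained sketch of the same back-port:

    from collections.abc import Iterator, Mapping
    from typing import TypeVar

    _N = TypeVar("_N", covariant=True)


    class View(Mapping[str, _N]):
        def __init__(self, data: dict[str, _N]) -> None:
            self._data = data

        def __getitem__(self, key: str) -> _N:
            return self._data[key]

        def __iter__(self) -> Iterator[str]:
            return iter(self._data)

        def __len__(self) -> int:
            return len(self._data)


    print(View({"a": 1})["a"])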
@@ -71,7 +74,7 @@ class MappingView[N](Mapping[str, N]):
             self._keys = self._make_keys(self._parent_xgraph)
         return iter(self._keys)
 
-    def __getitem__(self, key: str) ->
+    def __getitem__(self, key: str) -> _N:
         return self._parent_xgraph.nodes[NodeKey(self._NODE_TYPE, key)]["instance"]
 
     def __len__(self) -> int:
@@ -227,7 +230,7 @@ class DatasetTypeMappingView(MappingView[DatasetTypeNode]):
     def get_if_resolved(self, key: str) -> DatasetTypeNode | None: ...  # pragma: nocover
 
     @overload
-    def get_if_resolved
+    def get_if_resolved(self, key: str, default: _T) -> DatasetTypeNode | _T: ...  # pragma: nocover
 
     def get_if_resolved(self, key: str, default: Any = None) -> DatasetTypeNode | Any:
         """Get a node or return a default if it has not been resolved.
lsst/pipe/base/pipeline_graph/_pipeline_graph.py
CHANGED
@@ -33,7 +33,7 @@ import itertools
 import json
 import logging
 from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set
-from typing import TYPE_CHECKING, Any, BinaryIO, Literal, cast
+from typing import TYPE_CHECKING, Any, BinaryIO, Literal, TypeVar, cast
 
 import networkx
 import networkx.algorithms.bipartite
@@ -79,6 +79,9 @@ if TYPE_CHECKING:
     from ..pipeline import TaskDef
     from ..pipelineTask import PipelineTask
 
+
+_G = TypeVar("_G", bound=networkx.DiGraph | networkx.MultiDiGraph)
+
 _LOG = logging.getLogger("lsst.pipe.base.pipeline_graph")
 
 
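`_G` is bounded to the union of the two networkx graph classes so that helpers like `_transform_xgraph_state` (further down) return the same graph type they are given, rather than collapsing everything to `DiGraph`. A minimal illustration of the idea:

    from typing import TypeVar

    import networkx

    _G = TypeVar("_G", bound=networkx.DiGraph | networkx.MultiDiGraph)


    def relabel_state(xgraph: _G) -> _G:
        # mutate node attributes in place, then hand the same object back
        for _, attrs in xgraph.nodes(data=True):
            attrs.setdefault("state", "exported")
        return xgraph


    # the checker infers MultiDiGraph in, MultiDiGraph out
    g: networkx.MultiDiGraph = relabel_state(networkx.MultiDiGraph())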
@@ -894,10 +897,6 @@ class PipelineGraph:
             New config objects or overrides to apply to copies of the current
             config objects, with task labels as the keywords.
 
-        Returns
-        -------
-        None
-
         Raises
         ------
         ValueError
@@ -1633,7 +1632,7 @@ class PipelineGraph:
 
         Returns
         -------
-        subgraphs :
+        subgraphs : `Iterable` [ `PipelineGraph` ]
             An iterable over component subgraphs that could be run
             independently (they have only overall inputs in common). May be a
             lazy iterator.
@@ -1756,10 +1755,6 @@ class PipelineGraph:
             not considered part of the pipeline graph in other respects, but it
             does get written with other provenance datasets).
 
-        Returns
-        -------
-        None
-
         Raises
        ------
        lsst.daf.butler.MissingDatasetTypeError
@@ -2184,9 +2179,7 @@ class PipelineGraph:
         ]
         return networkx.algorithms.bipartite.projected_graph(networkx.DiGraph(bipartite_xgraph), task_keys)
 
-    def _transform_xgraph_state
-        self, xgraph: G, skip_edges: bool
-    ) -> G:
+    def _transform_xgraph_state(self, xgraph: _G, skip_edges: bool) -> _G:
         """Transform networkx graph attributes in-place from the internal
         "instance" attributes to the documented exported attributes.
 
@@ -2235,7 +2228,7 @@ class PipelineGraph:
 
         Parameters
         ----------
-        updates :
+        updates : `Mapping` [ `str`, `TaskNode` ]
            New task nodes with task label keys. All keys must be task labels
            that are already present in the graph.
        check_edges_unchanged : `bool`, optional