lsst-pipe-base 30.0.1rc1__py3-none-any.whl → 30.2025.5100__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. lsst/pipe/base/_instrument.py +20 -31
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +10 -43
  4. lsst/pipe/base/_task_metadata.py +2 -2
  5. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +3 -8
  6. lsst/pipe/base/automatic_connection_constants.py +1 -20
  7. lsst/pipe/base/cli/cmd/__init__.py +2 -18
  8. lsst/pipe/base/cli/cmd/commands.py +4 -149
  9. lsst/pipe/base/connectionTypes.py +160 -72
  10. lsst/pipe/base/connections.py +9 -6
  11. lsst/pipe/base/execution_reports.py +5 -0
  12. lsst/pipe/base/graph/graph.py +10 -11
  13. lsst/pipe/base/graph/quantumNode.py +4 -4
  14. lsst/pipe/base/graph_walker.py +10 -8
  15. lsst/pipe/base/log_capture.py +80 -40
  16. lsst/pipe/base/mp_graph_executor.py +15 -51
  17. lsst/pipe/base/pipeline.py +6 -5
  18. lsst/pipe/base/pipelineIR.py +8 -2
  19. lsst/pipe/base/pipelineTask.py +7 -5
  20. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  21. lsst/pipe/base/pipeline_graph/_edges.py +22 -32
  22. lsst/pipe/base/pipeline_graph/_mapping_views.py +7 -4
  23. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +7 -14
  24. lsst/pipe/base/pipeline_graph/expressions.py +2 -2
  25. lsst/pipe/base/pipeline_graph/io.py +10 -7
  26. lsst/pipe/base/pipeline_graph/visualization/_dot.py +12 -13
  27. lsst/pipe/base/pipeline_graph/visualization/_layout.py +18 -16
  28. lsst/pipe/base/pipeline_graph/visualization/_merge.py +7 -4
  29. lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
  30. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +0 -7
  31. lsst/pipe/base/prerequisite_helpers.py +1 -2
  32. lsst/pipe/base/quantum_graph/_common.py +20 -19
  33. lsst/pipe/base/quantum_graph/_multiblock.py +31 -37
  34. lsst/pipe/base/quantum_graph/_predicted.py +13 -111
  35. lsst/pipe/base/quantum_graph/_provenance.py +45 -1136
  36. lsst/pipe/base/quantum_graph/aggregator/__init__.py +1 -0
  37. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +289 -204
  38. lsst/pipe/base/quantum_graph/aggregator/_config.py +9 -87
  39. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -13
  40. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +235 -49
  41. lsst/pipe/base/quantum_graph/aggregator/_structs.py +116 -6
  42. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +39 -29
  43. lsst/pipe/base/quantum_graph/aggregator/_writer.py +351 -34
  44. lsst/pipe/base/quantum_graph/visualization.py +1 -5
  45. lsst/pipe/base/quantum_graph_builder.py +8 -21
  46. lsst/pipe/base/quantum_graph_executor.py +13 -116
  47. lsst/pipe/base/quantum_graph_skeleton.py +29 -31
  48. lsst/pipe/base/quantum_provenance_graph.py +12 -29
  49. lsst/pipe/base/separable_pipeline_executor.py +3 -19
  50. lsst/pipe/base/single_quantum_executor.py +42 -67
  51. lsst/pipe/base/struct.py +0 -4
  52. lsst/pipe/base/testUtils.py +3 -3
  53. lsst/pipe/base/tests/mocks/_storage_class.py +1 -2
  54. lsst/pipe/base/version.py +1 -1
  55. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/METADATA +3 -3
  56. lsst_pipe_base-30.2025.5100.dist-info/RECORD +125 -0
  57. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/WHEEL +1 -1
  58. lsst/pipe/base/log_on_close.py +0 -76
  59. lsst/pipe/base/quantum_graph/aggregator/_workers.py +0 -303
  60. lsst/pipe/base/quantum_graph/formatter.py +0 -171
  61. lsst/pipe/base/quantum_graph/ingest_graph.py +0 -413
  62. lsst_pipe_base-30.0.1rc1.dist-info/RECORD +0 -129
  63. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/entry_points.txt +0 -0
  64. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/COPYRIGHT +0 -0
  65. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/LICENSE +0 -0
  66. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/bsd_license.txt +0 -0
  67. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/gpl-v3.0.txt +0 -0
  68. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/top_level.txt +0 -0
  69. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/zip-safe +0 -0
@@ -31,15 +31,17 @@ __all__ = ["LogCapture"]
 
 import dataclasses
 import logging
+import os
+import shutil
+import tempfile
 import uuid
 from collections.abc import Iterator
-from contextlib import contextmanager
+from contextlib import contextmanager, suppress
 from logging import FileHandler
 
 import pydantic
 
-from lsst.daf.butler import Butler, LimitedButler, Quantum
-from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest
+from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
 from lsst.daf.butler.logging import (
     ButlerLogRecord,
     ButlerLogRecordHandler,
@@ -103,7 +105,7 @@ class _ExecutionLogRecordsExtra(pydantic.BaseModel):
 
         Parameters
         ----------
-        log_records : `lsst.daf.butler.ButlerLogRecords`
+        log_records : `ButlerLogRecords`
             Logs from a past attempt to run a quantum.
         """
         previous = self.model_validate(log_records.extra)
@@ -163,9 +165,7 @@ class LogCapture:
         return cls(butler, butler)
 
     @contextmanager
-    def capture_logging(
-        self, task_node: TaskNode, /, quantum: Quantum, records: ButlerLogRecords | None = None
-    ) -> Iterator[_LogCaptureContext]:
+    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[_LogCaptureContext]:
         """Configure logging system to capture logs for execution of this task.
 
         Parameters
@@ -174,9 +174,6 @@ class LogCapture:
             The task definition.
         quantum : `~lsst.daf.butler.Quantum`
             Single Quantum instance.
-        records : `lsst.daf.butler.logging.ButlerLogRecords`, optional
-            Log record container to append to and save. If provided, streaming
-            mode is disabled (since we'll be saving logs in memory anyway).
 
         Notes
         -----
@@ -208,40 +205,44 @@ class LogCapture:
 
         # Add a handler to the root logger to capture execution log output.
         if log_dataset_name is not None:
-            try:
-                [ref] = quantum.outputs[log_dataset_name]
-            except LookupError as exc:
-                raise InvalidQuantumError(
-                    f"Quantum outputs is missing log output dataset type {log_dataset_name};"
-                    " this could happen due to inconsistent options between QuantumGraph generation"
-                    " and execution"
-                ) from exc
             # Either accumulate into ButlerLogRecords or stream JSON records to
             # file and ingest that (ingest is possible only with full butler).
-            if self.stream_json_logs and self.full_butler is not None and records is None:
-                with TemporaryForIngest(self.full_butler, ref) as temporary:
-                    log_handler_file = FileHandler(temporary.ospath)
-                    log_handler_file.setFormatter(JsonLogFormatter())
-                    logging.getLogger().addHandler(log_handler_file)
-
-                    try:
-                        with ButlerMDC.set_mdc(mdc):
-                            yield ctx
-                    finally:
-                        # Ensure that the logs are stored in butler.
-                        logging.getLogger().removeHandler(log_handler_file)
-                        log_handler_file.close()
-                        if ctx.extra:
-                            with open(temporary.ospath, "a") as log_stream:
-                                ButlerLogRecords.write_streaming_extra(
-                                    log_stream,
-                                    ctx.extra.model_dump_json(exclude_unset=True, exclude_defaults=True),
-                                )
-                        if ctx.store:
-                            temporary.ingest()
+            if self.stream_json_logs and self.full_butler is not None:
+                # Create the log file in a temporary directory rather than
+                # creating a temporary file. This is necessary because
+                # temporary files are created with restrictive permissions
+                # and during file ingest these permissions persist in the
+                # datastore. Using a temp directory allows us to create
+                # a file with umask default permissions.
+                tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")
+
+                # Construct a file to receive the log records and "touch" it.
+                log_file = os.path.join(tmpdir, f"butler-log-{task_node.label}.json")
+                with open(log_file, "w"):
+                    pass
+                log_handler_file = FileHandler(log_file)
+                log_handler_file.setFormatter(JsonLogFormatter())
+                logging.getLogger().addHandler(log_handler_file)
+
+                try:
+                    with ButlerMDC.set_mdc(mdc):
+                        yield ctx
+                finally:
+                    # Ensure that the logs are stored in butler.
+                    logging.getLogger().removeHandler(log_handler_file)
+                    log_handler_file.close()
+                    if ctx.extra:
+                        with open(log_file, "a") as log_stream:
+                            ButlerLogRecords.write_streaming_extra(
+                                log_stream,
+                                ctx.extra.model_dump_json(exclude_unset=True, exclude_defaults=True),
+                            )
+                    if ctx.store:
+                        self._ingest_log_records(quantum, log_dataset_name, log_file)
+                    shutil.rmtree(tmpdir, ignore_errors=True)
 
             else:
-                log_handler_memory = ButlerLogRecordHandler(records)
+                log_handler_memory = ButlerLogRecordHandler()
                 logging.getLogger().addHandler(log_handler_memory)
 
                 try:
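The comment block in the added branch above is the heart of this change: files made by tempfile.mkstemp/NamedTemporaryFile get mode 0o600 regardless of umask, and a datastore that ingests with transfer="move" keeps that restrictive mode. A minimal standalone sketch of the difference (assumes a POSIX system with a typical 0o022 umask; not lsst.pipe.base code):

    import os
    import stat
    import tempfile

    # mkstemp-backed temporary files are always created with mode 0o600,
    # no matter what the process umask is.
    with tempfile.NamedTemporaryFile() as tmp:
        print(oct(stat.S_IMODE(os.stat(tmp.name).st_mode)))  # 0o600

    # An ordinary file inside a private mkdtemp() directory honors the
    # umask, so it stays group/world readable after a "move" ingest.
    tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")
    log_file = os.path.join(tmpdir, "log.json")
    with open(log_file, "w"):
        pass  # "touch" the file, as the new code does
    print(oct(stat.S_IMODE(os.stat(log_file).st_mode)))  # typically 0o644
    os.remove(log_file)
    os.rmdir(tmpdir)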
@@ -260,6 +261,7 @@ class LogCapture:
                     logging.getLogger().removeHandler(log_handler_memory)
                     if ctx.store:
                         self._store_log_records(quantum, log_dataset_name, log_handler_memory)
+                    log_handler_memory.records.clear()
 
         else:
             with ButlerMDC.set_mdc(mdc):
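The added log_handler_memory.records.clear() releases the captured records once they have been stored. A toy illustration of why an accumulating handler needs explicit clearing (ListHandler is a hypothetical stand-in for ButlerLogRecordHandler, not the real class):

    import logging

    class ListHandler(logging.Handler):
        # Append every record to a list, like an in-memory capture handler.
        def __init__(self) -> None:
            super().__init__()
            self.records: list[logging.LogRecord] = []

        def emit(self, record: logging.LogRecord) -> None:
            self.records.append(record)

    handler = ListHandler()
    logging.getLogger().addHandler(handler)
    logging.getLogger().warning("captured")
    logging.getLogger().removeHandler(handler)
    print(len(handler.records))  # 1
    handler.records.clear()  # drop references once stored elsewhere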
@@ -279,3 +281,41 @@ class LogCapture:
                 ) from exc
 
         self.butler.put(log_handler.records, ref)
+
+    def _ingest_log_records(self, quantum: Quantum, dataset_type: str, filename: str) -> None:
+        # If we are logging to an external file we must always try to
+        # close it.
+        assert self.full_butler is not None, "Expected to have full butler for ingest"
+        ingested = False
+        try:
+            # DatasetRef has to be in the Quantum outputs, can lookup by name.
+            try:
+                [ref] = quantum.outputs[dataset_type]
+            except LookupError as exc:
+                raise InvalidQuantumError(
+                    f"Quantum outputs is missing log output dataset type {dataset_type};"
+                    " this could happen due to inconsistent options between QuantumGraph generation"
+                    " and execution"
+                ) from exc
+
+            # Need to ingest this file directly into butler.
+            dataset = FileDataset(path=filename, refs=ref)
+            try:
+                self.full_butler.ingest(dataset, transfer="move")
+                ingested = True
+            except NotImplementedError:
+                # Some datastores can't receive files (e.g. in-memory datastore
+                # when testing), we store empty list for those just to have a
+                # dataset. Alternative is to read the file as a
+                # ButlerLogRecords object and put it.
+                _LOG.info(
+                    "Log records could not be stored in this butler because the"
+                    " datastore can not ingest files, empty record list is stored instead."
+                )
+                records = ButlerLogRecords.from_records([])
+                self.full_butler.put(records, ref)
+        finally:
+            # remove file if it is not ingested
+            if not ingested:
+                with suppress(OSError):
+                    os.remove(filename)
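The cleanup in the new _ingest_log_records leans on contextlib.suppress, which swallows only the named exception class. A short self-contained illustration:

    import os
    from contextlib import suppress

    # FileNotFoundError is a subclass of OSError, so a log file that was
    # already moved by a successful ingest is silently ignored here...
    with suppress(OSError):
        os.remove("/tmp/no-such-butler-log.json")

    # ...while unrelated exceptions still propagate.
    try:
        with suppress(OSError):
            raise ValueError("not an OSError")
    except ValueError:
        print("ValueError escaped suppress(OSError), as expected")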
@@ -39,24 +39,20 @@ import sys
 import threading
 import time
 import uuid
-from contextlib import ExitStack
 from typing import Literal, cast
 
 import networkx
 
 from lsst.daf.butler import DataCoordinate, Quantum
 from lsst.daf.butler.cli.cliLog import CliLog
-from lsst.daf.butler.logging import ButlerLogRecords
 from lsst.utils.threads import disable_implicit_threading
 
 from ._status import InvalidQuantumError, RepeatableQuantumError
-from ._task_metadata import TaskMetadata
 from .execution_graph_fixup import ExecutionGraphFixup
 from .graph import QuantumGraph
 from .graph_walker import GraphWalker
-from .log_on_close import LogOnClose
 from .pipeline_graph import TaskNode
-from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo, ProvenanceQuantumGraphWriter
+from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo
 from .quantum_graph_executor import QuantumExecutor, QuantumGraphExecutor
 from .quantum_reports import ExecutionStatus, QuantumReport, Report
 
@@ -519,9 +515,7 @@ class MPGraphExecutor(QuantumGraphExecutor):
             start_method = "spawn"
         self._start_method = start_method
 
-    def execute(
-        self, graph: QuantumGraph | PredictedQuantumGraph, *, provenance_graph_file: str | None = None
-    ) -> None:
+    def execute(self, graph: QuantumGraph | PredictedQuantumGraph) -> None:
         # Docstring inherited from QuantumGraphExecutor.execute
         old_graph: QuantumGraph | None = None
         if isinstance(graph, QuantumGraph):
@@ -531,31 +525,14 @@ class MPGraphExecutor(QuantumGraphExecutor):
             new_graph = graph
         xgraph = self._make_xgraph(new_graph, old_graph)
         self._report = Report(qgraphSummary=new_graph._make_summary())
-        with ExitStack() as exit_stack:
-            provenance_writer: ProvenanceQuantumGraphWriter | None = None
-            if provenance_graph_file is not None:
-                if provenance_graph_file is not None and self._num_proc > 1:
-                    raise NotImplementedError(
-                        "Provenance writing is not implemented for multiprocess execution."
-                    )
-                provenance_writer = ProvenanceQuantumGraphWriter(
-                    provenance_graph_file,
-                    exit_stack=exit_stack,
-                    log_on_close=LogOnClose(_LOG.log),
-                    predicted=new_graph,
-                )
-            try:
-                if self._num_proc > 1:
-                    self._execute_quanta_mp(xgraph, self._report)
-                else:
-                    self._execute_quanta_in_process(xgraph, self._report, provenance_writer)
-            except Exception as exc:
-                self._report.set_exception(exc)
-                raise
-            if provenance_writer is not None:
-                provenance_writer.write_overall_inputs()
-                provenance_writer.write_packages()
-                provenance_writer.write_init_outputs(assume_existence=True)
+        try:
+            if self._num_proc > 1:
+                self._execute_quanta_mp(xgraph, self._report)
+            else:
+                self._execute_quanta_in_process(xgraph, self._report)
+        except Exception as exc:
+            self._report.set_exception(exc)
+            raise
 
     def _make_xgraph(
         self, new_graph: PredictedQuantumGraph, old_graph: QuantumGraph | None
@@ -599,9 +576,7 @@ class MPGraphExecutor(QuantumGraphExecutor):
             raise MPGraphExecutorError("Updated execution graph has dependency cycle.")
         return xgraph
 
-    def _execute_quanta_in_process(
-        self, xgraph: networkx.DiGraph, report: Report, provenance_writer: ProvenanceQuantumGraphWriter | None
-    ) -> None:
+    def _execute_quanta_in_process(self, xgraph: networkx.DiGraph, report: Report) -> None:
         """Execute all Quanta in current process.
 
         Parameters
@@ -614,9 +589,6 @@ class MPGraphExecutor(QuantumGraphExecutor):
             `.quantum_graph.PredictedQuantumGraph.quantum_only_xgraph`.
         report : `Report`
             Object for reporting execution status.
-        provenance_writer : `.quantum_graph.ProvenanceQuantumGraphWriter` or \
-            `None`
-            Object for recording provenance.
         """
 
         def tiebreaker_sort_key(quantum_id: uuid.UUID) -> tuple:
@@ -634,19 +606,16 @@ class MPGraphExecutor(QuantumGraphExecutor):
 
             _LOG.debug("Executing %s (%s@%s)", quantum_id, task_node.label, data_id)
             fail_exit_code: int | None = None
-            task_metadata: TaskMetadata | None = None
-            task_logs = ButlerLogRecords([])
             try:
                 # For some exception types we want to exit immediately with
                 # exception-specific exit code, but we still want to start
                 # debugger before exiting if debugging is enabled.
                 try:
-                    execution_result = self._quantum_executor.execute(
-                        task_node, quantum, quantum_id=quantum_id, log_records=task_logs
+                    _, quantum_report = self._quantum_executor.execute(
+                        task_node, quantum, quantum_id=quantum_id
                     )
-                    if execution_result.report:
-                        report.quantaReports.append(execution_result.report)
-                    task_metadata = execution_result.task_metadata
+                    if quantum_report:
+                        report.quantaReports.append(quantum_report)
                     success_count += 1
                     walker.finish(quantum_id)
                 except RepeatableQuantumError as exc:
@@ -732,11 +701,6 @@ class MPGraphExecutor(QuantumGraphExecutor):
                 )
                 failed_count += 1
 
-            if provenance_writer is not None:
-                provenance_writer.write_quantum_provenance(
-                    quantum_id, metadata=task_metadata, logs=task_logs
-                )
-
         _LOG.info(
             "Executed %d quanta successfully, %d failed and %d remain out of total %d quanta.",
             success_count,
@@ -54,12 +54,13 @@ from lsst.utils.introspection import get_full_type_name
 
 from . import automatic_connection_constants as acc
 from . import pipeline_graph, pipelineIR
-from ._instrument import Instrument as Instrument
+from ._instrument import Instrument as PipeBaseInstrument
 from .config import PipelineTaskConfig
 from .connections import PipelineTaskConnections
 from .pipelineTask import PipelineTask
 
 if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
+    from lsst.obs.base import Instrument
     from lsst.pex.config import Config
 
 # ----------------------------------
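The import shuffle above is the standard TYPE_CHECKING idiom: lsst.obs.base.Instrument is needed only in annotations, and importing it at runtime could create a circular import, so it is bound for the type checker alone while the runtime keeps the renamed pipe_base class. A generic sketch of the pattern (the describe function is illustrative, not part of pipeline.py):

    from __future__ import annotations  # annotations stay strings at runtime

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Seen by mypy/pyright only; never executed, so a circular or
        # heavyweight dependency costs nothing at import time.
        from lsst.obs.base import Instrument

    def describe(instrument: Instrument) -> str:
        # At runtime the annotation is just the string "Instrument".
        return instrument.getName()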
@@ -495,7 +496,7 @@ class Pipeline:
         Returns
         -------
         pipeline: `Pipeline`
-           The new pipeline.
+            The new pipeline.
         """
         return cls.fromIR(copy.deepcopy(pipeline._pipelineIR))
 
@@ -605,7 +606,7 @@ class Pipeline:
 
     @property
     def subsets(self) -> MappingProxyType[str, set]:
-        """Returns a `types.MappingProxyType` where the keys are the labels of
+        """Returns a `MappingProxyType` where the keys are the labels of
         labeled subsets in the `Pipeline` and the values are the set of task
         labels contained within that subset.
         """
@@ -701,7 +702,7 @@ class Pipeline:
         """
         instrument_class_name = self._pipelineIR.instrument
         if instrument_class_name is not None:
-            instrument_class = cast(Instrument, doImportType(instrument_class_name))
+            instrument_class = cast(PipeBaseInstrument, doImportType(instrument_class_name))
             if instrument_class is not None:
                 return DataCoordinate.standardize(instrument=instrument_class.getName(), universe=universe)
         return DataCoordinate.make_empty(universe)
@@ -892,7 +893,7 @@ class Pipeline:
             raise NameError(f"Label {label} does not appear in this pipeline")
         taskClass: type[PipelineTask] = doImportType(taskIR.klass)
         config = taskClass.ConfigClass()
-        instrument: Instrument | None = None
+        instrument: PipeBaseInstrument | None = None
         if (instrumentName := self._pipelineIR.instrument) is not None:
             instrument_cls: type = doImportType(instrumentName)
             instrument = instrument_cls()
@@ -220,6 +220,12 @@ class LabeledSubset:
 class ParametersIR:
     """Intermediate representation of parameters that are global to a pipeline.
 
+    Attributes
+    ----------
+    mapping : `dict` [`str`, `str`]
+        A mutable mapping of identifiers as keys, and shared configuration
+        as values.
+
     Notes
     -----
     These parameters are specified under a top level key named ``parameters``
@@ -337,7 +343,7 @@ class ConfigIR:
         )
         return new_config
 
-    def maybe_merge(self, other_config: ConfigIR) -> Generator[ConfigIR]:
+    def maybe_merge(self, other_config: ConfigIR) -> Generator[ConfigIR, None, None]:
         """Merge another instance of a `ConfigIR` into this instance if
         possible. This function returns a generator that is either self
         if the configs were merged, or self, and other_config if that could
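The return-annotation change above is a compatibility fix: Generator takes three type parameters (yield, send, and return types), and the one-parameter form Generator[ConfigIR] relies on newer type-parameter defaults. Generator[ConfigIR, None, None] is equivalent and type-checks on older toolchains. A generic sketch of the same merge-or-both shape (illustrative, not the real ConfigIR logic):

    from collections.abc import Generator

    def maybe_merge(a: dict, b: dict) -> Generator[dict, None, None]:
        # Yield one merged mapping when keys do not collide, otherwise
        # yield both inputs unchanged.
        if not a.keys() & b.keys():
            yield {**a, **b}
        else:
            yield a
            yield b

    print(list(maybe_merge({"x": 1}, {"y": 2})))  # one merged dict
    print(list(maybe_merge({"x": 1}, {"x": 2})))  # both originals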
@@ -700,7 +706,7 @@ class PipelineIR:
 
         Parameters
         ----------
-        loaded_yaml : `dict`
+        loaded_yaml: `dict`
             A dictionary which matches the structure that would be produced
             by a yaml reader which parses a pipeline definition document
         """
@@ -55,7 +55,7 @@ class PipelineTask(Task):
     resulting data is also stored in a data butler.
 
     PipelineTask inherits from a `~lsst.pipe.base.Task` and uses the same
-    configuration mechanism based on `lsst.pex.config`. `PipelineTask`
+    configuration mechanism based on :ref:`lsst.pex.config`. `PipelineTask`
     classes also have a `PipelineTaskConnections` class associated with their
     config which defines all of the IO a `PipelineTask` will need to do.
     PipelineTask sub-class typically implements `run()` method which receives
@@ -75,6 +75,12 @@ class PipelineTask(Task):
     PipelineTask base class constructor, but may support other signatures as
     well.
 
+    Attributes
+    ----------
+    canMultiprocess : bool, True by default (class attribute)
+        This class attribute is checked by execution framework, sub-classes
+        can set it to ``False`` in case task does not support multiprocessing.
+
     Parameters
     ----------
     config : `~lsst.pex.config.Config`, optional
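canMultiprocess, now documented above, is a plain class-level flag that an executor can consult before forking workers. A hypothetical check, not MPGraphExecutor's actual code:

    def effective_num_proc(task_class: type, requested: int) -> int:
        # Fall back to serial execution when a task opts out.
        if not getattr(task_class, "canMultiprocess", True):
            return 1
        return requested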
@@ -96,11 +102,7 @@ class PipelineTask(Task):
     """
 
     ConfigClass: ClassVar[type[PipelineTaskConfig]]
-
     canMultiprocess: ClassVar[bool] = True
-    """Whether this task can be run by an executor that uses subprocesses for
-    parallelism.
-    """
 
     def __init__(
         self,
@@ -106,8 +106,8 @@ class DatasetTypeNode:
         The internal networkx graph.
     get_registered : `~collections.abc.Callable` or `None`
         Callable that takes a dataset type name and returns the
-        `~lsst.daf.butler.DatasetType` registered in the data repository,
-        or `None` if it is not registered.
+        `DatasetType` registered in the data repository, or `None` if it is
+        not registered.
     dimensions : `lsst.daf.butler.DimensionUniverse`
         Definitions of all dimensions.
     previous : `DatasetTypeNode` or `None`
@@ -30,7 +30,7 @@ __all__ = ("Edge", "ReadEdge", "WriteEdge")
 
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Mapping, Sequence
-from typing import Any, ClassVar, Self
+from typing import Any, ClassVar, Self, TypeVar
 
 from lsst.daf.butler import DatasetRef, DatasetType, DimensionUniverse, StorageClassFactory
 from lsst.daf.butler.registry import MissingDatasetTypeError
@@ -40,6 +40,8 @@ from ..connectionTypes import BaseConnection
 from ._exceptions import ConnectionTypeConsistencyError, IncompatibleDatasetTypeError
 from ._nodes import NodeKey, NodeType
 
+_S = TypeVar("_S", bound="Edge")
+
 
 @immutable
 class Edge(ABC):
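This module-level _S TypeVar, together with the diff signature change in the next hunk, backs out PEP 695 inline generic syntax (def diff[S: Edge](...), Python 3.12+) in favor of an explicit TypeVar that also works on 3.11; the two spellings are equivalent to a type checker. A minimal sketch under hypothetical names:

    from typing import TypeVar

    _S = TypeVar("_S", bound="Base")

    class Base:
        # PEP 695 spelling (3.12+): def clone[S: Base](self: S) -> S: ...
        def clone(self: _S) -> _S:
            # type(self)() keeps the concrete subclass, so callers get
            # back the type they started with.
            return type(self)()

    class Child(Base):
        pass

    child: Child = Child().clone()  # checkers infer Child, not Base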
@@ -170,7 +172,7 @@ class Edge(ABC):
         """
         return self.parent_dataset_type_name
 
-    def diff[S: Edge](self: S, other: S, connection_type: str = "connection") -> list[str]:
+    def diff(self: _S, other: _S, connection_type: str = "connection") -> list[str]:
         """Compare this edge to another one from a possibly-different
         configuration of the same task label.
 
@@ -478,11 +480,11 @@ class ReadEdge(Edge):
         Parameters
         ----------
         current : `lsst.daf.butler.DatasetType` or `None`
-            The current graph-wide `~lsst.daf.butler.DatasetType`, or `None`.
-            This will always be the registry's definition of the parent dataset
-            type, if one exists. If not, it will be the dataset type
-            definition from the task in the graph that writes it, if there is
-            one. If there is no such task, this will be `None`.
+            The current graph-wide `DatasetType`, or `None`. This will always
+            be the registry's definition of the parent dataset type, if one
+            exists. If not, it will be the dataset type definition from the
+            task in the graph that writes it, if there is one. If there is no
+            such task, this will be `None`.
         is_initial_query_constraint : `bool`
             Whether this dataset type is currently marked as a constraint on
             the initial data ID query in QuantumGraph generation.
@@ -494,7 +496,7 @@ class ReadEdge(Edge):
         producer : `str` or `None`
             The label of the task that produces this dataset type in the
             pipeline, or `None` if it is an overall input.
-        consumers : `~collections.abc.Sequence` [ `str` ]
+        consumers : `Sequence` [ `str` ]
             Labels for other consuming tasks that have already participated in
             this dataset type's resolution.
         is_registered : `bool`
@@ -510,7 +512,7 @@ class ReadEdge(Edge):
 
         Returns
         -------
-        dataset_type : `~lsst.daf.butler.DatasetType`
+        dataset_type : `DatasetType`
             The updated graph-wide dataset type. If ``current`` was provided,
             this must be equal to it.
         is_initial_query_constraint : `bool`
@@ -657,25 +659,13 @@ class ReadEdge(Edge):
                 # compatible), since neither connection should take
                 # precedence.
                 if dataset_type != current:
-                    if visualization_only and dataset_type.dimensions == current.dimensions:
-                        # Make a visualization-only ambiguous storage class
-                        # "name".
-                        all_storage_classes = set(current.storageClass_name.split("/"))
-                        all_storage_classes.update(dataset_type.storageClass_name.split("/"))
-                        current = DatasetType(
-                            current.name,
-                            current.dimensions,
-                            "/".join(sorted(all_storage_classes)),
-                        )
-                    else:
-                        raise MissingDatasetTypeError(
-                            f"Definitions differ for input dataset type "
-                            f"{self.parent_dataset_type_name!r}; task {self.task_label!r} has "
-                            f"{dataset_type}, but the definition from {report_current_origin()} is "
-                            f"{current}. If the storage classes are compatible but different, "
-                            "registering the dataset type in the data repository in advance will avoid "
-                            "this error."
-                        )
+                    raise MissingDatasetTypeError(
+                        f"Definitions differ for input dataset type {self.parent_dataset_type_name!r}; "
+                        f"task {self.task_label!r} has {dataset_type}, but the definition "
+                        f"from {report_current_origin()} is {current}. If the storage classes are "
+                        "compatible but different, registering the dataset type in the data repository "
+                        "in advance will avoid this error."
+                    )
                 elif not visualization_only and not dataset_type.is_compatible_with(current):
                     raise IncompatibleDatasetTypeError(
                         f"Incompatible definition for input dataset type {self.parent_dataset_type_name!r}; "
@@ -798,15 +788,15 @@ class WriteEdge(Edge):
         Parameters
         ----------
         current : `lsst.daf.butler.DatasetType` or `None`
-            The current graph-wide `~lsst.daf.butler.DatasetType`, or `None`.
-            This will always be the registry's definition of the parent dataset
-            type, if one exists.
+            The current graph-wide `DatasetType`, or `None`. This will always
+            be the registry's definition of the parent dataset type, if one
+            exists.
         universe : `lsst.daf.butler.DimensionUniverse`
             Object that holds all dimension definitions.
 
         Returns
         -------
-        dataset_type : `~lsst.daf.butler.DatasetType`
+        dataset_type : `DatasetType`
             A dataset type compatible with this edge. If ``current`` was
             provided, this must be equal to it.
 
@@ -27,7 +27,7 @@
 from __future__ import annotations
 
 from collections.abc import Iterable, Iterator, Mapping, Sequence
-from typing import Any, ClassVar, cast, overload
+from typing import Any, ClassVar, TypeVar, cast, overload
 
 import networkx
 
@@ -36,8 +36,11 @@ from ._exceptions import UnresolvedGraphError
 from ._nodes import NodeKey, NodeType
 from ._tasks import TaskInitNode, TaskNode
 
+_N = TypeVar("_N", covariant=True)
+_T = TypeVar("_T")
 
-class MappingView[N](Mapping[str, N]):
+
+class MappingView(Mapping[str, _N]):
     """Base class for mapping views into nodes of certain types in a
     `PipelineGraph`.
 
@@ -71,7 +74,7 @@ class MappingView[N](Mapping[str, N]):
             self._keys = self._make_keys(self._parent_xgraph)
         return iter(self._keys)
 
-    def __getitem__(self, key: str) -> N:
+    def __getitem__(self, key: str) -> _N:
         return self._parent_xgraph.nodes[NodeKey(self._NODE_TYPE, key)]["instance"]
 
     def __len__(self) -> int:
@@ -227,7 +230,7 @@ class DatasetTypeMappingView(MappingView[DatasetTypeNode]):
     def get_if_resolved(self, key: str) -> DatasetTypeNode | None: ...  # pragma: nocover
 
     @overload
-    def get_if_resolved[T](self, key: str, default: T) -> DatasetTypeNode | T: ...  # pragma: nocover
+    def get_if_resolved(self, key: str, default: _T) -> DatasetTypeNode | _T: ...  # pragma: nocover
 
     def get_if_resolved(self, key: str, default: Any = None) -> DatasetTypeNode | Any:
         """Get a node or return a default if it has not been resolved.
@@ -33,7 +33,7 @@ import itertools
 import json
 import logging
 from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set
-from typing import TYPE_CHECKING, Any, BinaryIO, Literal, cast
+from typing import TYPE_CHECKING, Any, BinaryIO, Literal, TypeVar, cast
 
 import networkx
 import networkx.algorithms.bipartite
@@ -79,6 +79,9 @@ if TYPE_CHECKING:
     from ..pipeline import TaskDef
     from ..pipelineTask import PipelineTask
 
+
+_G = TypeVar("_G", bound=networkx.DiGraph | networkx.MultiDiGraph)
+
 _LOG = logging.getLogger("lsst.pipe.base.pipeline_graph")
 
 
@@ -894,10 +897,6 @@ class PipelineGraph:
             New config objects or overrides to apply to copies of the current
             config objects, with task labels as the keywords.
 
-        Returns
-        -------
-        None
-
         Raises
         ------
         ValueError
@@ -1633,7 +1632,7 @@ class PipelineGraph:
 
         Returns
         -------
-        subgraphs : `~collections.abc.Iterable` [ `PipelineGraph` ]
+        subgraphs : `Iterable` [ `PipelineGraph` ]
             An iterable over component subgraphs that could be run
             independently (they have only overall inputs in common). May be a
             lazy iterator.
@@ -1756,10 +1755,6 @@ class PipelineGraph:
         not considered part of the pipeline graph in other respects, but it
         does get written with other provenance datasets).
 
-        Returns
-        -------
-        None
-
         Raises
         ------
         lsst.daf.butler.MissingDatasetTypeError
@@ -2184,9 +2179,7 @@ class PipelineGraph:
         ]
         return networkx.algorithms.bipartite.projected_graph(networkx.DiGraph(bipartite_xgraph), task_keys)
 
-    def _transform_xgraph_state[G: networkx.DiGraph | networkx.MultiDiGraph](
-        self, xgraph: G, skip_edges: bool
-    ) -> G:
+    def _transform_xgraph_state(self, xgraph: _G, skip_edges: bool) -> _G:
         """Transform networkx graph attributes in-place from the internal
         "instance" attributes to the documented exported attributes.
 
@@ -2235,7 +2228,7 @@ class PipelineGraph:
 
         Parameters
         ----------
-        updates : `~collections.abc.Mapping` [ `str`, `TaskNode` ]
+        updates : `Mapping` [ `str`, `TaskNode` ]
             New task nodes with task label keys. All keys must be task labels
             that are already present in the graph.
         check_edges_unchanged : `bool`, optional