lsst-daf-butler 30.0.0rc2__py3-none-any.whl → 30.0.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. lsst/daf/butler/_butler.py +8 -5
  2. lsst/daf/butler/_butler_metrics.py +49 -2
  3. lsst/daf/butler/_formatter.py +2 -7
  4. lsst/daf/butler/_labeled_butler_factory.py +28 -8
  5. lsst/daf/butler/_rubin/temporary_for_ingest.py +207 -0
  6. lsst/daf/butler/configs/datastores/formatters.yaml +1 -0
  7. lsst/daf/butler/configs/storageClasses.yaml +15 -0
  8. lsst/daf/butler/datastore/record_data.py +1 -1
  9. lsst/daf/butler/datastores/fileDatastore.py +15 -12
  10. lsst/daf/butler/dimensions/_coordinate.py +5 -0
  11. lsst/daf/butler/direct_butler/_direct_butler.py +45 -28
  12. lsst/daf/butler/logging.py +9 -3
  13. lsst/daf/butler/registry/bridge/monolithic.py +17 -13
  14. lsst/daf/butler/registry/datasets/byDimensions/_manager.py +49 -45
  15. lsst/daf/butler/registry/expand_data_ids.py +93 -0
  16. lsst/daf/butler/registry/interfaces/_database.py +6 -1
  17. lsst/daf/butler/registry/sql_registry.py +2 -24
  18. lsst/daf/butler/remote_butler/_remote_butler.py +5 -1
  19. lsst/daf/butler/tests/hybrid_butler.py +4 -1
  20. lsst/daf/butler/tests/registry_data/lsstcam-subset.yaml +191 -0
  21. lsst/daf/butler/tests/testFormatters.py +2 -2
  22. lsst/daf/butler/transfers/_context.py +7 -6
  23. lsst/daf/butler/version.py +1 -1
  24. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/METADATA +1 -1
  25. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/RECORD +33 -30
  26. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/WHEEL +0 -0
  27. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/entry_points.txt +0 -0
  28. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/licenses/COPYRIGHT +0 -0
  29. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/licenses/LICENSE +0 -0
  30. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/licenses/bsd_license.txt +0 -0
  31. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/licenses/gpl-v3.0.txt +0 -0
  32. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/top_level.txt +0 -0
  33. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/zip-safe +0 -0
lsst/daf/butler/_butler.py
@@ -1566,7 +1566,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
 
     @abstractmethod
     def transfer_dimension_records_from(
-        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
+        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef | DataCoordinate]
     ) -> None:
         """Transfer dimension records to this Butler from another Butler.
 
@@ -1578,10 +1578,9 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
             `Butler` whose registry will be used to expand data IDs. If the
             source refs contain coordinates that are used to populate other
             records then this will also need to be a full `Butler`.
-        source_refs : iterable of `DatasetRef`
-            Datasets defined in the source butler whose dimension records
-            should be transferred to this butler. In most circumstances.
-            transfer is faster if the dataset refs are expanded.
+        source_refs : iterable of `DatasetRef` or `DataCoordinate`
+            Datasets or data IDs defined in the source butler whose dimension
+            records should be transferred to this butler.
         """
         raise NotImplementedError()
 
@@ -2227,3 +2226,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
     @abstractmethod
     def close(self) -> None:
         raise NotImplementedError()
+
+    @abstractmethod
+    def _expand_data_ids(self, data_ids: Iterable[DataCoordinate]) -> list[DataCoordinate]:
+        raise NotImplementedError()
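Note: the rc3 signature above accepts bare data coordinates as well as dataset refs. A minimal sketch of the new call pattern (the repository paths, instrument, and exposure ID below are placeholders, not taken from this diff):

    from lsst.daf.butler import Butler

    source = Butler.from_config("/path/to/source_repo")
    target = Butler.from_config("/path/to/target_repo", writeable=True)

    # Expand a data ID against the source registry, then copy its dimension
    # records into the target repository.
    data_id = source.registry.expandDataId(instrument="LSSTCam", exposure=2025061800123)
    target.transfer_dimension_records_from(source, [data_id])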
lsst/daf/butler/_butler_metrics.py
@@ -29,12 +29,15 @@ from __future__ import annotations
 
 from collections.abc import Callable, Iterator
 from contextlib import contextmanager
+from typing import Concatenate, ParamSpec
 
 from pydantic import BaseModel
 
 from lsst.utils.logging import LsstLoggers
 from lsst.utils.timer import time_this
 
+P = ParamSpec("P")
+
 
 class ButlerMetrics(BaseModel):
     """Metrics collected during Butler operations."""
@@ -45,18 +48,26 @@ class ButlerMetrics(BaseModel):
     time_in_get: float = 0.0
     """Wall-clock time, in seconds, spent in get()."""
 
+    time_in_ingest: float = 0.0
+    """Wall-clock time, in seconds, spent in ingest()."""
+
     n_get: int = 0
     """Number of datasets retrieved with get()."""
 
     n_put: int = 0
     """Number of datasets stored with put()."""
 
+    n_ingest: int = 0
+    """Number of datasets ingested."""
+
     def reset(self) -> None:
         """Reset all metrics."""
         self.time_in_put = 0.0
         self.time_in_get = 0.0
+        self.time_in_ingest = 0.0
         self.n_get = 0
         self.n_put = 0
+        self.n_ingest = 0
 
     def increment_get(self, duration: float) -> None:
         """Increment time for get().
@@ -80,13 +91,31 @@ class ButlerMetrics(BaseModel):
         self.time_in_put += duration
         self.n_put += 1
 
+    def increment_ingest(self, duration: float, n_datasets: int) -> None:
+        """Increment time and datasets for ingest().
+
+        Parameters
+        ----------
+        duration : `float`
+            Duration to add to the ingest() statistics.
+        n_datasets : `int`
+            Number of datasets to be ingested for this call.
+        """
+        self.time_in_ingest += duration
+        self.n_ingest += n_datasets
+
     @contextmanager
     def _timer(
-        self, handler: Callable[[float], None], log: LsstLoggers | None = None, msg: str | None = None
+        self,
+        handler: Callable[Concatenate[float, P], None],
+        log: LsstLoggers | None = None,
+        msg: str | None = None,
+        *args: P.args,
+        **kwargs: P.kwargs,
     ) -> Iterator[None]:
         with time_this(log=log, msg=msg) as timer:
             yield
-        handler(timer.duration)
+        handler(timer.duration, *args, **kwargs)
 
     @contextmanager
     def instrument_get(self, log: LsstLoggers | None = None, msg: str | None = None) -> Iterator[None]:
@@ -115,3 +144,21 @@ class ButlerMetrics(BaseModel):
         """
         with self._timer(self.increment_put, log=log, msg=msg):
             yield
+
+    @contextmanager
+    def instrument_ingest(
+        self, n_datasets: int, log: LsstLoggers | None = None, msg: str | None = None
+    ) -> Iterator[None]:
+        """Run code and increment ingest statistics.
+
+        Parameters
+        ----------
+        n_datasets : `int`
+            Number of datasets being ingested.
+        log : `logging.Logger` or `None`
+            Logger to use for any timing information.
+        msg : `str` or `None`
+            Any message to be included in log output.
+        """
+        with self._timer(self.increment_ingest, n_datasets=n_datasets, log=log, msg=msg):
+            yield
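A sketch of how the new ingest counters might be exercised; the butler, the datasets list, and the logger name are placeholders, and ButlerMetrics is imported from its module path since a package-level re-export is not shown in this diff:

    import logging

    from lsst.daf.butler._butler_metrics import ButlerMetrics

    log = logging.getLogger("ingest-demo")
    metrics = ButlerMetrics()

    # Time an ingest of several FileDataset objects and record the count.
    with metrics.instrument_ingest(len(datasets), log=log, msg="Ingesting raws"):
        butler.ingest(*datasets, transfer="direct")

    print(metrics.n_ingest, metrics.time_in_ingest)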
lsst/daf/butler/_formatter.py
@@ -54,6 +54,7 @@ from ._config import Config
 from ._config_support import LookupKey, processLookupConfigs
 from ._file_descriptor import FileDescriptor
 from ._location import Location
+from ._rubin.temporary_for_ingest import TemporaryForIngest
 from .dimensions import DataCoordinate, DimensionUniverse
 from .mapping_factory import MappingFactory
 
@@ -1031,13 +1032,7 @@ class FormatterV2:
         """
         cache_manager = self._ensure_cache(cache_manager)
 
-        # Always write to a temporary even if
-        # using a local file system -- that gives us atomic writes.
-        # If a process is killed as the file is being written we do not
-        # want it to remain in the correct place but in corrupt state.
-        # For local files write to the output directory not temporary dir.
-        prefix = uri.dirname() if uri.isLocal else None
-        with ResourcePath.temporary_uri(suffix=uri.getExtension(), prefix=prefix) as temporary_uri:
+        with TemporaryForIngest.make_path(uri) as temporary_uri:
             # Need to configure the formatter to write to a different
             # location and that needs us to overwrite internals
             log.debug("Writing dataset to temporary location at %s", temporary_uri)
lsst/daf/butler/_labeled_butler_factory.py
@@ -30,7 +30,9 @@ from __future__ import annotations
 __all__ = ("LabeledButlerFactory", "LabeledButlerFactoryProtocol")
 
 from collections.abc import Mapping
-from typing import Protocol
+from contextlib import AbstractContextManager
+from logging import getLogger
+from typing import Any, Literal, Protocol, Self
 
 from lsst.resources import ResourcePathExpression
 
@@ -40,6 +42,8 @@ from ._butler_repo_index import ButlerRepoIndex
 from ._utilities.named_locks import NamedLocks
 from ._utilities.thread_safe_cache import ThreadSafeCache
 
+_LOG = getLogger(__name__)
+
 
 class LabeledButlerFactoryProtocol(Protocol):
     """Callable to retrieve a butler from a label."""
@@ -47,7 +51,7 @@ class LabeledButlerFactoryProtocol(Protocol):
     def __call__(self, label: str) -> Butler: ...
 
 
-class LabeledButlerFactory:
+class LabeledButlerFactory(AbstractContextManager):
     """Factory for efficiently instantiating Butler instances from the
     repository index file. This is intended for use from long-lived services
     that want to instantiate a separate Butler instance for each end user
@@ -60,6 +64,9 @@ class LabeledButlerFactory:
         files. If not provided, defaults to the global repository index
         configured by the ``DAF_BUTLER_REPOSITORY_INDEX`` environment variable
         -- see `ButlerRepoIndex`.
+    writeable : `bool`, optional
+        If `True`, Butler instances created by this factory will be writeable.
+        If `False` (the default), instances will be read-only.
 
     Notes
     -----
@@ -76,11 +83,12 @@ class LabeledButlerFactory:
     safely be used by separate threads.
     """
 
-    def __init__(self, repositories: Mapping[str, str] | None = None) -> None:
+    def __init__(self, repositories: Mapping[str, str] | None = None, writeable: bool = False) -> None:
         if repositories is None:
             self._repositories = None
         else:
             self._repositories = dict(repositories)
+        self._writeable = writeable
 
         self._factories = ThreadSafeCache[str, _ButlerFactory]()
         self._initialization_locks = NamedLocks()
@@ -88,6 +96,16 @@ class LabeledButlerFactory:
         # This may be overridden by unit tests.
         self._preload_unsafe_direct_butler_caches = True
 
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Literal[False]:
+        try:
+            self.close()
+        except Exception:
+            _LOG.exception("An exception occurred during LabeledButlerFactory.close()")
+        return False
+
     def bind(self, access_token: str | None) -> LabeledButlerFactoryProtocol:
         """Create a callable factory function for generating Butler instances
         with out needing to specify access tokans again.
@@ -109,7 +127,7 @@ class LabeledButlerFactory:
 
         return create
 
-    def create_butler(self, *, label: str, access_token: str | None) -> Butler:
+    def create_butler(self, label: str, *, access_token: str | None = None) -> Butler:
         """Create a Butler instance.
 
         Parameters
@@ -118,7 +136,7 @@ class LabeledButlerFactory:
             Label of the repository to instantiate, from the ``repositories``
            parameter to the `LabeledButlerFactory` constructor or the global
            repository index file.
-        access_token : `str` | `None`
+        access_token : `str` | `None`, optional
            Gafaelfawr access token used to authenticate to a Butler server.
            This is required for any repositories configured to use
            `RemoteButler`. If you only use `DirectButler`, this may be
@@ -167,7 +185,9 @@ class LabeledButlerFactory:
 
         match butler_type:
             case ButlerType.DIRECT:
-                return _DirectButlerFactory(config, self._preload_unsafe_direct_butler_caches)
+                return _DirectButlerFactory(
+                    config, self._preload_unsafe_direct_butler_caches, self._writeable
+                )
             case ButlerType.REMOTE:
                 return _RemoteButlerFactory(config)
             case _:
@@ -189,12 +209,12 @@ class _ButlerFactory(Protocol):
 
 
 class _DirectButlerFactory(_ButlerFactory):
-    def __init__(self, config: ButlerConfig, preload_unsafe_caches: bool) -> None:
+    def __init__(self, config: ButlerConfig, preload_unsafe_caches: bool, writeable: bool) -> None:
         import lsst.daf.butler.direct_butler
 
         # Create a 'template' Butler that will be cloned when callers request
         # an instance.
-        self._butler = Butler.from_config(config)
+        self._butler = Butler.from_config(config, writeable=writeable)
         assert isinstance(self._butler, lsst.daf.butler.direct_butler.DirectButler)
 
         # Load caches so that data is available in cloned instances without
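With rc3 the factory can be used as a context manager and can hand out writeable butlers. A minimal sketch (the repository label and path are placeholders; the import assumes the usual package-level export):

    from lsst.daf.butler import LabeledButlerFactory

    with LabeledButlerFactory({"embargo": "/repo/embargo"}, writeable=True) as factory:
        # The label is now the first positional argument; access_token defaults to None.
        butler = factory.create_butler("embargo")
        ...
    # close() is invoked on exit; any exception it raises is logged rather than propagated.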
lsst/daf/butler/_rubin/temporary_for_ingest.py (new file)
@@ -0,0 +1,207 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("TemporaryForIngest",)
+
+import dataclasses
+import glob
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, Self, cast
+
+from lsst.resources import ResourcePath
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from types import TracebackType
+
+    from .._butler import Butler
+    from .._dataset_ref import DatasetRef
+    from .._file_dataset import FileDataset
+    from .._limited_butler import LimitedButler
+
+
+@dataclasses.dataclass
+class TemporaryForIngest:
+    """A context manager for generating temporary paths that will be ingested
+    as butler datasets.
+
+    Notes
+    -----
+    Neither this class nor its `make_path` method run ingest automatically when
+    their context manager is exited; the `ingest` method must always be called
+    explicitly.
+    """
+
+    butler: Butler
+    """Full butler to obtain a predicted path from and ingest into."""
+
+    ref: DatasetRef
+    """Description of the dataset to ingest."""
+
+    dataset: FileDataset = dataclasses.field(init=False)
+    """The dataset that will be passed to `Butler.ingest`."""
+
+    @property
+    def path(self) -> ResourcePath:
+        """The temporary path.
+
+        Guaranteed to be a local POSIX path.
+        """
+        return cast(ResourcePath, self.dataset.path)
+
+    @property
+    def ospath(self) -> str:
+        """The temporary path as a complete filename."""
+        return self.path.ospath
+
+    @classmethod
+    @contextmanager
+    def make_path(cls, final_path: ResourcePath) -> Iterator[ResourcePath]:
+        """Return a temporary path context manager given the predicted final
+        path.
+
+        Parameters
+        ----------
+        final_path : `lsst.resources.ResourcePath`
+            Predicted final path.
+
+        Returns
+        -------
+        context : `contextlib.AbstractContextManager`
+            A context manager that yields the temporary
+            `~lsst.resources.ResourcePath` when entered and deletes that file
+            when exited.
+        """
+        # Always write to a temporary even if using a local file system -- that
+        # gives us atomic writes. If a process is killed as the file is being
+        # written we do not want it to remain in the correct place but in
+        # corrupt state. For local files write to the output directory not
+        # temporary dir.
+        prefix = final_path.dirname() if final_path.isLocal else None
+        if prefix is not None:
+            prefix.mkdir()
+        with ResourcePath.temporary_uri(
+            suffix=cls._get_temporary_suffix(final_path), prefix=prefix
+        ) as temporary_path:
+            yield temporary_path
+
+    def ingest(self, record_validation_info: bool = True) -> None:
+        """Ingest the file into the butler.
+
+        Parameters
+        ----------
+        record_validation_info : `bool`, optional
+            Whether to record the file size and checksum upon ingest.
+        """
+        self.butler.ingest(self.dataset, transfer="move", record_validation_info=record_validation_info)
+
+    def __enter__(self) -> Self:
+        from .._file_dataset import FileDataset
+
+        final_path = self.butler.getURI(self.ref, predict=True).replace(fragment="")
+        prefix = final_path.dirname() if final_path.isLocal else None
+        if prefix is not None:
+            prefix.mkdir()
+        self._temporary_path_context = self.make_path(final_path)
+        temporary_path = self._temporary_path_context.__enter__()
+        self.dataset = FileDataset(temporary_path, [self.ref], formatter=None)
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> bool | None:
+        return self._temporary_path_context.__exit__(exc_type, exc_value, traceback)
+
+    @classmethod
+    def find_orphaned_temporaries_by_path(cls, final_path: ResourcePath) -> list[ResourcePath]:
+        """Search for temporary files that were not successfully ingested.
+
+        Parameters
+        ----------
+        final_path : `lsst.resources.ResourcePath`
+            Final path a successfully-ingested file would have.
+
+        Returns
+        -------
+        paths : `list` [ `lsst.resources.ResourcePath` ]
+            Files that look like temporaries that might have been created while
+            trying to write the target dataset.
+
+        Notes
+        -----
+        Orphaned files are only possible when a context manager is interrupted
+        by a hard error that prevents any cleanup code from running (e.g.
+        sudden loss of power).
+        """
+        if not final_path.isLocal:
+            # We return true tempfile for non-local predicted paths, so orphans
+            # are not our problem (the OS etc. will take care of them).
+            return []
+        return [
+            ResourcePath(filename)
+            for filename in glob.glob(
+                f"{glob.escape(final_path.dirname().ospath)}*{glob.escape(cls._get_temporary_suffix(final_path))}"
+            )
+            if filename != final_path.ospath
+        ]
+
+    @classmethod
+    def find_orphaned_temporaries_by_ref(cls, ref: DatasetRef, butler: LimitedButler) -> list[ResourcePath]:
+        """Search for temporary files that were not successfully ingested.
+
+        Parameters
+        ----------
+        ref : `..DatasetRef`
+            A dataset reference the temporaries correspond to.
+        butler : `lsst.daf.butler.LimitedButler`
+            Butler that can be used to obtain a predicted URI for a dataset.
+
+        Returns
+        -------
+        paths : `list` [ `lsst.resources.ResourcePath` ]
+            Files that look like temporaries that might have been created while
+            trying to write the target dataset.
+
+        Notes
+        -----
+        Orphaned files are only possible when a context manager is interrupted
+        by a hard error that prevents any cleanup code from running (e.g.
+        sudden loss of power).
+        """
+        final_path = butler.getURI(ref, predict=True).replace(fragment="")
+        return cls.find_orphaned_temporaries_by_path(final_path)
+
+    @staticmethod
+    def _get_temporary_suffix(path: ResourcePath) -> str:
+        ext = path.getExtension()
+        basename = path.basename().removesuffix(ext)
+        return f"{basename}.tmp{ext}"
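A hedged sketch of using the new class as an instance context manager; the butler, the dataset ref, and the writer function are placeholders:

    from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest

    with TemporaryForIngest(butler=butler, ref=ref) as tmp:
        write_the_artifact(tmp.ospath)  # hypothetical function that creates the file
        # ingest() must be called explicitly; exiting the context without it
        # simply deletes the temporary file.
        tmp.ingest()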
lsst/daf/butler/configs/datastores/formatters.yaml
@@ -100,3 +100,4 @@ VisitBackgroundModel: lsst.daf.butler.formatters.json.JsonFormatter
 VignettingCorrection: lsst.ts.observatory.control.utils.extras.vignetting_storage.VignettingCorrectionFormatter
 SSPAuxiliaryFile: lsst.pipe.tasks.sspAuxiliaryFile.SSPAuxiliaryFileFormatter
 VisitGeometry: lsst.daf.butler.formatters.json.JsonFormatter
+ProvenanceQuantumGraph: lsst.pipe.base.quantum_graph.formatter.ProvenanceFormatter
lsst/daf/butler/configs/storageClasses.yaml
@@ -443,3 +443,18 @@ storageClasses:
     pytype: lsst.pipe.tasks.sspAuxiliaryFile.SSPAuxiliaryFile
   VisitGeometry:
     pytype: lsst.obs.base.visit_geometry.VisitGeometry
+  ProvenanceQuantumGraph:
+    pytype: lsst.pipe.base.quantum_graph.ProvenanceQuantumGraph
+    parameters:
+      - import_mode  # lsst.pipe.base.pipeline_graph.TaskImportMode
+      - quanta  # iterable of uuid.UUID; quanta to read
+      - datasets  # iterable of uuid.UUID; datasets to read
+      - read_init_quanta  # bool, defaults to True; whether to read pre-exec-init info
+    derivedComponents:
+      packages: Packages  # ignores node parameters
+
+      # UUID keys can be quantum or data IDs (whichever is passed in via
+      # parameters). Nested lists are attempts to run the quantum (last is
+      # most recent).
+      logs: StructuredDataDict  # dict[uuid.UUID, list[ButlerLogRecords]]
+      metadata: StructuredDataDict  # dict[uuid.UUID, list[TaskMetadata]]
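A sketch of how these read parameters and derived components might be used at get() time; the dataset type name, data ID, and UUID are placeholders, not defined in this diff:

    import uuid

    quantum_id = uuid.UUID("00000000-0000-0000-0000-000000000000")
    graph = butler.get(
        "provenance_quantum_graph",  # hypothetical dataset type using this storage class
        data_id,
        parameters={"quanta": [quantum_id], "read_init_quanta": False},
    )
    logs = butler.get("provenance_quantum_graph.logs", data_id)  # derived component read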
lsst/daf/butler/datastore/record_data.py
@@ -49,7 +49,7 @@ if TYPE_CHECKING:
 # Pydantic requires the possible value types to be explicitly enumerated in
 # order for `uuid.UUID` in particular to work. `typing.Any` does not work
 # here.
-_Record: TypeAlias = dict[str, int | str | uuid.UUID | None]
+_Record: TypeAlias = dict[str, int | str | None]
 
 
 class SerializedDatastoreRecordData(pydantic.BaseModel):
lsst/daf/butler/datastores/fileDatastore.py
@@ -1068,9 +1068,6 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
         # Work out the name we want this ingested file to have
         # inside the datastore
         tgtLocation = self._calculate_ingested_datastore_name(srcUri, ref, formatter)
-        if not tgtLocation.uri.dirname().exists():
-            log.debug("Folder %s does not exist yet.", tgtLocation.uri.dirname())
-            tgtLocation.uri.dirname().mkdir()
 
         # if we are transferring from a local file to a remote location
         # it may be more efficient to get the size and checksum of the
@@ -1311,12 +1308,6 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
                 f"and storage class type ({required_pytype})"
             )
 
-        uri = location.uri
-
-        if not uri.dirname().exists():
-            log.debug("Folder %s does not exist yet so creating it.", uri.dirname())
-            uri.dirname().mkdir()
-
         if self._transaction is None:
             raise RuntimeError("Attempting to write artifact without transaction enabled")
 
@@ -1332,6 +1323,7 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
 
         # Register a callback to try to delete the uploaded data if
         # something fails below
+        uri = location.uri
         self._transaction.registerUndo("artifactWrite", _removeFileExists, uri)
 
         # Need to record the specified formatter but if this is a V1 formatter
@@ -2220,9 +2212,6 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
         else:
             # Name the zip file based on index contents.
             tgtLocation = self.locationFactory.fromPath(index.calculate_zip_file_path_in_store())
-            if not tgtLocation.uri.dirname().exists():
-                log.debug("Folder %s does not exist yet.", tgtLocation.uri.dirname())
-                tgtLocation.uri.dirname().mkdir()
 
         # Transfer the Zip file into the datastore.
         if not dry_run:
@@ -3177,6 +3166,20 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
 
     def export_records(self, refs: Iterable[DatasetIdRef]) -> Mapping[str, DatastoreRecordData]:
         # Docstring inherited from the base class.
+
+        # This call to 'bridge.check' filters out "partially deleted" datasets.
+        # Specifically, ones in the unusual edge state that:
+        # 1. They have an entry in the registry dataset tables
+        # 2. They were "trashed" from the datastore, so they are not
+        #    present in the "dataset_location" table.
+        # 3. But the trash has not been "emptied", so there are still entries
+        #    in the "opaque" datastore records table.
+        #
+        # As far as I can tell, this can only occur in the case of a concurrent
+        # or aborted call to `Butler.pruneDatasets(unstore=True, purge=False)`.
+        # Datasets (with or without files existing on disk) can persist in
+        # this zombie state indefinitely, until someone manually empties
+        # the trash.
         exported_refs = list(self._bridge.check(refs))
         ids = {ref.id for ref in exported_refs}
         records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = {id: {} for id in ids}
lsst/daf/butler/dimensions/_coordinate.py
@@ -755,6 +755,11 @@ class DataCoordinate:
     to_json = to_json_pydantic
     from_json: ClassVar[Callable[..., Self]] = cast(Callable[..., Self], classmethod(from_json_pydantic))
 
+    @property
+    def dataId(self) -> Self:
+        """Return this `DataCoordinate` instance, unmodified."""
+        return self
+
 
 DataId = DataCoordinate | Mapping[str, Any]
 """A type-annotation alias for signatures that accept both informal data ID