lsst-daf-butler 30.0.0rc2__py3-none-any.whl → 30.0.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/daf/butler/_butler.py +8 -5
- lsst/daf/butler/_butler_metrics.py +49 -2
- lsst/daf/butler/_formatter.py +2 -7
- lsst/daf/butler/_labeled_butler_factory.py +28 -8
- lsst/daf/butler/_rubin/temporary_for_ingest.py +207 -0
- lsst/daf/butler/configs/datastores/formatters.yaml +1 -0
- lsst/daf/butler/configs/storageClasses.yaml +15 -0
- lsst/daf/butler/datastore/record_data.py +1 -1
- lsst/daf/butler/datastores/fileDatastore.py +15 -12
- lsst/daf/butler/dimensions/_coordinate.py +5 -0
- lsst/daf/butler/direct_butler/_direct_butler.py +45 -28
- lsst/daf/butler/logging.py +9 -3
- lsst/daf/butler/registry/bridge/monolithic.py +17 -13
- lsst/daf/butler/registry/datasets/byDimensions/_manager.py +49 -45
- lsst/daf/butler/registry/expand_data_ids.py +93 -0
- lsst/daf/butler/registry/interfaces/_database.py +6 -1
- lsst/daf/butler/registry/sql_registry.py +2 -24
- lsst/daf/butler/remote_butler/_remote_butler.py +5 -1
- lsst/daf/butler/tests/hybrid_butler.py +4 -1
- lsst/daf/butler/tests/registry_data/lsstcam-subset.yaml +191 -0
- lsst/daf/butler/tests/testFormatters.py +2 -2
- lsst/daf/butler/transfers/_context.py +7 -6
- lsst/daf/butler/version.py +1 -1
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/METADATA +1 -1
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/RECORD +33 -30
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/WHEEL +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/entry_points.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/licenses/LICENSE +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/top_level.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/zip-safe +0 -0
lsst/daf/butler/_butler.py
CHANGED

@@ -1566,7 +1566,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02

     @abstractmethod
     def transfer_dimension_records_from(
-        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
+        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef | DataCoordinate]
     ) -> None:
         """Transfer dimension records to this Butler from another Butler.

@@ -1578,10 +1578,9 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
             `Butler` whose registry will be used to expand data IDs. If the
             source refs contain coordinates that are used to populate other
             records then this will also need to be a full `Butler`.
-        source_refs : iterable of `DatasetRef`
-            Datasets defined in the source butler whose dimension
-            should be transferred to this butler.
-            transfer is faster if the dataset refs are expanded.
+        source_refs : iterable of `DatasetRef` or `DataCoordinate`
+            Datasets or data IDs defined in the source butler whose dimension
+            records should be transferred to this butler.
         """
         raise NotImplementedError()

@@ -2227,3 +2226,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
     @abstractmethod
     def close(self) -> None:
         raise NotImplementedError()
+
+    @abstractmethod
+    def _expand_data_ids(self, data_ids: Iterable[DataCoordinate]) -> list[DataCoordinate]:
+        raise NotImplementedError()
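
The widened signature means callers may now pass bare data IDs as well as dataset refs. A hedged usage sketch (not part of the diff; the repository paths and instrument name are assumptions):

    from lsst.daf.butler import Butler

    source = Butler.from_config("/path/to/source-repo")
    target = Butler.from_config("/path/to/target-repo", writeable=True)

    # Iterable[DatasetRef | DataCoordinate] is now accepted, so data IDs
    # queried from the source registry can be transferred directly.
    data_ids = source.registry.queryDataIds(["exposure"], instrument="LSSTCam")
    target.transfer_dimension_records_from(source, data_ids)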

lsst/daf/butler/_butler_metrics.py
CHANGED

@@ -29,12 +29,15 @@ from __future__ import annotations

 from collections.abc import Callable, Iterator
 from contextlib import contextmanager
+from typing import Concatenate, ParamSpec

 from pydantic import BaseModel

 from lsst.utils.logging import LsstLoggers
 from lsst.utils.timer import time_this

+P = ParamSpec("P")
+

 class ButlerMetrics(BaseModel):
     """Metrics collected during Butler operations."""

@@ -45,18 +48,26 @@ class ButlerMetrics(BaseModel):
     time_in_get: float = 0.0
     """Wall-clock time, in seconds, spent in get()."""

+    time_in_ingest: float = 0.0
+    """Wall-clock time, in seconds, spent in ingest()."""
+
     n_get: int = 0
     """Number of datasets retrieved with get()."""

     n_put: int = 0
     """Number of datasets stored with put()."""

+    n_ingest: int = 0
+    """Number of datasets ingested."""
+
     def reset(self) -> None:
         """Reset all metrics."""
         self.time_in_put = 0.0
         self.time_in_get = 0.0
+        self.time_in_ingest = 0.0
         self.n_get = 0
         self.n_put = 0
+        self.n_ingest = 0

     def increment_get(self, duration: float) -> None:
         """Increment time for get().

@@ -80,13 +91,31 @@ class ButlerMetrics(BaseModel):
         self.time_in_put += duration
         self.n_put += 1

+    def increment_ingest(self, duration: float, n_datasets: int) -> None:
+        """Increment time and datasets for ingest().
+
+        Parameters
+        ----------
+        duration : `float`
+            Duration to add to the ingest() statistics.
+        n_datasets : `int`
+            Number of datasets to be ingested for this call.
+        """
+        self.time_in_ingest += duration
+        self.n_ingest += n_datasets
+
     @contextmanager
     def _timer(
-        self,
+        self,
+        handler: Callable[Concatenate[float, P], None],
+        log: LsstLoggers | None = None,
+        msg: str | None = None,
+        *args: P.args,
+        **kwargs: P.kwargs,
     ) -> Iterator[None]:
         with time_this(log=log, msg=msg) as timer:
             yield
-        handler(timer.duration)
+        handler(timer.duration, *args, **kwargs)

     @contextmanager
     def instrument_get(self, log: LsstLoggers | None = None, msg: str | None = None) -> Iterator[None]:

@@ -115,3 +144,21 @@ class ButlerMetrics(BaseModel):
         """
         with self._timer(self.increment_put, log=log, msg=msg):
             yield
+
+    @contextmanager
+    def instrument_ingest(
+        self, n_datasets: int, log: LsstLoggers | None = None, msg: str | None = None
+    ) -> Iterator[None]:
+        """Run code and increment ingest statistics.
+
+        Parameters
+        ----------
+        n_datasets : `int`
+            Number of datasets being ingested.
+        log : `logging.Logger` or `None`
+            Logger to use for any timing information.
+        msg : `str` or `None`
+            Any message to be included in log output.
+        """
+        with self._timer(self.increment_ingest, n_datasets=n_datasets, log=log, msg=msg):
+            yield
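
A minimal sketch of how the new ingest counters might be exercised (illustrative only; the actual ingest call is elided):

    from lsst.daf.butler._butler_metrics import ButlerMetrics

    metrics = ButlerMetrics()
    with metrics.instrument_ingest(n_datasets=2, msg="ingesting raw files"):
        ...  # e.g. a Butler.ingest() call covering two FileDatasets

    # Both the wall-clock time and the dataset count are accumulated.
    print(metrics.time_in_ingest, metrics.n_ingest)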
lsst/daf/butler/_formatter.py
CHANGED

@@ -54,6 +54,7 @@ from ._config import Config
 from ._config_support import LookupKey, processLookupConfigs
 from ._file_descriptor import FileDescriptor
 from ._location import Location
+from ._rubin.temporary_for_ingest import TemporaryForIngest
 from .dimensions import DataCoordinate, DimensionUniverse
 from .mapping_factory import MappingFactory

@@ -1031,13 +1032,7 @@ class FormatterV2:
         """
         cache_manager = self._ensure_cache(cache_manager)

-        # Always write to a temporary even if
-        # using a local file system -- that gives us atomic writes.
-        # If a process is killed as the file is being written we do not
-        # want it to remain in the correct place but in corrupt state.
-        # For local files write to the output directory not temporary dir.
-        prefix = uri.dirname() if uri.isLocal else None
-        with ResourcePath.temporary_uri(suffix=uri.getExtension(), prefix=prefix) as temporary_uri:
+        with TemporaryForIngest.make_path(uri) as temporary_uri:
             # Need to configure the formatter to write to a different
             # location and that needs us to overwrite internals
             log.debug("Writing dataset to temporary location at %s", temporary_uri)
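
The removed inline temporary-file logic is now delegated to `TemporaryForIngest.make_path`. A hedged sketch of the atomic-write pattern it provides (the target path is hypothetical):

    from lsst.resources import ResourcePath

    from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest

    target = ResourcePath("/repo/datastore/raw/raw_LSSTCam_123.fits")
    with TemporaryForIngest.make_path(target) as tmp:
        tmp.write(b"...")  # serialize the dataset to the sibling temporary file
        # Only a successful move makes the final path visible.
        target.transfer_from(tmp, transfer="move")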

lsst/daf/butler/_labeled_butler_factory.py
CHANGED

@@ -30,7 +30,9 @@ from __future__ import annotations
 __all__ = ("LabeledButlerFactory", "LabeledButlerFactoryProtocol")

 from collections.abc import Mapping
-from
+from contextlib import AbstractContextManager
+from logging import getLogger
+from typing import Any, Literal, Protocol, Self

 from lsst.resources import ResourcePathExpression

@@ -40,6 +42,8 @@ from ._butler_repo_index import ButlerRepoIndex
 from ._utilities.named_locks import NamedLocks
 from ._utilities.thread_safe_cache import ThreadSafeCache

+_LOG = getLogger(__name__)
+

 class LabeledButlerFactoryProtocol(Protocol):
     """Callable to retrieve a butler from a label."""

@@ -47,7 +51,7 @@ class LabeledButlerFactoryProtocol(Protocol):
     def __call__(self, label: str) -> Butler: ...


-class LabeledButlerFactory:
+class LabeledButlerFactory(AbstractContextManager):
     """Factory for efficiently instantiating Butler instances from the
     repository index file. This is intended for use from long-lived services
     that want to instantiate a separate Butler instance for each end user

@@ -60,6 +64,9 @@ class LabeledButlerFactory:
         files. If not provided, defaults to the global repository index
         configured by the ``DAF_BUTLER_REPOSITORY_INDEX`` environment variable
         -- see `ButlerRepoIndex`.
+    writeable : `bool`, optional
+        If `True`, Butler instances created by this factory will be writeable.
+        If `False` (the default), instances will be read-only.

     Notes
     -----

@@ -76,11 +83,12 @@ class LabeledButlerFactory:
    safely be used by separate threads.
    """

-    def __init__(self, repositories: Mapping[str, str] | None = None) -> None:
+    def __init__(self, repositories: Mapping[str, str] | None = None, writeable: bool = False) -> None:
         if repositories is None:
             self._repositories = None
         else:
             self._repositories = dict(repositories)
+        self._writeable = writeable

         self._factories = ThreadSafeCache[str, _ButlerFactory]()
         self._initialization_locks = NamedLocks()

@@ -88,6 +96,16 @@ class LabeledButlerFactory:
         # This may be overridden by unit tests.
         self._preload_unsafe_direct_butler_caches = True

+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Literal[False]:
+        try:
+            self.close()
+        except Exception:
+            _LOG.exception("An exception occurred during LabeledButlerFactory.close()")
+        return False
+
     def bind(self, access_token: str | None) -> LabeledButlerFactoryProtocol:
         """Create a callable factory function for generating Butler instances
         without needing to specify access tokens again.

@@ -109,7 +127,7 @@ class LabeledButlerFactory:

         return create

-    def create_butler(self,
+    def create_butler(self, label: str, *, access_token: str | None = None) -> Butler:
         """Create a Butler instance.

         Parameters

@@ -118,7 +136,7 @@ class LabeledButlerFactory:
            Label of the repository to instantiate, from the ``repositories``
            parameter to the `LabeledButlerFactory` constructor or the global
            repository index file.
-        access_token : `str` | `None`
+        access_token : `str` | `None`, optional
            Gafaelfawr access token used to authenticate to a Butler server.
            This is required for any repositories configured to use
            `RemoteButler`. If you only use `DirectButler`, this may be

@@ -167,7 +185,9 @@ class LabeledButlerFactory:

         match butler_type:
             case ButlerType.DIRECT:
-                return _DirectButlerFactory(
+                return _DirectButlerFactory(
+                    config, self._preload_unsafe_direct_butler_caches, self._writeable
+                )
             case ButlerType.REMOTE:
                 return _RemoteButlerFactory(config)
             case _:

@@ -189,12 +209,12 @@ class _ButlerFactory(Protocol):


 class _DirectButlerFactory(_ButlerFactory):
-    def __init__(self, config: ButlerConfig, preload_unsafe_caches: bool) -> None:
+    def __init__(self, config: ButlerConfig, preload_unsafe_caches: bool, writeable: bool) -> None:
         import lsst.daf.butler.direct_butler

         # Create a 'template' Butler that will be cloned when callers request
         # an instance.
-        self._butler = Butler.from_config(config)
+        self._butler = Butler.from_config(config, writeable=writeable)
         assert isinstance(self._butler, lsst.daf.butler.direct_butler.DirectButler)

         # Load caches so that data is available in cloned instances without
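
A hedged sketch of the new context-manager and `writeable` support ("embargo" is a hypothetical repository label):

    from lsst.daf.butler._labeled_butler_factory import LabeledButlerFactory

    # close() now runs automatically when the factory is used as a context
    # manager; writeable=True propagates to the template DirectButler.
    with LabeledButlerFactory(writeable=True) as factory:
        butler = factory.create_butler("embargo", access_token=None)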

lsst/daf/butler/_rubin/temporary_for_ingest.py
ADDED

@@ -0,0 +1,207 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("TemporaryForIngest",)
+
+import dataclasses
+import glob
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, Self, cast
+
+from lsst.resources import ResourcePath
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from types import TracebackType
+
+    from .._butler import Butler
+    from .._dataset_ref import DatasetRef
+    from .._file_dataset import FileDataset
+    from .._limited_butler import LimitedButler
+
+
+@dataclasses.dataclass
+class TemporaryForIngest:
+    """A context manager for generating temporary paths that will be ingested
+    as butler datasets.
+
+    Notes
+    -----
+    Neither this class nor its `make_path` method run ingest automatically when
+    their context manager is exited; the `ingest` method must always be called
+    explicitly.
+    """
+
+    butler: Butler
+    """Full butler to obtain a predicted path from and ingest into."""
+
+    ref: DatasetRef
+    """Description of the dataset to ingest."""
+
+    dataset: FileDataset = dataclasses.field(init=False)
+    """The dataset that will be passed to `Butler.ingest`."""
+
+    @property
+    def path(self) -> ResourcePath:
+        """The temporary path.
+
+        Guaranteed to be a local POSIX path.
+        """
+        return cast(ResourcePath, self.dataset.path)
+
+    @property
+    def ospath(self) -> str:
+        """The temporary path as a complete filename."""
+        return self.path.ospath
+
+    @classmethod
+    @contextmanager
+    def make_path(cls, final_path: ResourcePath) -> Iterator[ResourcePath]:
+        """Return a temporary path context manager given the predicted final
+        path.
+
+        Parameters
+        ----------
+        final_path : `lsst.resources.ResourcePath`
+            Predicted final path.
+
+        Returns
+        -------
+        context : `contextlib.AbstractContextManager`
+            A context manager that yields the temporary
+            `~lsst.resources.ResourcePath` when entered and deletes that file
+            when exited.
+        """
+        # Always write to a temporary even if using a local file system -- that
+        # gives us atomic writes. If a process is killed as the file is being
+        # written we do not want it to remain in the correct place but in
+        # corrupt state. For local files write to the output directory not
+        # temporary dir.
+        prefix = final_path.dirname() if final_path.isLocal else None
+        if prefix is not None:
+            prefix.mkdir()
+        with ResourcePath.temporary_uri(
+            suffix=cls._get_temporary_suffix(final_path), prefix=prefix
+        ) as temporary_path:
+            yield temporary_path
+
+    def ingest(self, record_validation_info: bool = True) -> None:
+        """Ingest the file into the butler.
+
+        Parameters
+        ----------
+        record_validation_info : `bool`, optional
+            Whether to record the file size and checksum upon ingest.
+        """
+        self.butler.ingest(self.dataset, transfer="move", record_validation_info=record_validation_info)
+
+    def __enter__(self) -> Self:
+        from .._file_dataset import FileDataset
+
+        final_path = self.butler.getURI(self.ref, predict=True).replace(fragment="")
+        prefix = final_path.dirname() if final_path.isLocal else None
+        if prefix is not None:
+            prefix.mkdir()
+        self._temporary_path_context = self.make_path(final_path)
+        temporary_path = self._temporary_path_context.__enter__()
+        self.dataset = FileDataset(temporary_path, [self.ref], formatter=None)
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> bool | None:
+        return self._temporary_path_context.__exit__(exc_type, exc_value, traceback)
+
+    @classmethod
+    def find_orphaned_temporaries_by_path(cls, final_path: ResourcePath) -> list[ResourcePath]:
+        """Search for temporary files that were not successfully ingested.
+
+        Parameters
+        ----------
+        final_path : `lsst.resources.ResourcePath`
+            Final path a successfully-ingested file would have.
+
+        Returns
+        -------
+        paths : `list` [ `lsst.resources.ResourcePath` ]
+            Files that look like temporaries that might have been created while
+            trying to write the target dataset.
+
+        Notes
+        -----
+        Orphaned files are only possible when a context manager is interrupted
+        by a hard error that prevents any cleanup code from running (e.g.
+        sudden loss of power).
+        """
+        if not final_path.isLocal:
+            # We return a true tempfile for non-local predicted paths, so
+            # orphans are not our problem (the OS etc. will take care of them).
+            return []
+        return [
+            ResourcePath(filename)
+            for filename in glob.glob(
+                f"{glob.escape(final_path.dirname().ospath)}*{glob.escape(cls._get_temporary_suffix(final_path))}"
+            )
+            if filename != final_path.ospath
+        ]
+
+    @classmethod
+    def find_orphaned_temporaries_by_ref(cls, ref: DatasetRef, butler: LimitedButler) -> list[ResourcePath]:
+        """Search for temporary files that were not successfully ingested.
+
+        Parameters
+        ----------
+        ref : `..DatasetRef`
+            A dataset reference the temporaries correspond to.
+        butler : `lsst.daf.butler.LimitedButler`
+            Butler that can be used to obtain a predicted URI for a dataset.
+
+        Returns
+        -------
+        paths : `list` [ `lsst.resources.ResourcePath` ]
+            Files that look like temporaries that might have been created while
+            trying to write the target dataset.
+
+        Notes
+        -----
+        Orphaned files are only possible when a context manager is interrupted
+        by a hard error that prevents any cleanup code from running (e.g.
+        sudden loss of power).
+        """
+        final_path = butler.getURI(ref, predict=True).replace(fragment="")
+        return cls.find_orphaned_temporaries_by_path(final_path)
+
+    @staticmethod
+    def _get_temporary_suffix(path: ResourcePath) -> str:
+        ext = path.getExtension()
+        basename = path.basename().removesuffix(ext)
+        return f"{basename}.tmp{ext}"
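
A usage sketch following the docstrings above (the `butler`, `ref`, and `payload` objects are assumed to exist; ingest must be called explicitly):

    from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest

    with TemporaryForIngest(butler, ref) as tmp:
        with open(tmp.ospath, "wb") as stream:
            stream.write(payload)  # write the artifact to the local temporary path
        tmp.ingest(record_validation_info=True)  # move it into the datastore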

lsst/daf/butler/configs/datastores/formatters.yaml
CHANGED

@@ -100,3 +100,4 @@ VisitBackgroundModel: lsst.daf.butler.formatters.json.JsonFormatter
 VignettingCorrection: lsst.ts.observatory.control.utils.extras.vignetting_storage.VignettingCorrectionFormatter
 SSPAuxiliaryFile: lsst.pipe.tasks.sspAuxiliaryFile.SSPAuxiliaryFileFormatter
 VisitGeometry: lsst.daf.butler.formatters.json.JsonFormatter
+ProvenanceQuantumGraph: lsst.pipe.base.quantum_graph.formatter.ProvenanceFormatter

lsst/daf/butler/configs/storageClasses.yaml
CHANGED

@@ -443,3 +443,18 @@ storageClasses:
     pytype: lsst.pipe.tasks.sspAuxiliaryFile.SSPAuxiliaryFile
   VisitGeometry:
     pytype: lsst.obs.base.visit_geometry.VisitGeometry
+  ProvenanceQuantumGraph:
+    pytype: lsst.pipe.base.quantum_graph.ProvenanceQuantumGraph
+    parameters:
+      - import_mode  # lsst.pipe.base.pipeline_graph.TaskImportMode
+      - quanta  # iterable of uuid.UUID; quanta to read
+      - datasets  # iterable of uuid.UUID; datasets to read
+      - read_init_quanta  # bool, defaults to True; whether to read pre-exec-init info
+    derivedComponents:
+      packages: Packages  # ignores node parameters
+
+      # UUID keys can be quantum or data IDs (whichever is passed in via
+      # parameters). Nested lists are attempts to run the quantum (last is
+      # most recent).
+      logs: StructuredDataDict  # dict[uuid.UUID, list[ButlerLogRecords]]
+      metadata: StructuredDataDict  # dict[uuid.UUID, list[TaskMetadata]]
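
A hedged sketch of a parameterized read against the new storage class (the `butler` and `ref` objects and the parameter values are assumptions, not taken from the diff):

    import uuid

    # Only the named quanta are loaded, and pre-exec-init information is skipped.
    graph = butler.get(ref, parameters={"quanta": [uuid.UUID(int=0)], "read_init_quanta": False})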

lsst/daf/butler/datastore/record_data.py
CHANGED

@@ -49,7 +49,7 @@ if TYPE_CHECKING:
 # Pydantic requires the possible value types to be explicitly enumerated in
 # order for `uuid.UUID` in particular to work. `typing.Any` does not work
 # here.
-_Record: TypeAlias = dict[str, int | str |
+_Record: TypeAlias = dict[str, int | str | None]


 class SerializedDatastoreRecordData(pydantic.BaseModel):

lsst/daf/butler/datastores/fileDatastore.py
CHANGED

@@ -1068,9 +1068,6 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
         # Work out the name we want this ingested file to have
         # inside the datastore
         tgtLocation = self._calculate_ingested_datastore_name(srcUri, ref, formatter)
-        if not tgtLocation.uri.dirname().exists():
-            log.debug("Folder %s does not exist yet.", tgtLocation.uri.dirname())
-            tgtLocation.uri.dirname().mkdir()

         # if we are transferring from a local file to a remote location
         # it may be more efficient to get the size and checksum of the

@@ -1311,12 +1308,6 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
                 f"and storage class type ({required_pytype})"
             )

-        uri = location.uri
-
-        if not uri.dirname().exists():
-            log.debug("Folder %s does not exist yet so creating it.", uri.dirname())
-            uri.dirname().mkdir()
-
         if self._transaction is None:
             raise RuntimeError("Attempting to write artifact without transaction enabled")

@@ -1332,6 +1323,7 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):

         # Register a callback to try to delete the uploaded data if
         # something fails below
+        uri = location.uri
         self._transaction.registerUndo("artifactWrite", _removeFileExists, uri)

         # Need to record the specified formatter but if this is a V1 formatter

@@ -2220,9 +2212,6 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
         else:
             # Name the zip file based on index contents.
             tgtLocation = self.locationFactory.fromPath(index.calculate_zip_file_path_in_store())
-            if not tgtLocation.uri.dirname().exists():
-                log.debug("Folder %s does not exist yet.", tgtLocation.uri.dirname())
-                tgtLocation.uri.dirname().mkdir()

         # Transfer the Zip file into the datastore.
         if not dry_run:

@@ -3177,6 +3166,20 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):

     def export_records(self, refs: Iterable[DatasetIdRef]) -> Mapping[str, DatastoreRecordData]:
         # Docstring inherited from the base class.
+
+        # This call to 'bridge.check' filters out "partially deleted" datasets.
+        # Specifically, ones in the unusual edge state that:
+        # 1. They have an entry in the registry dataset tables
+        # 2. They were "trashed" from the datastore, so they are not
+        #    present in the "dataset_location" table.
+        # 3. But the trash has not been "emptied", so there are still entries
+        #    in the "opaque" datastore records table.
+        #
+        # As far as I can tell, this can only occur in the case of a concurrent
+        # or aborted call to `Butler.pruneDatasets(unstore=True, purge=False)`.
+        # Datasets (with or without files existing on disk) can persist in
+        # this zombie state indefinitely, until someone manually empties
+        # the trash.
         exported_refs = list(self._bridge.check(refs))
         ids = {ref.id for ref in exported_refs}
         records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = {id: {} for id in ids}

lsst/daf/butler/dimensions/_coordinate.py
CHANGED

@@ -755,6 +755,11 @@ class DataCoordinate:
     to_json = to_json_pydantic
     from_json: ClassVar[Callable[..., Self]] = cast(Callable[..., Self], classmethod(from_json_pydantic))

+    @property
+    def dataId(self) -> Self:
+        """Return this `DataCoordinate` instance, unmodified."""
+        return self
+

 DataId = DataCoordinate | Mapping[str, Any]
 """A type-annotation alias for signatures that accept both informal data ID