lsst-daf-butler 29.2025.4500-py3-none-any.whl → 29.2025.4800-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/daf/butler/__init__.py +0 -3
- lsst/daf/butler/_butler.py +52 -5
- lsst/daf/butler/_dataset_provenance.py +44 -9
- lsst/daf/butler/_labeled_butler_factory.py +47 -30
- lsst/daf/butler/_quantum_backed.py +12 -2
- lsst/daf/butler/_utilities/thread_safe_cache.py +13 -0
- lsst/daf/butler/cli/cmd/commands.py +6 -3
- lsst/daf/butler/configs/datastores/formatters.yaml +3 -2
- lsst/daf/butler/configs/datastores/writeRecipes.yaml +6 -6
- lsst/daf/butler/configs/storageClasses.yaml +2 -0
- lsst/daf/butler/delegates/arrowtable.py +3 -1
- lsst/daf/butler/dimensions/_schema.py +2 -24
- lsst/daf/butler/direct_butler/_direct_butler.py +54 -36
- lsst/daf/butler/formatters/logs.py +2 -2
- lsst/daf/butler/logging.py +289 -109
- lsst/daf/butler/queries/_expression_strings.py +3 -3
- lsst/daf/butler/{registry/queries → queries}/expressions/__init__.py +0 -2
- lsst/daf/butler/queries/expressions/categorize.py +59 -0
- lsst/daf/butler/registry/_registry.py +1 -2
- lsst/daf/butler/registry/_registry_base.py +1 -2
- lsst/daf/butler/registry/collections/nameKey.py +20 -3
- lsst/daf/butler/registry/collections/synthIntKey.py +20 -6
- lsst/daf/butler/registry/datasets/byDimensions/_dataset_type_cache.py +12 -52
- lsst/daf/butler/registry/datasets/byDimensions/_manager.py +97 -87
- lsst/daf/butler/registry/datasets/byDimensions/tables.py +17 -12
- lsst/daf/butler/registry/interfaces/_collections.py +45 -8
- lsst/daf/butler/registry/interfaces/_datasets.py +10 -8
- lsst/daf/butler/registry/interfaces/_obscore.py +0 -5
- lsst/daf/butler/registry/managers.py +0 -22
- lsst/daf/butler/registry/obscore/_manager.py +0 -9
- lsst/daf/butler/registry/queries/__init__.py +0 -5
- lsst/daf/butler/registry/queries/_query_datasets.py +2 -2
- lsst/daf/butler/registry/sql_registry.py +35 -10
- lsst/daf/butler/registry/tests/_database.py +2 -2
- lsst/daf/butler/registry/tests/_registry.py +110 -3
- lsst/daf/butler/remote_butler/_remote_butler.py +17 -1
- lsst/daf/butler/remote_butler/server/handlers/_external.py +11 -0
- lsst/daf/butler/remote_butler/server_models.py +8 -0
- lsst/daf/butler/script/_associate.py +13 -15
- lsst/daf/butler/script/_pruneDatasets.py +32 -31
- lsst/daf/butler/script/butlerImport.py +11 -12
- lsst/daf/butler/script/certifyCalibrations.py +19 -19
- lsst/daf/butler/script/collectionChain.py +27 -28
- lsst/daf/butler/script/configValidate.py +11 -9
- lsst/daf/butler/script/exportCalibs.py +60 -61
- lsst/daf/butler/script/ingest_files.py +13 -14
- lsst/daf/butler/script/ingest_zip.py +2 -3
- lsst/daf/butler/script/queryCollections.py +141 -138
- lsst/daf/butler/script/queryDataIds.py +80 -77
- lsst/daf/butler/script/queryDatasetTypes.py +24 -21
- lsst/daf/butler/script/queryDatasets.py +4 -14
- lsst/daf/butler/script/queryDimensionRecords.py +61 -60
- lsst/daf/butler/script/register_dataset_type.py +15 -14
- lsst/daf/butler/script/removeCollections.py +39 -34
- lsst/daf/butler/script/removeDatasetType.py +2 -2
- lsst/daf/butler/script/removeRuns.py +3 -5
- lsst/daf/butler/script/retrieveArtifacts.py +30 -27
- lsst/daf/butler/script/transferDatasets.py +29 -28
- lsst/daf/butler/tests/hybrid_butler.py +8 -1
- lsst/daf/butler/tests/server.py +4 -5
- lsst/daf/butler/version.py +1 -1
- {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/METADATA +1 -2
- {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/RECORD +80 -93
- lsst/daf/butler/_column_categorization.py +0 -83
- lsst/daf/butler/_column_tags.py +0 -210
- lsst/daf/butler/_column_type_info.py +0 -180
- lsst/daf/butler/registry/queries/_query_backend.py +0 -529
- lsst/daf/butler/registry/queries/_query_context.py +0 -474
- lsst/daf/butler/registry/queries/_readers.py +0 -348
- lsst/daf/butler/registry/queries/_sql_query_backend.py +0 -184
- lsst/daf/butler/registry/queries/_sql_query_context.py +0 -555
- lsst/daf/butler/registry/queries/butler_sql_engine.py +0 -226
- lsst/daf/butler/registry/queries/expressions/_predicate.py +0 -538
- lsst/daf/butler/registry/queries/expressions/categorize.py +0 -339
- lsst/daf/butler/registry/queries/expressions/check.py +0 -540
- lsst/daf/butler/registry/queries/expressions/normalForm.py +0 -1186
- lsst/daf/butler/registry/queries/find_first_dataset.py +0 -102
- /lsst/daf/butler/{registry/queries → queries}/expressions/parser/__init__.py +0 -0
- /lsst/daf/butler/{registry/queries → queries}/expressions/parser/exprTree.py +0 -0
- /lsst/daf/butler/{registry/queries → queries}/expressions/parser/parser.py +0 -0
- /lsst/daf/butler/{registry/queries → queries}/expressions/parser/parserLex.py +0 -0
- /lsst/daf/butler/{registry/queries → queries}/expressions/parser/parserYacc.py +0 -0
- /lsst/daf/butler/{registry/queries → queries}/expressions/parser/ply/__init__.py +0 -0
- /lsst/daf/butler/{registry/queries → queries}/expressions/parser/ply/lex.py +0 -0
- /lsst/daf/butler/{registry/queries → queries}/expressions/parser/ply/yacc.py +0 -0
- /lsst/daf/butler/{registry/queries → queries}/expressions/parser/treeVisitor.py +0 -0
- {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/WHEEL +0 -0
- {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/entry_points.txt +0 -0
- {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/licenses/LICENSE +0 -0
- {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/top_level.txt +0 -0
- {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/zip-safe +0 -0
lsst/daf/butler/__init__.py
CHANGED
@@ -41,9 +41,6 @@ from ._butler_config import *
 from ._butler_metrics import *
 from ._butler_repo_index import *
 from ._collection_type import CollectionType
-from ._column_categorization import *
-from ._column_tags import *
-from ._column_type_info import *
 from ._config import *
 from ._config_support import LookupKey
 from ._dataset_association import *
lsst/daf/butler/_butler.py
CHANGED
@@ -36,7 +36,7 @@ from abc import abstractmethod
 from collections.abc import Collection, Iterable, Iterator, Mapping, Sequence
 from contextlib import AbstractContextManager
 from types import EllipsisType
-from typing import TYPE_CHECKING, Any, TextIO
+from typing import TYPE_CHECKING, Any, Literal, Self, TextIO
 
 from lsst.resources import ResourcePath, ResourcePathExpression
 from lsst.utils import doImportType
@@ -94,7 +94,7 @@ class _DeprecatedDefault:
     """Default value for a deprecated parameter."""
 
 
-class Butler(LimitedButler):  # numpydoc ignore=PR02
+class Butler(LimitedButler, AbstractContextManager):  # numpydoc ignore=PR02
     """Interface for data butler and factory for Butler instances.
 
     Parameters
@@ -358,6 +358,16 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
             case _:
                 raise TypeError(f"Unknown Butler type '{butler_type}'")
 
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Literal[False]:
+        try:
+            self.close()
+        except Exception:
+            _LOG.exception("An exception occurred during Butler.close()")
+        return False
+
     @staticmethod
     def makeRepo(
         root: ResourcePathExpression,
@@ -506,9 +516,10 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
         # Create Registry and populate tables
         registryConfig = RegistryConfig(config.get("registry"))
         dimensionConfig = DimensionConfig(dimensionConfig)
-        _RegistryFactory(registryConfig).create_from_config(
+        registry = _RegistryFactory(registryConfig).create_from_config(
             dimensionConfig=dimensionConfig, butlerRoot=root_uri
         )
+        registry.close()
 
         _LOG.verbose("Wrote new Butler configuration file to %s", configURI)
 
@@ -1012,7 +1023,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
     @abstractmethod
     def get_dataset(
         self,
-        id: DatasetId,
+        id: DatasetId | str,
         *,
         storage_class: str | StorageClass | None = None,
         dimension_records: bool = False,
@@ -1023,7 +1034,8 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
         Parameters
         ----------
         id : `DatasetId`
-            The unique identifier for the dataset
+            The unique identifier for the dataset, as an instance of
+            `uuid.UUID` or a string containing a hexadecimal number.
         storage_class : `str` or `StorageClass` or `None`
             A storage class to use when creating the returned entry. If given
             it must be compatible with the default storage class.
@@ -1040,6 +1052,26 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
         """
         raise NotImplementedError()
 
+    @abstractmethod
+    def get_many_datasets(self, ids: Iterable[DatasetId | str]) -> list[DatasetRef]:
+        """Retrieve a list of dataset entries.
+
+        Parameters
+        ----------
+        ids : `~collections.abc.Iterable` [ `DatasetId` or `str` ]
+            The unique identifiers for the datasets, as instances of
+            `uuid.UUID` or strings containing a hexadecimal number.
+
+        Returns
+        -------
+        refs : `list` [ `DatasetRef` ]
+            A list containing a `DatasetRef` for each of the given dataset IDs.
+            If a dataset was not found, no error is thrown -- it is just not
+            included in the list. The returned datasets are in no particular
+            order.
+        """
+        raise NotImplementedError()
+
     @abstractmethod
     def find_dataset(
         self,
@@ -2201,3 +2233,18 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
             Metrics object to record butler statistics.
         """
         raise NotImplementedError()
+
+    @abstractmethod
+    def close(self) -> None:
+        """Release all resources associated with this Butler instance. The
+        instance may no longer be used after this is called.
+
+        Notes
+        -----
+        Instead of calling ``close()`` directly, you can use the Butler object
+        as a context manager. For example::
+
+            with Butler(...) as butler:
+                butler.get(...)
+            # butler is closed after exiting the block.
+        """
+        raise NotImplementedError()
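Taken together, these `_butler.py` hunks give `Butler` an explicit lifecycle: the class is now an `AbstractContextManager` whose `__exit__` invokes the new abstract `close()`, `get_dataset` accepts a hex string as well as a `uuid.UUID`, and `get_many_datasets` batches lookups. A minimal usage sketch (the repository path is a placeholder; the IDs reuse the example UUIDs from the provenance docstring below):

    from lsst.daf.butler import Butler

    # "/repo/example" is a hypothetical repository location.
    with Butler.from_config("/repo/example", writeable=False) as butler:
        # get_dataset() now also accepts a hex string instead of a uuid.UUID.
        ref = butler.get_dataset("3dfd7ba5-5e35-4565-9d87-4b33880ed06c")

        # get_many_datasets() batches the lookup; IDs that are not found are
        # silently omitted, and the returned order is unspecified.
        refs = butler.get_many_datasets(
            [
                "3dfd7ba5-5e35-4565-9d87-4b33880ed06c",
                "d93a735b-08f0-477d-bc95-2cc32d6d898b",
            ]
        )
    # __exit__ has called close() here; the instance may not be reused.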
lsst/daf/butler/_dataset_provenance.py
CHANGED

@@ -112,6 +112,8 @@ class DatasetProvenance(pydantic.BaseModel):
         sep: str = ".",
         simple_types: bool = False,
         use_upper: bool | None = None,
+        max_inputs: int | None = None,
+        store_minimalist_inputs: bool = False,
     ) -> dict[str, _PROV_TYPES]:
         """Return provenance as a flattened dictionary.
 
@@ -137,6 +139,13 @@
             character of the prefix (defined by whether `str.isupper()` returns
             true, else they will be lower case). If `False` the case will be
             lower case, and if `True` the case will be upper case.
+        max_inputs : `int` or `None`, optional
+            Maximum number of inputs to be recorded in provenance. `None`
+            results in all inputs being recorded. If the number of inputs
+            exceeds this value no input provenance will be recorded.
+        store_minimalist_inputs : `bool`, optional
+            If `True` only the ID of the input is stored along with explicit
+            extras. If `False` the run and dataset type are also recorded.
 
         Returns
         -------
@@ -155,7 +164,13 @@
 
         Each input dataset will have the ``id``, ``run``, and ``datasettype``
         keys as defined above (but no ``dataid`` key) with an ``input N``
-        prefix where ``N`` starts counting at 0.
+        prefix where ``N`` starts counting at 0. It is possible to drop
+        the ``datasettype`` and ``run`` to save space by using the
+        ``store_minimalist_inputs`` flag.
+
+        If there are too many inputs (see the ``max_inputs`` parameter)
+        no inputs will be recorded. The number of inputs is always recorded
+        to indicate that the inputs were dropped.
 
         The quantum ID, if present, will use key ``quantum``.
 
@@ -171,6 +186,7 @@
             "lsst.butler.dataid.detector": 10,
             "lsst.butler.dataid.instrument": "LSSTCam",
             "lsst.butler.quantum": "d93a735b-08f0-477d-bc95-2cc32d6d898b",
+            "lsst.butler.n_inputs": 2,
             "lsst.butler.input.0.id": "3dfd7ba5-5e35-4565-9d87-4b33880ed06c",
             "lsst.butler.input.0.run": "other_run",
             "lsst.butler.input.0.datasettype": "astropy_parquet",
@@ -206,12 +222,28 @@
         if self.quantum_id is not None:
             prov[_make_key("quantum")] = self.quantum_id if not simple_types else str(self.quantum_id)
 
-        for i, input in enumerate(self.inputs):
+        # Record the number of inputs so that people can determine how many
+        # there were even if they were dropped because they exceeded the
+        # allowed maximum. Do not record the count if we have a null provenance
+        # state with no ref and no inputs.
+        if ref is not None or len(self.inputs) > 0:
+            prov[_make_key("n_inputs")] = len(self.inputs)
+
+        # Remove all inputs if the maximum is exceeded. Truncating to the
+        # maximum (or auto switching to minimalist mode and increasing the
+        # maximum by 3) is not preferred.
+        inputs = (
+            self.inputs
+            if max_inputs is None or (max_inputs is not None and len(self.inputs) <= max_inputs)
+            else []
+        )
+        for i, input in enumerate(inputs):
             prov[_make_key("input", i, "id")] = input.id if not simple_types else str(input.id)
-            if input.run is not None:  # for mypy
-                prov[_make_key("input", i, "run")] = input.run
-            if input.datasetType is not None:  # for mypy
-                prov[_make_key("input", i, "datasettype")] = input.datasetType.name
+            if not store_minimalist_inputs:
+                if input.run is not None:  # for mypy
+                    prov[_make_key("input", i, "run")] = input.run
+                if input.datasetType is not None:  # for mypy
+                    prov[_make_key("input", i, "datasettype")] = input.datasetType.name
 
             if input.id in self.extras:
                 for xk, xv in self.extras[input.id].items():
@@ -369,7 +401,9 @@
         # Prefix will always include the separator if it is defined.
         prefix += sep
 
-        core_provenance = tuple(f"{prefix}{k}".lower() for k in ("run", "id", "datasettype", "quantum"))
+        core_provenance = tuple(
+            f"{prefix}{k}".lower() for k in ("run", "id", "datasettype", "quantum", "n_inputs")
+        )
 
         # Need to escape the prefix and separator for regex usage.
         esc_sep = re.escape(sep)
@@ -445,7 +479,7 @@
 
         quantum_id = None
         ref_id = None
-        input_ids = {}
+        input_ids: dict[int, uuid.UUID] = {}
         extras: dict[int, dict[str, Any]] = {}
 
         for k, standard in prov_keys.items():
@@ -475,8 +509,9 @@
 
         provenance = cls(quantum_id=quantum_id)
 
+        input_refs = {ref.id: ref for ref in butler.get_many_datasets(input_ids.values())}
         for i in sorted(input_ids):
-            input_ref = butler.get_dataset(input_ids[i])
+            input_ref = input_refs.get(input_ids[i])
             if input_ref is None:
                 raise ValueError(f"Input dataset ({input_ids[i]}) is not known to this butler.")
             provenance.add_input(input_ref)
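A sketch of how the two new `to_flat_dict` flags behave, following the docstring above; it assumes `prov` is a `DatasetProvenance` with two recorded inputs, `ref` is the output `DatasetRef`, and the key prefix matches the ``lsst.butler`` example:

    full = prov.to_flat_dict(ref, prefix="lsst.butler", sep=".", simple_types=True)
    # full["lsst.butler.n_inputs"] == 2, with id/run/datasettype keys per input.

    minimal = prov.to_flat_dict(
        ref, prefix="lsst.butler", sep=".", simple_types=True, store_minimalist_inputs=True
    )
    # Per-input entries now carry only "lsst.butler.input.N.id" (plus any
    # explicit extras); run and dataset type are dropped to save space.

    capped = prov.to_flat_dict(
        ref, prefix="lsst.butler", sep=".", simple_types=True, max_inputs=1
    )
    # Two inputs exceed max_inputs=1, so no per-input keys are written at all,
    # but "lsst.butler.n_inputs" still reports 2 so the drop is detectable.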
lsst/daf/butler/_labeled_butler_factory.py
CHANGED

@@ -25,9 +25,11 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
+from __future__ import annotations
+
 __all__ = ("LabeledButlerFactory", "LabeledButlerFactoryProtocol")
 
-from collections.abc import Callable, Mapping
+from collections.abc import Mapping
 from typing import Protocol
 
 from lsst.resources import ResourcePathExpression
@@ -38,10 +40,6 @@ from ._butler_repo_index import ButlerRepoIndex
 from ._utilities.named_locks import NamedLocks
 from ._utilities.thread_safe_cache import ThreadSafeCache
 
-_FactoryFunction = Callable[[str | None], Butler]
-"""Function that takes an access token string or `None`, and returns a Butler
-instance."""
-
 
 class LabeledButlerFactoryProtocol(Protocol):
     """Callable to retrieve a butler from a label."""
@@ -84,7 +82,7 @@ class LabeledButlerFactory:
         else:
             self._repositories = dict(repositories)
 
-        self._factories = ThreadSafeCache[str, _FactoryFunction]()
+        self._factories = ThreadSafeCache[str, _ButlerFactory]()
         self._initialization_locks = NamedLocks()
 
         # This may be overridden by unit tests.
@@ -138,10 +136,18 @@
         based on the end user instead of the service. See
         https://gafaelfawr.lsst.io/user-guide/gafaelfawringress.html#requesting-delegated-tokens
         """
-        factory = self._get_or_create_butler_factory_function(label)
-        return factory(access_token)
+        factory = self._get_or_create_butler_factory(label)
+        return factory.create_butler(access_token)
+
+    def close(self) -> None:
+        """Reset the factory cache, and release any resources associated with
+        the cached instances.
+        """
+        factories = self._factories.clear()
+        for factory in factories.values():
+            factory.close()
 
-    def _get_or_create_butler_factory_function(self, label: str) -> _FactoryFunction:
+    def _get_or_create_butler_factory(self, label: str) -> _ButlerFactory:
         # We maintain a separate lock per label. We only want to instantiate
         # one factory function per label, because creating the factory sets up
         # shared state that should only exist once per repository. However, we
@@ -154,16 +160,16 @@
             factory = self._create_butler_factory_function(label)
             return self._factories.set_or_get(label, factory)
 
-    def _create_butler_factory_function(self, label: str) -> _FactoryFunction:
+    def _create_butler_factory_function(self, label: str) -> _ButlerFactory:
         config_uri = self._get_config_uri(label)
         config = ButlerConfig(config_uri)
         butler_type = config.get_butler_type()
 
         match butler_type:
             case ButlerType.DIRECT:
-                return
+                return _DirectButlerFactory(config, self._preload_unsafe_direct_butler_caches)
             case ButlerType.REMOTE:
-                return
+                return _RemoteButlerFactory(config)
             case _:
                 raise TypeError(f"Unknown butler type '{butler_type}' for label '{label}'")
 
@@ -177,34 +183,45 @@
         return config_uri
 
 
-
-
+class _ButlerFactory(Protocol):
+    def create_butler(self, access_token: str | None) -> Butler: ...
+    def close(self) -> None: ...
+
+
+class _DirectButlerFactory(_ButlerFactory):
+    def __init__(self, config: ButlerConfig, preload_unsafe_caches: bool) -> None:
+        import lsst.daf.butler.direct_butler
 
-
-
-
-
+        # Create a 'template' Butler that will be cloned when callers request
+        # an instance.
+        self._butler = Butler.from_config(config)
+        assert isinstance(self._butler, lsst.daf.butler.direct_butler.DirectButler)
 
-
-
-
+        # Load caches so that data is available in cloned instances without
+        # needing to refetch it from the database for every instance.
+        self._butler._preload_cache(load_dimension_record_cache=preload_unsafe_caches)
 
-    def create_butler(access_token: str | None) -> Butler:
+    def create_butler(self, access_token: str | None) -> Butler:
         # Access token is ignored because DirectButler does not use Gafaelfawr
         # authentication.
-        return
+        return self._butler.clone()
 
-
+    def close(self) -> None:
+        self._butler.close()
 
 
-
-
+class _RemoteButlerFactory(_ButlerFactory):
+    def __init__(self, config: ButlerConfig) -> None:
+        import lsst.daf.butler.remote_butler._factory
 
-
+        self._factory = lsst.daf.butler.remote_butler._factory.RemoteButlerFactory.create_factory_from_config(
+            config
+        )
 
-    def create_butler(access_token: str | None) -> Butler:
+    def create_butler(self, access_token: str | None) -> Butler:
         if access_token is None:
             raise ValueError("Access token is required to connect to a Butler server")
-        return
+        return self._factory.create_butler_for_access_token(access_token)
 
-
+    def close(self) -> None:
+        pass
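The refactor replaces the old per-label closures (`_FactoryFunction`) with small `_ButlerFactory` objects so the cached template `Butler` instances can be released. A sketch of the intended server-side lifecycle; it assumes the existing `create_butler(label=..., access_token=...)` entry point and a label resolvable through `ButlerRepoIndex`:

    from lsst.daf.butler import LabeledButlerFactory

    factory = LabeledButlerFactory()

    # "example-repo" is a hypothetical label. For DIRECT repositories this
    # clones the cached template Butler; for REMOTE ones it binds a
    # RemoteButler to the caller's delegated token.
    butler = factory.create_butler(label="example-repo", access_token=None)

    # New in this release: empty the label cache and close() each cached
    # factory, disposing the template Butlers' database connections.
    factory.close()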
lsst/daf/butler/_quantum_backed.py
CHANGED

@@ -55,7 +55,7 @@ from .datastore import Datastore
 from .datastore.record_data import DatastoreRecordData, SerializedDatastoreRecordData
 from .datastores.file_datastore.retrieve_artifacts import retrieve_and_zip
 from .dimensions import DimensionUniverse
-from .registry.interfaces import DatastoreRegistryBridgeManager, OpaqueTableStorageManager
+from .registry.interfaces import Database, DatastoreRegistryBridgeManager, OpaqueTableStorageManager
 
 if TYPE_CHECKING:
     from ._butler import Butler
@@ -83,6 +83,9 @@ class QuantumBackedButler(LimitedButler):
         The registry dataset type definitions, indexed by name.
     metrics : `lsst.daf.butler.ButlerMetrics` or `None`, optional
         Metrics object for tracking butler statistics.
+    database : `Database`, optional
+        Database instance used by datastore. Not required -- only provided
+        to allow database connections to be closed during cleanup.
 
     Notes
     -----
@@ -130,6 +133,7 @@ class QuantumBackedButler(LimitedButler):
         storageClasses: StorageClassFactory,
         dataset_types: Mapping[str, DatasetType] | None = None,
         metrics: ButlerMetrics | None = None,
+        database: Database | None = None,
     ):
         self._dimensions = dimensions
         self._predicted_inputs = set(predicted_inputs)
@@ -142,6 +146,7 @@ class QuantumBackedButler(LimitedButler):
         self.storageClasses = storageClasses
         self._dataset_types: Mapping[str, DatasetType] = {}
         self._metrics = metrics if metrics is not None else ButlerMetrics()
+        self._database = database
         if dataset_types is not None:
             self._dataset_types = dataset_types
         self._datastore.set_retrieve_dataset_type_method(self._retrieve_dataset_type)
@@ -321,7 +326,7 @@
             Metrics object for gathering butler statistics.
         """
         butler_config = ButlerConfig(config, searchPaths=search_paths)
-        datastore,
+        datastore, database = instantiate_standalone_datastore(
             butler_config, dimensions, filename, OpaqueManagerClass, BridgeManagerClass
         )
@@ -342,8 +347,13 @@
             storageClasses=storageClasses,
             dataset_types=dataset_types,
             metrics=metrics,
+            database=database,
         )
 
+    def close(self) -> None:
+        if self._database is not None:
+            self._database.dispose()
+
     def _retrieve_dataset_type(self, name: str) -> DatasetType | None:
         """Return DatasetType defined in registry given dataset type name."""
         return self._dataset_types.get(name)
lsst/daf/butler/_utilities/thread_safe_cache.py
CHANGED

@@ -76,3 +76,16 @@ class ThreadSafeCache(Generic[TKey, TValue]):
         """
         with self._mutex:
             return self._cache.setdefault(key, value)
+
+    def clear(self) -> dict[TKey, TValue]:
+        """Clear the cache.
+
+        Returns
+        -------
+        old_cache : `dict`
+            The values that were contained in the cache prior to clearing it.
+        """
+        with self._mutex:
+            old = self._cache
+            self._cache = {}
+            return old
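`clear()` swaps the whole dictionary out while holding the mutex and returns the old one, so a caller can dispose of the evicted values without blocking other threads. A self-contained sketch of that swap-under-lock pattern (an illustration, not the library class):

    import threading

    class SwapClearCache:
        def __init__(self) -> None:
            self._mutex = threading.Lock()
            self._cache: dict[str, object] = {}

        def set_or_get(self, key: str, value: object) -> object:
            # Store value only if the key is absent; return whatever is cached.
            with self._mutex:
                return self._cache.setdefault(key, value)

        def clear(self) -> dict[str, object]:
            # Swap in a fresh dict under the lock, then hand the old one back
            # so its contents can be closed outside the critical section.
            with self._mutex:
                old = self._cache
                self._cache = {}
            return old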
lsst/daf/butler/cli/cmd/commands.py
CHANGED

@@ -33,6 +33,7 @@ from typing import Any
 import click
 
 from ... import script
+from ..._butler import Butler
 from ..opt import (
     collection_argument,
     collection_type_option,
@@ -487,9 +488,11 @@ def remove_dataset_type(*args: Any, **kwargs: Any) -> None:
 @options_file_option()
 def query_datasets(**kwargs: Any) -> None:
     """List the datasets in a repository."""
-
-
-    table.
+    repo = kwargs.pop("repo")
+    with Butler.from_config(repo, writeable=False) as butler:
+        for table in script.QueryDatasets(butler=butler, **kwargs).getTables():
+            print("")
+            table.pprint_all()
     print("")
lsst/daf/butler/configs/datastores/formatters.yaml
CHANGED

@@ -93,8 +93,9 @@ MultipleCellCoadd: lsst.cell_coadds.CellCoaddFitsFormatter
 NNModelPackagePayload: lsst.meas.transiNet.modelPackages.NNModelPackageFormatter
 Timespan: lsst.daf.butler.formatters.json.JsonFormatter
 RegionTimeInfo: lsst.daf.butler.formatters.json.JsonFormatter
-QPEnsemble: lsst.meas.
-PZModel: lsst.meas.
+QPEnsemble: lsst.meas.photoz.base.qp_formatter.QPFormatter
+PZModel: lsst.meas.photoz.base.model_formatter.ModelFormatter
+PhotozModel: lsst.meas.photoz.base.model_formatter.ModelFormatter
 VisitBackgroundModel: lsst.daf.butler.formatters.json.JsonFormatter
 VignettingCorrection: lsst.ts.observatory.control.utils.extras.vignetting_storage.VignettingCorrectionFormatter
 SSPAuxiliaryFile: lsst.pipe.tasks.sspAuxiliaryFile.SSPAuxiliaryFileFormatter
lsst/daf/butler/configs/datastores/writeRecipes.yaml
CHANGED

@@ -12,19 +12,19 @@ lsst.obs.base.formatters.fitsExposure.StandardFitsImageFormatterBase: &StandardF
     variance:
       <<: *losslessOptions
 
-  #
-
-  image: &
+  # Lossy (quantizing) compression with noise sigma subdivided into 16.
+  lossy16:
+    image: &lossy16Options
       algorithm: RICE_1
       quantization:
         dither: SUBTRACTIVE_DITHER_2
         scaling: STDEV_MASKED
-        mask_planes: ["NO_DATA"]
-        level:
+        mask_planes: ["NO_DATA", "INTRP"]
+        level: 16.0
     mask:
       <<: *losslessOptions
     variance:
-      <<: *
+      <<: *lossy16Options
 
 # Set the default
 default:
lsst/daf/butler/configs/storageClasses.yaml
CHANGED

@@ -433,6 +433,8 @@ storageClasses:
     pytype: qp.Ensemble
   PZModel:
     pytype: rail.core.model.Model
+  PhotozModel:
+    pytype: rail.core.model.Model
   VisitBackgroundModel:
     pytype: lsst.drp.tasks.fit_visit_background.VisitBackgroundModel
   VignettingCorrection:
lsst/daf/butler/delegates/arrowtable.py
CHANGED

@@ -229,7 +229,9 @@ def _add_arrow_provenance(
     type_string = _checkArrowCompatibleType(in_memory_dataset)
     if type_string == "astropy":
         provenance = provenance if provenance is not None else DatasetProvenance()
-        prov_dict = provenance.to_flat_dict(ref, prefix="LSST.BUTLER", sep=".", simple_types=True)
+        prov_dict = provenance.to_flat_dict(
+            ref, prefix="LSST.BUTLER", sep=".", simple_types=True, max_inputs=2000
+        )
 
         # Strip any previous provenance.
         DatasetProvenance.strip_provenance_from_flat_dict(in_memory_dataset.meta)
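The cap bounds metadata growth for tables assembled from very many inputs. A hedged sketch of what a reader of such a table would see, assuming the upper-case key style implied by the ``LSST.BUTLER`` prefix and a hypothetical input count:

    table = butler.get(ref)  # an astropy table written by this delegate
    n_inputs = table.meta["LSST.BUTLER.N_INPUTS"]  # e.g. 2473 (hypothetical)
    # With more than 2000 inputs, no per-input keys were recorded:
    assert "LSST.BUTLER.INPUT.0.ID" not in table.meta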
lsst/daf/butler/dimensions/_schema.py
CHANGED

@@ -29,20 +29,17 @@ from __future__ import annotations
 __all__ = ("DimensionRecordSchema", "addDimensionForeignKey")
 
 import copy
-from collections.abc import Mapping, Set
+from collections.abc import Set
 from typing import TYPE_CHECKING
 
-from lsst.utils.classes import cached_getter, immutable
+from lsst.utils.classes import immutable
 
 from .. import arrow_utils, ddl
-from .._column_tags import DimensionKeyColumnTag, DimensionRecordColumnTag
 from .._named import NamedValueAbstractSet, NamedValueSet
 from ..column_spec import RegionColumnSpec, TimespanColumnSpec
 from ..timespan_database_representation import TimespanDatabaseRepresentation
 
 if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
-    from lsst.daf.relation import ColumnTag
-
     from ._elements import Dimension, DimensionElement, KeyColumnSpec, MetadataColumnSpec
     from ._group import DimensionGroup
 
@@ -387,25 +384,6 @@ class DimensionElementFields:
         lines.append("    timespan: lsst.daf.butler.Timespan")
         return "\n".join(lines)
 
-    @property
-    @cached_getter
-    def columns(self) -> Mapping[ColumnTag, str]:
-        """A mapping from `ColumnTag` to field name for all fields in this
-        element's records (`~collections.abc.Mapping`).
-        """
-        result: dict[ColumnTag, str] = {}
-        for dimension_name, field_name in zip(
-            self.element.dimensions.names, self.dimensions.names, strict=True
-        ):
-            result[DimensionKeyColumnTag(dimension_name)] = field_name
-        for field_name in self.facts.names:
-            result[DimensionRecordColumnTag(self.element.name, field_name)] = field_name
-        if self.element.spatial:
-            result[DimensionRecordColumnTag(self.element.name, "region")] = "region"
-        if self.element.temporal:
-            result[DimensionRecordColumnTag(self.element.name, "timespan")] = "timespan"
-        return result
-
     element: DimensionElement
     """The dimension element these fields correspond to.