lsst-daf-butler 29.0.1__py3-none-any.whl → 29.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/daf/butler/__init__.py +1 -0
- lsst/daf/butler/_butler.py +57 -10
- lsst/daf/butler/_butler_collections.py +4 -0
- lsst/daf/butler/_butler_instance_options.py +3 -0
- lsst/daf/butler/_butler_metrics.py +117 -0
- lsst/daf/butler/_config.py +1 -1
- lsst/daf/butler/_dataset_ref.py +99 -16
- lsst/daf/butler/_file_dataset.py +78 -3
- lsst/daf/butler/_limited_butler.py +34 -2
- lsst/daf/butler/_quantum_backed.py +23 -4
- lsst/daf/butler/arrow_utils.py +7 -9
- lsst/daf/butler/cli/butler.py +1 -1
- lsst/daf/butler/cli/cmd/_remove_runs.py +2 -0
- lsst/daf/butler/cli/cmd/commands.py +25 -1
- lsst/daf/butler/cli/utils.py +32 -4
- lsst/daf/butler/column_spec.py +77 -34
- lsst/daf/butler/configs/datastores/formatters.yaml +1 -0
- lsst/daf/butler/configs/storageClasses.yaml +2 -0
- lsst/daf/butler/datastore/_datastore.py +30 -4
- lsst/daf/butler/datastore/generic_base.py +2 -2
- lsst/daf/butler/datastores/chainedDatastore.py +63 -92
- lsst/daf/butler/datastores/fileDatastore.py +371 -97
- lsst/daf/butler/datastores/inMemoryDatastore.py +33 -5
- lsst/daf/butler/dimensions/_coordinate.py +4 -15
- lsst/daf/butler/dimensions/_group.py +15 -5
- lsst/daf/butler/dimensions/_record_set.py +469 -4
- lsst/daf/butler/dimensions/_record_table.py +1 -1
- lsst/daf/butler/dimensions/_records.py +127 -6
- lsst/daf/butler/dimensions/_universe.py +12 -8
- lsst/daf/butler/dimensions/record_cache.py +1 -2
- lsst/daf/butler/direct_butler/_direct_butler.py +406 -225
- lsst/daf/butler/direct_query_driver/_driver.py +30 -11
- lsst/daf/butler/direct_query_driver/_query_builder.py +74 -17
- lsst/daf/butler/direct_query_driver/_sql_column_visitor.py +28 -1
- lsst/daf/butler/pydantic_utils.py +26 -0
- lsst/daf/butler/queries/_expression_strings.py +24 -0
- lsst/daf/butler/queries/_identifiers.py +4 -1
- lsst/daf/butler/queries/_query.py +48 -1
- lsst/daf/butler/queries/expression_factory.py +16 -0
- lsst/daf/butler/queries/overlaps.py +1 -1
- lsst/daf/butler/{direct_query_driver/_predicate_constraints_summary.py → queries/predicate_constraints_summary.py} +2 -2
- lsst/daf/butler/queries/tree/_column_expression.py +39 -0
- lsst/daf/butler/queries/tree/_column_set.py +1 -1
- lsst/daf/butler/queries/tree/_predicate.py +19 -9
- lsst/daf/butler/registry/bridge/ephemeral.py +16 -6
- lsst/daf/butler/registry/bridge/monolithic.py +78 -37
- lsst/daf/butler/registry/collections/_base.py +23 -6
- lsst/daf/butler/registry/connectionString.py +5 -10
- lsst/daf/butler/registry/databases/postgresql.py +50 -0
- lsst/daf/butler/registry/databases/sqlite.py +46 -0
- lsst/daf/butler/registry/datasets/byDimensions/_manager.py +77 -64
- lsst/daf/butler/registry/datasets/byDimensions/summaries.py +4 -4
- lsst/daf/butler/registry/dimensions/static.py +20 -8
- lsst/daf/butler/registry/interfaces/_bridge.py +13 -1
- lsst/daf/butler/registry/interfaces/_database.py +21 -0
- lsst/daf/butler/registry/interfaces/_datasets.py +4 -16
- lsst/daf/butler/registry/interfaces/_dimensions.py +7 -2
- lsst/daf/butler/registry/queries/expressions/_predicate.py +35 -19
- lsst/daf/butler/registry/queries/expressions/check.py +29 -10
- lsst/daf/butler/registry/queries/expressions/normalForm.py +15 -0
- lsst/daf/butler/registry/queries/expressions/parser/exprTree.py +136 -23
- lsst/daf/butler/registry/queries/expressions/parser/parserLex.py +10 -1
- lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py +47 -24
- lsst/daf/butler/registry/queries/expressions/parser/treeVisitor.py +49 -10
- lsst/daf/butler/registry/sql_registry.py +17 -45
- lsst/daf/butler/registry/tests/_registry.py +60 -32
- lsst/daf/butler/remote_butler/_http_connection.py +15 -3
- lsst/daf/butler/remote_butler/_query_driver.py +5 -7
- lsst/daf/butler/remote_butler/_registry.py +3 -2
- lsst/daf/butler/remote_butler/_remote_butler.py +50 -27
- lsst/daf/butler/remote_butler/server/_config.py +68 -13
- lsst/daf/butler/remote_butler/server/_dependencies.py +68 -3
- lsst/daf/butler/remote_butler/server/_gafaelfawr.py +125 -0
- lsst/daf/butler/remote_butler/server/_server.py +11 -4
- lsst/daf/butler/remote_butler/server/_telemetry.py +105 -0
- lsst/daf/butler/remote_butler/server/handlers/_external.py +10 -2
- lsst/daf/butler/remote_butler/server/handlers/_query_serialization.py +5 -7
- lsst/daf/butler/remote_butler/server/handlers/_query_streaming.py +7 -3
- lsst/daf/butler/script/ingest_zip.py +13 -1
- lsst/daf/butler/script/queryCollections.py +185 -29
- lsst/daf/butler/script/removeRuns.py +2 -5
- lsst/daf/butler/script/retrieveArtifacts.py +1 -0
- lsst/daf/butler/script/transferDatasets.py +5 -0
- lsst/daf/butler/tests/butler_queries.py +236 -23
- lsst/daf/butler/tests/cliCmdTestBase.py +1 -1
- lsst/daf/butler/tests/hybrid_butler.py +37 -8
- lsst/daf/butler/tests/hybrid_butler_registry.py +15 -2
- lsst/daf/butler/tests/server.py +28 -3
- lsst/daf/butler/version.py +1 -1
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/METADATA +1 -1
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/RECORD +99 -96
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/WHEEL +1 -1
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/entry_points.txt +0 -0
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/licenses/LICENSE +0 -0
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/top_level.txt +0 -0
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/zip-safe +0 -0
lsst/daf/butler/__init__.py
CHANGED
@@ -38,6 +38,7 @@ from . import ddl, time_utils
 from ._butler import *
 from ._butler_collections import *
 from ._butler_config import *
+from ._butler_metrics import *
 from ._butler_repo_index import *
 from ._collection_type import CollectionType
 from ._column_categorization import *
lsst/daf/butler/_butler.py
CHANGED
@@ -46,6 +46,7 @@ from lsst.utils.logging import getLogger
 from ._butler_collections import ButlerCollections
 from ._butler_config import ButlerConfig, ButlerType
 from ._butler_instance_options import ButlerInstanceOptions
+from ._butler_metrics import ButlerMetrics
 from ._butler_repo_index import ButlerRepoIndex
 from ._config import Config, ConfigSubset
 from ._exceptions import EmptyQueryResultError, InvalidQueryError
@@ -89,6 +90,10 @@ class SpecificButlerDataset:
     dataset: DatasetRef | None


+class _DeprecatedDefault:
+    """Default value for a deprecated parameter."""
+
+
 class Butler(LimitedButler):  # numpydoc ignore=PR02
     """Interface for data butler and factory for Butler instances.

@@ -154,6 +159,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         writeable: bool | None = None,
         inferDefaults: bool = True,
         without_datastore: bool = False,
+        metrics: ButlerMetrics | None = None,
         **kwargs: Any,
     ) -> Butler:
         if cls is Butler:
@@ -165,6 +171,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
                 writeable=writeable,
                 inferDefaults=inferDefaults,
                 without_datastore=without_datastore,
+                metrics=metrics,
                 **kwargs,
             )

@@ -183,6 +190,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         writeable: bool | None = None,
         inferDefaults: bool = True,
         without_datastore: bool = False,
+        metrics: ButlerMetrics | None = None,
         **kwargs: Any,
     ) -> Butler:
         """Create butler instance from configuration.
@@ -230,6 +238,8 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         without_datastore : `bool`, optional
             If `True` do not attach a datastore to this butler. Any attempts
             to use a datastore will fail.
+        metrics : `ButlerMetrics` or `None`, optional
+            Metrics object to record butler usage statistics.
         **kwargs : `Any`
             Default data ID key-value pairs. These may only identify
             "governor" dimensions like ``instrument`` and ``skymap``.
@@ -300,6 +310,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         # passing the "butler" parameter to its constructor. This has
        # been moved out of the constructor into Butler.clone().
         butler = kwargs.pop("butler", None)
+        metrics = metrics if metrics is not None else ButlerMetrics()
         if butler is not None:
             if not isinstance(butler, Butler):
                 raise TypeError("'butler' parameter must be a Butler instance")
@@ -307,10 +318,17 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             raise TypeError(
                 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
             )
-            return butler.clone(
+            return butler.clone(
+                collections=collections, run=run, inferDefaults=inferDefaults, metrics=metrics, dataId=kwargs
+            )

         options = ButlerInstanceOptions(
-            collections=collections,
+            collections=collections,
+            run=run,
+            writeable=writeable,
+            inferDefaults=inferDefaults,
+            metrics=metrics,
+            kwargs=kwargs,
         )

         # Load the Butler configuration. This may involve searching the
@@ -1274,7 +1292,13 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         raise NotImplementedError()

     @abstractmethod
-    def removeRuns(
+    def removeRuns(
+        self,
+        names: Iterable[str],
+        unstore: bool | type[_DeprecatedDefault] = _DeprecatedDefault,
+        *,
+        unlink_from_chains: bool = False,
+    ) -> None:
         """Remove one or more `~CollectionType.RUN` collections and the
         datasets within them.

@@ -1287,7 +1311,13 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             they are present, and attempt to rollback the registry deletions if
             datastore deletions fail (which may not always be possible). If
             `False`, datastore records for these datasets are still removed,
-            but any artifacts (e.g. files) will not be.
+            but any artifacts (e.g. files) will not be. This parameter is now
+            deprecated and no longer has any effect. Files are always deleted
+            from datastores unless they were ingested using full URIs.
+        unlink_from_chains : `bool`, optional
+            If `True` remove the RUN collection from any chains prior to
+            removing the RUN. If `False` the removal will fail if any chains
+            still refer to the RUN.

         Raises
         ------
@@ -1360,7 +1390,14 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         raise NotImplementedError()

     @abstractmethod
-    def ingest_zip(
+    def ingest_zip(
+        self,
+        zip_file: ResourcePathExpression,
+        transfer: str = "auto",
+        *,
+        transfer_dimensions: bool = False,
+        dry_run: bool = False,
+    ) -> None:
         """Ingest a Zip file into this butler.

         The Zip file must have been created by `retrieve_artifacts_zip`.
@@ -1371,10 +1408,17 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             Path to the Zip file.
         transfer : `str`, optional
             Method to use to transfer the Zip into the datastore.
+        transfer_dimensions : `bool`, optional
+            If `True`, dimension record data associated with the new datasets
+            will be transferred from the Zip file, if present.
+        dry_run : `bool`, optional
+            If `True` the ingest will be processed without any modifications
+            made to the target butler and as if the target butler did not
+            have any of the datasets.

         Notes
         -----
-        Run collections are created as needed.
+        Run collections and dataset types are created as needed.
         """
         raise NotImplementedError()

@@ -1741,8 +1785,8 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             warn_limit = True
         with self.query() as query:
             result = (
-                query.
-                .
+                query.data_ids(dimensions)
+                .where(data_id, where, bind=bind, **kwargs)
                 .order_by(*ensure_iterable(order_by))
                 .limit(query_limit)
             )
@@ -1979,8 +2023,8 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             warn_limit = True
         with self.query() as query:
             result = (
-                query.
-                .
+                query.dimension_records(element)
+                .where(data_id, where, bind=bind, **kwargs)
                 .order_by(*ensure_iterable(order_by))
                 .limit(query_limit)
             )
@@ -2123,6 +2167,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         run: str | None | EllipsisType = ...,
         inferDefaults: bool | EllipsisType = ...,
         dataId: dict[str, str] | EllipsisType = ...,
+        metrics: ButlerMetrics | None = None,
     ) -> Butler:
         """Return a new Butler instance connected to the same repository
         as this one, optionally overriding ``collections``, ``run``,
@@ -2142,5 +2187,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         dataId : `str`
            Same as ``kwargs`` passed to the constructor. If omitted, copies
            values from original object.
+        metrics : `ButlerMetrics` or `None`, optional
+            Metrics object to record butler statistics.
         """
         raise NotImplementedError()
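A minimal usage sketch of the new `metrics` argument and the updated `removeRuns`/`ingest_zip` signatures; the repository path, run names, and Zip path below are placeholders, not values from this diff:

    from lsst.daf.butler import Butler, ButlerMetrics

    metrics = ButlerMetrics()
    butler = Butler.from_config("/repo/example", run="u/demo/run", metrics=metrics)

    # Detach the RUN from any CHAINED collections before deleting it; without
    # unlink_from_chains=True the removal fails if chains still refer to it.
    butler.removeRuns(["u/demo/old_run"], unlink_from_chains=True)

    # Report what an ingest would do without modifying the target butler.
    butler.ingest_zip("/tmp/artifacts.zip", transfer="auto", dry_run=True)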
lsst/daf/butler/_butler_collections.py
CHANGED
@@ -108,6 +108,10 @@ class ButlerCollections(ABC, Sequence):
         """Collection defaults associated with this butler."""
         raise NotImplementedError("Defaults must be implemented by a subclass")

+    def __str__(self) -> str:
+        """Return string representation."""
+        return f"{self.__class__.__name__}(defaults={self.defaults})"
+
     @abstractmethod
     def extend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None:
         """Add children to the end of a CHAINED collection.
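A one-line illustration of the new default `__str__` (sketch; assumes `butler` is an existing `Butler` instance):

    # Prints the concrete subclass name plus its collection defaults,
    # e.g. "...Collections(defaults=...)".
    print(str(butler.collections))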
lsst/daf/butler/_butler_instance_options.py
CHANGED
@@ -30,6 +30,8 @@ __all__ = ("ButlerInstanceOptions",)
 import dataclasses
 from typing import Any

+from ._butler_metrics import ButlerMetrics
+

 @dataclasses.dataclass(frozen=True)
 class ButlerInstanceOptions:
@@ -43,4 +45,5 @@ class ButlerInstanceOptions:
     run: str | None = None
     writeable: bool | None = None
     inferDefaults: bool = True
+    metrics: ButlerMetrics = dataclasses.field(default_factory=ButlerMetrics)
     kwargs: dict[str, Any] = dataclasses.field(default_factory=dict)
lsst/daf/butler/_butler_metrics.py
ADDED
@@ -0,0 +1,117 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+from collections.abc import Callable, Iterator
+from contextlib import contextmanager
+
+from pydantic import BaseModel
+
+from lsst.utils.logging import LsstLoggers
+from lsst.utils.timer import time_this
+
+
+class ButlerMetrics(BaseModel):
+    """Metrics collected during Butler operations."""
+
+    time_in_put: float = 0.0
+    """Wall-clock time, in seconds, spent in put()."""
+
+    time_in_get: float = 0.0
+    """Wall-clock time, in seconds, spent in get()."""
+
+    n_get: int = 0
+    """Number of datasets retrieved with get()."""
+
+    n_put: int = 0
+    """Number of datasets stored with put()."""
+
+    def reset(self) -> None:
+        """Reset all metrics."""
+        self.time_in_put = 0.0
+        self.time_in_get = 0.0
+        self.n_get = 0
+        self.n_put = 0
+
+    def increment_get(self, duration: float) -> None:
+        """Increment time for get().
+
+        Parameters
+        ----------
+        duration : `float`
+            Duration to add to the get() statistics.
+        """
+        self.time_in_get += duration
+        self.n_get += 1
+
+    def increment_put(self, duration: float) -> None:
+        """Increment time for put().
+
+        Parameters
+        ----------
+        duration : `float`
+            Duration to add to the put() statistics.
+        """
+        self.time_in_put += duration
+        self.n_put += 1
+
+    @contextmanager
+    def _timer(
+        self, handler: Callable[[float], None], log: LsstLoggers | None = None, msg: str | None = None
+    ) -> Iterator[None]:
+        with time_this(log=log, msg=msg) as timer:
+            yield
+        handler(timer.duration)
+
+    @contextmanager
+    def instrument_get(self, log: LsstLoggers | None = None, msg: str | None = None) -> Iterator[None]:
+        """Run code and increment get statistics.
+
+        Parameters
+        ----------
+        log : `logging.Logger` or `None`
+            Logger to use for any timing information.
+        msg : `str` or `None`
+            Any message to be included in log output.
+        """
+        with self._timer(self.increment_get, log=log, msg=msg):
+            yield
+
+    @contextmanager
+    def instrument_put(self, log: LsstLoggers | None = None, msg: str | None = None) -> Iterator[None]:
+        """Run code and increment put statistics.
+
+        Parameters
+        ----------
+        log : `logging.Logger` or `None`
+            Logger to use for any timing information.
+        msg : `str` or `None`
+            Any message to be included in log output.
+        """
+        with self._timer(self.increment_put, log=log, msg=msg):
+            yield
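The new file above is self-contained, so its behaviour can be sketched directly (the message strings are arbitrary):

    from lsst.daf.butler import ButlerMetrics

    metrics = ButlerMetrics()
    with metrics.instrument_put(msg="stored dataset"):
        ...  # a datastore put() would run here
    with metrics.instrument_get(msg="retrieved dataset"):
        ...  # a datastore get() would run here

    assert metrics.n_put == 1 and metrics.n_get == 1
    print(f"put: {metrics.time_in_put:.3f}s, get: {metrics.time_in_get:.3f}s")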
lsst/daf/butler/_config.py
CHANGED
@@ -1254,7 +1254,7 @@ class ConfigSubset(Config):

     Global defaults, at lowest priority, are found in the ``config``
     directory of the butler source tree. Additional defaults can be
-    defined using the environment variable ``$
+    defined using the environment variable ``$DAF_BUTLER_CONFIG_PATH``
     which is a PATH-like variable where paths at the front of the list
     have priority over those later.

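Since ``$DAF_BUTLER_CONFIG_PATH`` is PATH-like, it takes colon-separated entries with earlier entries winning; a sketch with placeholder paths:

    import os

    # Overrides in the first directory take priority over the second.
    os.environ["DAF_BUTLER_CONFIG_PATH"] = "/home/user/butler_overrides:/site/butler_defaults"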
lsst/daf/butler/_dataset_ref.py
CHANGED
@@ -66,8 +66,11 @@ from ._named import NamedKeyDict
 from .datastore.stored_file_info import StoredDatastoreItemInfo
 from .dimensions import (
     DataCoordinate,
+    DimensionDataAttacher,
+    DimensionDataExtractor,
     DimensionGroup,
     DimensionUniverse,
+    SerializableDimensionData,
     SerializedDataCoordinate,
     SerializedDataId,
 )
@@ -907,6 +910,62 @@ class MinimalistSerializableDatasetRef(pydantic.BaseModel):
     data_id: SerializedDataId
     """Data coordinate of this dataset."""

+    def to_dataset_ref(
+        self,
+        id: DatasetId,
+        *,
+        dataset_type: DatasetType,
+        universe: DimensionUniverse,
+        attacher: DimensionDataAttacher | None = None,
+    ) -> DatasetRef:
+        """Convert serialized object to a `DatasetRef`.
+
+        Parameters
+        ----------
+        id : `DatasetId`
+            UUID identifying the dataset.
+        dataset_type : `DatasetType`
+            `DatasetType` record corresponding to the dataset type name in the
+            serialized object.
+        universe : `DimensionUniverse`
+            Dimension universe for the dataset.
+        attacher : `DimensionDataAttacher`, optional
+            If provided, will be used to add dimension records to the
+            deserialized `DatasetRef` instance.
+
+        Returns
+        -------
+        ref : `DatasetRef`
+            The deserialized object.
+        """
+        assert dataset_type.name == self.dataset_type_name, (
+            "Given DatasetType does not match the serialized dataset type name"
+        )
+        simple_data_id = SerializedDataCoordinate(dataId=self.data_id)
+        data_id = DataCoordinate.from_simple(simple=simple_data_id, universe=universe)
+        if attacher:
+            data_ids = attacher.attach(dataset_type.dimensions, [data_id])
+            data_id = data_ids[0]
+        return DatasetRef(
+            id=id,
+            run=self.run,
+            datasetType=dataset_type,
+            dataId=data_id,
+        )
+
+    @staticmethod
+    def from_dataset_ref(ref: DatasetRef) -> MinimalistSerializableDatasetRef:
+        """Serialize a `DatasetRef` to a simplified format.
+
+        Parameters
+        ----------
+        ref : `DatasetRef`
+            `DatasetRef` object to serialize.
+        """
+        return MinimalistSerializableDatasetRef(
+            dataset_type_name=ref.datasetType.name, run=ref.run, data_id=dict(ref.dataId.mapping)
+        )
+

 class SerializedDatasetRefContainer(pydantic.BaseModel):
     """Serializable model for a collection of DatasetRef.
@@ -938,6 +997,9 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
     compact_refs: dict[uuid.UUID, MinimalistSerializableDatasetRef]
     """Minimal dataset ref information indexed by UUID."""

+    dimension_records: SerializableDimensionData | None = None
+    """Dimension record information"""
+
     def __len__(self) -> int:
         """Return the number of datasets in the container."""
         return len(self.compact_refs)
@@ -957,19 +1019,32 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
         universe: DimensionUniverse | None = None
         dataset_types: dict[str, SerializedDatasetType] = {}
         compact_refs: dict[uuid.UUID, MinimalistSerializableDatasetRef] = {}
+        data_ids: list[DataCoordinate] = []
+        dimensions: list[DimensionGroup] = []
         for ref in refs:
-            simple_ref = ref.to_simple()
-            dataset_type = simple_ref.datasetType
-            assert dataset_type is not None  # For mypy
             if universe is None:
                 universe = ref.datasetType.dimensions.universe
-            if (name :=
-            dataset_types[name] =
-
-
-
-
+            if (name := ref.datasetType.name) not in dataset_types:
+                dataset_types[name] = ref.datasetType.to_simple()
+            compact_refs[ref.id] = MinimalistSerializableDatasetRef.from_dataset_ref(ref)
+            if ref.dataId.hasRecords():
+                dimensions.append(ref.datasetType.dimensions)
+                data_ids.append(ref.dataId)
+
+        # Extract dimension record metadata if present.
+        dimension_records = None
+        if data_ids and len(compact_refs) == len(data_ids):
+            dimension_group = DimensionGroup.union(*dimensions, universe=universe)
+
+            # Records were attached to all refs. Store them.
+            extractor = DimensionDataExtractor.from_dimension_group(
+                dimension_group,
+                ignore_cached=False,
+                include_skypix=False,
             )
+            extractor.update(data_ids)
+            dimension_records = SerializableDimensionData.from_record_sets(extractor.records.values())

         if universe:
             universe_version = universe.version
             universe_namespace = universe.namespace
@@ -982,6 +1057,7 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
             universe_namespace=universe_namespace,
             dataset_types=dataset_types,
             compact_refs=compact_refs,
+            dimension_records=dimension_records,
         )

     def to_refs(self, universe: DimensionUniverse) -> list[DatasetRef]:
@@ -1019,15 +1095,22 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
             name: DatasetType.from_simple(dtype, universe=universe)
             for name, dtype in self.dataset_types.items()
         }
+
+        # Dimension records can be attached if available.
+        # We assume that all dimension information was stored.
+        attacher = None
+        if self.dimension_records:
+            attacher = DimensionDataAttacher(
+                deserializers=self.dimension_records.make_deserializers(universe)
+            )
+
         refs: list[DatasetRef] = []
         for id_, minimal in self.compact_refs.items():
-
-
-
-
-
-            datasetType=dataset_types[minimal.dataset_type_name],
-            dataId=data_id,
+            ref = minimal.to_dataset_ref(
+                id_,
+                dataset_type=dataset_types[minimal.dataset_type_name],
+                universe=universe,
+                attacher=attacher,
             )
             refs.append(ref)
         return refs
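A hedged round-trip sketch for the container changes above. `refs` and `butler` are assumed to exist, and the builder classmethod's name is not visible in this excerpt (`from_refs` is an assumption); `to_refs` and the pydantic base class are shown in the diff:

    # Serialize refs; dimension records are captured when attached to every ref.
    container = SerializedDatasetRefContainerV1.from_refs(refs)
    payload = container.model_dump_json()

    # Deserialize; records are re-attached via the stored dimension_records.
    restored = SerializedDatasetRefContainerV1.model_validate_json(payload)
    round_tripped = restored.to_refs(universe=butler.dimensions)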
lsst/daf/butler/_file_dataset.py
CHANGED
@@ -27,15 +27,21 @@

 from __future__ import annotations

-__all__ =
+__all__ = ("FileDataset", "SerializedFileDataset")

+import uuid
+from collections.abc import Callable
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, TypeAlias
+
+import pydantic

 from lsst.resources import ResourcePath, ResourcePathExpression

-from ._dataset_ref import DatasetRef
+from ._dataset_ref import DatasetRef, MinimalistSerializableDatasetRef
+from ._dataset_type import DatasetType
 from ._formatter import FormatterParameter
+from .dimensions import DimensionUniverse


 @dataclass
@@ -87,3 +93,72 @@ class FileDataset:
         if not isinstance(other, type(self)):
             return NotImplemented
         return str(self.path) < str(other.path)
+
+    def to_simple(self) -> SerializedFileDataset:
+        """
+        Convert this instance to a simplified, JSON-serializable object.
+
+        Returns
+        -------
+        serialized : `SerializedFileDataset`
+            Serializable representation of this `FileDataset` instance.
+        """
+        if self.formatter is None:
+            formatter = None
+        elif isinstance(self.formatter, str):
+            formatter = self.formatter
+        else:
+            formatter = self.formatter.name()
+
+        refs = {ref.id: MinimalistSerializableDatasetRef.from_dataset_ref(ref) for ref in self.refs}
+
+        return SerializedFileDataset(
+            refs=refs,
+            path=str(self.path),
+            formatter=formatter,
+        )
+
+    @staticmethod
+    def from_simple(
+        dataset: SerializedFileDataset, *, dataset_type_loader: DatasetTypeLoader, universe: DimensionUniverse
+    ) -> FileDataset:
+        """
+        Deserialize a `SerializedFileDataset` into a `FileDataset`.
+
+        Parameters
+        ----------
+        dataset : `SerializedFileDataset`
+            Object to deserialize.
+        dataset_type_loader : `Callable` [[ `str` ], `DatasetType` ]
+            Function that takes a string dataset type name as its
+            only parameter, and returns an instance of `DatasetType`.
+            Used to deserialize the `DatasetRef` instances contained
+            in the serialized `FileDataset`.
+        universe : `DimensionUniverse`
+            Dimension universe associated with the `Butler` instance that
+            created the serialized `FileDataset` instance.
+
+        Returns
+        -------
+        file_dataset : `FileDataset`
+            Deserialized equivalent of the input dataset.
+        """
+        refs = [
+            ref.to_dataset_ref(id, universe=universe, dataset_type=dataset_type_loader(ref.dataset_type_name))
+            for id, ref in dataset.refs.items()
+        ]
+        return FileDataset(path=dataset.path, refs=refs, formatter=dataset.formatter)
+
+
+DatasetTypeLoader: TypeAlias = Callable[[str], DatasetType]
+"""Type signature for a function that takes a string dataset type name as its
+only parameter, and returns an instance of `DatasetType`.
+"""
+
+
+class SerializedFileDataset(pydantic.BaseModel):
+    """Serializable format of `FileDataset` object."""
+
+    refs: dict[uuid.UUID, MinimalistSerializableDatasetRef]
+    path: str
+    formatter: str | None = None
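A sketch of the new `FileDataset` serialization API (assumes `butler` and a `FileDataset` named `exported` already exist; `Butler.get_dataset_type` is used here as a plausible `dataset_type_loader`):

    serialized = exported.to_simple()
    payload = serialized.model_dump_json()

    restored = FileDataset.from_simple(
        SerializedFileDataset.model_validate_json(payload),
        dataset_type_loader=butler.get_dataset_type,  # resolves names to DatasetType
        universe=butler.dimensions,
    )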
lsst/daf/butler/_limited_butler.py
CHANGED
@@ -31,11 +31,13 @@ __all__ = ("LimitedButler",)

 import logging
 from abc import ABC, abstractmethod
-from collections.abc import Iterable
+from collections.abc import Iterable, Iterator
+from contextlib import contextmanager
 from typing import Any, ClassVar

 from lsst.resources import ResourcePath

+from ._butler_metrics import ButlerMetrics
 from ._dataset_provenance import DatasetProvenance
 from ._dataset_ref import DatasetRef
 from ._deferredDatasetHandle import DeferredDatasetHandle
@@ -138,7 +140,8 @@ class LimitedButler(ABC):
         to use a resolved `DatasetRef`. Subclasses can support more options.
         """
         log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
-
+        with self._metrics.instrument_get(log, msg="Retrieved dataset"):
+            return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

     def getDeferred(
         self,
@@ -415,6 +418,30 @@ class LimitedButler(ABC):
         """
         raise NotImplementedError()

+    @contextmanager
+    def record_metrics(self, metrics: ButlerMetrics | None = None) -> Iterator[ButlerMetrics]:
+        """Enable new metrics recording context.
+
+        Parameters
+        ----------
+        metrics : `lsst.daf.butler.ButlerMetrics`
+            Optional override metrics object. If given, this will be the
+            same object returned by the context manager.
+
+        Yields
+        ------
+        metrics : `lsst.daf.butler.ButlerMetrics`
+            Metrics recorded within this context. This temporarily replaces
+            any existing metrics object associated with this butler.
+        """
+        old_metrics = self._metrics
+        new_metrics = metrics if metrics is not None else ButlerMetrics()
+        try:
+            self._metrics = new_metrics
+            yield new_metrics
+        finally:
+            self._metrics = old_metrics
+
     @property
     @abstractmethod
     def dimensions(self) -> DimensionUniverse:
@@ -430,3 +457,8 @@
     """An object that maps known storage class names to objects that fully
     describe them (`StorageClassFactory`).
     """
+
+    _metrics: ButlerMetrics
+    """An object for recording metrics associated with this butler.
+    (`ButlerMetrics`)
+    """
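A short sketch of the new `record_metrics` context manager (assumes `butler` and `ref` already exist):

    with butler.record_metrics() as metrics:
        dataset = butler.get(ref)

    print(f"{metrics.n_get} get(s) took {metrics.time_in_get:.3f} s")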