lsst-daf-butler 29.2025.1700__py3-none-any.whl → 29.2025.1900__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. lsst/daf/butler/__init__.py +1 -0
  2. lsst/daf/butler/_butler.py +23 -6
  3. lsst/daf/butler/_butler_instance_options.py +3 -0
  4. lsst/daf/butler/_butler_metrics.py +117 -0
  5. lsst/daf/butler/_dataset_ref.py +99 -16
  6. lsst/daf/butler/_file_dataset.py +78 -3
  7. lsst/daf/butler/_limited_butler.py +34 -2
  8. lsst/daf/butler/_quantum_backed.py +20 -2
  9. lsst/daf/butler/arrow_utils.py +4 -4
  10. lsst/daf/butler/datastores/fileDatastore.py +80 -38
  11. lsst/daf/butler/dimensions/_group.py +15 -5
  12. lsst/daf/butler/dimensions/_record_set.py +354 -2
  13. lsst/daf/butler/dimensions/record_cache.py +1 -2
  14. lsst/daf/butler/direct_butler/_direct_butler.py +53 -36
  15. lsst/daf/butler/direct_query_driver/_sql_column_visitor.py +9 -0
  16. lsst/daf/butler/queries/_expression_strings.py +17 -0
  17. lsst/daf/butler/queries/_identifiers.py +4 -1
  18. lsst/daf/butler/queries/expression_factory.py +16 -0
  19. lsst/daf/butler/queries/tree/_predicate.py +3 -1
  20. lsst/daf/butler/registry/databases/postgresql.py +50 -0
  21. lsst/daf/butler/registry/databases/sqlite.py +46 -0
  22. lsst/daf/butler/registry/interfaces/_database.py +21 -0
  23. lsst/daf/butler/registry/queries/expressions/_predicate.py +9 -0
  24. lsst/daf/butler/registry/queries/expressions/check.py +10 -0
  25. lsst/daf/butler/registry/queries/expressions/normalForm.py +11 -0
  26. lsst/daf/butler/registry/queries/expressions/parser/exprTree.py +114 -24
  27. lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py +26 -9
  28. lsst/daf/butler/registry/queries/expressions/parser/treeVisitor.py +35 -8
  29. lsst/daf/butler/remote_butler/_remote_butler.py +33 -16
  30. lsst/daf/butler/tests/butler_queries.py +122 -0
  31. lsst/daf/butler/tests/hybrid_butler.py +17 -3
  32. lsst/daf/butler/version.py +1 -1
  33. {lsst_daf_butler-29.2025.1700.dist-info → lsst_daf_butler-29.2025.1900.dist-info}/METADATA +1 -1
  34. {lsst_daf_butler-29.2025.1700.dist-info → lsst_daf_butler-29.2025.1900.dist-info}/RECORD +42 -41
  35. {lsst_daf_butler-29.2025.1700.dist-info → lsst_daf_butler-29.2025.1900.dist-info}/WHEEL +1 -1
  36. {lsst_daf_butler-29.2025.1700.dist-info → lsst_daf_butler-29.2025.1900.dist-info}/entry_points.txt +0 -0
  37. {lsst_daf_butler-29.2025.1700.dist-info → lsst_daf_butler-29.2025.1900.dist-info}/licenses/COPYRIGHT +0 -0
  38. {lsst_daf_butler-29.2025.1700.dist-info → lsst_daf_butler-29.2025.1900.dist-info}/licenses/LICENSE +0 -0
  39. {lsst_daf_butler-29.2025.1700.dist-info → lsst_daf_butler-29.2025.1900.dist-info}/licenses/bsd_license.txt +0 -0
  40. {lsst_daf_butler-29.2025.1700.dist-info → lsst_daf_butler-29.2025.1900.dist-info}/licenses/gpl-v3.0.txt +0 -0
  41. {lsst_daf_butler-29.2025.1700.dist-info → lsst_daf_butler-29.2025.1900.dist-info}/top_level.txt +0 -0
  42. {lsst_daf_butler-29.2025.1700.dist-info → lsst_daf_butler-29.2025.1900.dist-info}/zip-safe +0 -0

lsst/daf/butler/__init__.py
@@ -38,6 +38,7 @@ from . import ddl, time_utils
  from ._butler import *
  from ._butler_collections import *
  from ._butler_config import *
+ from ._butler_metrics import *
  from ._butler_repo_index import *
  from ._collection_type import CollectionType
  from ._column_categorization import *

lsst/daf/butler/_butler.py
@@ -46,6 +46,7 @@ from lsst.utils.logging import getLogger
  from ._butler_collections import ButlerCollections
  from ._butler_config import ButlerConfig, ButlerType
  from ._butler_instance_options import ButlerInstanceOptions
+ from ._butler_metrics import ButlerMetrics
  from ._butler_repo_index import ButlerRepoIndex
  from ._config import Config, ConfigSubset
  from ._exceptions import EmptyQueryResultError, InvalidQueryError
@@ -154,6 +155,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  writeable: bool | None = None,
  inferDefaults: bool = True,
  without_datastore: bool = False,
+ metrics: ButlerMetrics | None = None,
  **kwargs: Any,
  ) -> Butler:
  if cls is Butler:
@@ -165,6 +167,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  writeable=writeable,
  inferDefaults=inferDefaults,
  without_datastore=without_datastore,
+ metrics=metrics,
  **kwargs,
  )

@@ -183,6 +186,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  writeable: bool | None = None,
  inferDefaults: bool = True,
  without_datastore: bool = False,
+ metrics: ButlerMetrics | None = None,
  **kwargs: Any,
  ) -> Butler:
  """Create butler instance from configuration.
@@ -230,6 +234,8 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  without_datastore : `bool`, optional
  If `True` do not attach a datastore to this butler. Any attempts
  to use a datastore will fail.
+ metrics : `ButlerMetrics` or `None`, optional
+ Metrics object to record butler usage statistics.
  **kwargs : `Any`
  Default data ID key-value pairs. These may only identify
  "governor" dimensions like ``instrument`` and ``skymap``.
@@ -300,6 +306,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  # passing the "butler" parameter to its constructor. This has
  # been moved out of the constructor into Butler.clone().
  butler = kwargs.pop("butler", None)
+ metrics = metrics if metrics is not None else ButlerMetrics()
  if butler is not None:
  if not isinstance(butler, Butler):
  raise TypeError("'butler' parameter must be a Butler instance")
@@ -307,10 +314,17 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  raise TypeError(
  "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
  )
- return butler.clone(collections=collections, run=run, inferDefaults=inferDefaults, dataId=kwargs)
+ return butler.clone(
+ collections=collections, run=run, inferDefaults=inferDefaults, metrics=metrics, dataId=kwargs
+ )

  options = ButlerInstanceOptions(
- collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs
+ collections=collections,
+ run=run,
+ writeable=writeable,
+ inferDefaults=inferDefaults,
+ metrics=metrics,
+ kwargs=kwargs,
  )

  # Load the Butler configuration. This may involve searching the
@@ -1741,8 +1755,8 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  warn_limit = True
  with self.query() as query:
  result = (
- query.where(data_id, where, bind=bind, **kwargs)
- .data_ids(dimensions)
+ query.data_ids(dimensions)
+ .where(data_id, where, bind=bind, **kwargs)
  .order_by(*ensure_iterable(order_by))
  .limit(query_limit)
  )
@@ -1979,8 +1993,8 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  warn_limit = True
  with self.query() as query:
  result = (
- query.where(data_id, where, bind=bind, **kwargs)
- .dimension_records(element)
+ query.dimension_records(element)
+ .where(data_id, where, bind=bind, **kwargs)
  .order_by(*ensure_iterable(order_by))
  .limit(query_limit)
  )
@@ -2123,6 +2137,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  run: str | None | EllipsisType = ...,
  inferDefaults: bool | EllipsisType = ...,
  dataId: dict[str, str] | EllipsisType = ...,
+ metrics: ButlerMetrics | None = None,
  ) -> Butler:
  """Return a new Butler instance connected to the same repository
  as this one, optionally overriding ``collections``, ``run``,
@@ -2142,5 +2157,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  dataId : `str`
  Same as ``kwargs`` passed to the constructor. If omitted, copies
  values from original object.
+ metrics : `ButlerMetrics` or `None`, optional
+ Metrics object to record butler statistics.
  """
  raise NotImplementedError()
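
These hunks thread a ButlerMetrics instance through Butler construction and Butler.clone(), so one metrics object can be shared by a butler and its clones. A minimal sketch of how a caller might supply one; the repository path, collection, dataset type, and data ID below are hypothetical placeholders:

from lsst.daf.butler import Butler, ButlerMetrics

metrics = ButlerMetrics()
# Hypothetical repository and collection; only the metrics= keyword is new here.
butler = Butler.from_config("/repo/example", collections=["HSC/defaults"], metrics=metrics)

exposure = butler.get("calexp", instrument="HSC", visit=903334, detector=42)

# The shared object accumulates counts and wall-clock durations.
print(f"{metrics.n_get} get() call(s), {metrics.time_in_get:.3f} s in get()")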

lsst/daf/butler/_butler_instance_options.py
@@ -30,6 +30,8 @@ __all__ = ("ButlerInstanceOptions",)
  import dataclasses
  from typing import Any

+ from ._butler_metrics import ButlerMetrics
+

  @dataclasses.dataclass(frozen=True)
  class ButlerInstanceOptions:
@@ -43,4 +45,5 @@ class ButlerInstanceOptions:
  run: str | None = None
  writeable: bool | None = None
  inferDefaults: bool = True
+ metrics: ButlerMetrics = dataclasses.field(default_factory=ButlerMetrics)
  kwargs: dict[str, Any] = dataclasses.field(default_factory=dict)

lsst/daf/butler/_butler_metrics.py (new file)
@@ -0,0 +1,117 @@
+ # This file is part of daf_butler.
+ #
+ # Developed for the LSST Data Management System.
+ # This product includes software developed by the LSST Project
+ # (http://www.lsst.org).
+ # See the COPYRIGHT file at the top-level directory of this distribution
+ # for details of code ownership.
+ #
+ # This software is dual licensed under the GNU General Public License and also
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+ # respectively. If you choose the GPL option then the following text applies
+ # (but note that there is still no warranty even if you opt for BSD instead):
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ from __future__ import annotations
+
+ from collections.abc import Callable, Iterator
+ from contextlib import contextmanager
+
+ from pydantic import BaseModel
+
+ from lsst.utils.logging import LsstLoggers
+ from lsst.utils.timer import time_this
+
+
+ class ButlerMetrics(BaseModel):
+ """Metrics collected during Butler operations."""
+
+ time_in_put: float = 0.0
+ """Wall-clock time, in seconds, spent in put()."""
+
+ time_in_get: float = 0.0
+ """Wall-clock time, in seconds, spent in get()."""
+
+ n_get: int = 0
+ """Number of datasets retrieved with get()."""
+
+ n_put: int = 0
+ """Number of datasets stored with put()."""
+
+ def reset(self) -> None:
+ """Reset all metrics."""
+ self.time_in_put = 0.0
+ self.time_in_get = 0.0
+ self.n_get = 0
+ self.n_put = 0
+
+ def increment_get(self, duration: float) -> None:
+ """Increment time for get().
+
+ Parameters
+ ----------
+ duration : `float`
+ Duration to add to the get() statistics.
+ """
+ self.time_in_get += duration
+ self.n_get += 1
+
+ def increment_put(self, duration: float) -> None:
+ """Increment time for put().
+
+ Parameters
+ ----------
+ duration : `float`
+ Duration to add to the put() statistics.
+ """
+ self.time_in_put += duration
+ self.n_put += 1
+
+ @contextmanager
+ def _timer(
+ self, handler: Callable[[float], None], log: LsstLoggers | None = None, msg: str | None = None
+ ) -> Iterator[None]:
+ with time_this(log=log, msg=msg) as timer:
+ yield
+ handler(timer.duration)
+
+ @contextmanager
+ def instrument_get(self, log: LsstLoggers | None = None, msg: str | None = None) -> Iterator[None]:
+ """Run code and increment get statistics.
+
+ Parameters
+ ----------
+ log : `logging.Logger` or `None`
+ Logger to use for any timing information.
+ msg : `str` or `None`
+ Any message to be included in log output.
+ """
+ with self._timer(self.increment_get, log=log, msg=msg):
+ yield
+
+ @contextmanager
+ def instrument_put(self, log: LsstLoggers | None = None, msg: str | None = None) -> Iterator[None]:
+ """Run code and increment put statistics.
+
+ Parameters
+ ----------
+ log : `logging.Logger` or `None`
+ Logger to use for any timing information.
+ msg : `str` or `None`
+ Any message to be included in log output.
+ """
+ with self._timer(self.increment_put, log=log, msg=msg):
+ yield
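
ButlerMetrics is a plain pydantic model, so it can also be exercised on its own: instrument_get and instrument_put wrap a block with lsst.utils.timer.time_this and fold the measured duration into the running totals. A small illustrative sketch (the sleeps stand in for real datastore I/O):

import time

from lsst.daf.butler import ButlerMetrics

metrics = ButlerMetrics()
with metrics.instrument_get(msg="simulated read"):
    time.sleep(0.05)  # stand-in for a datastore read
with metrics.instrument_put(msg="simulated write"):
    time.sleep(0.10)  # stand-in for a datastore write

print(metrics.n_get, metrics.n_put)              # 1 1
print(metrics.time_in_get, metrics.time_in_put)  # accumulated seconds
metrics.reset()  # zero all counters and timers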

lsst/daf/butler/_dataset_ref.py
@@ -66,8 +66,11 @@ from ._named import NamedKeyDict
  from .datastore.stored_file_info import StoredDatastoreItemInfo
  from .dimensions import (
  DataCoordinate,
+ DimensionDataAttacher,
+ DimensionDataExtractor,
  DimensionGroup,
  DimensionUniverse,
+ SerializableDimensionData,
  SerializedDataCoordinate,
  SerializedDataId,
  )
@@ -907,6 +910,62 @@ class MinimalistSerializableDatasetRef(pydantic.BaseModel):
  data_id: SerializedDataId
  """Data coordinate of this dataset."""

+ def to_dataset_ref(
+ self,
+ id: DatasetId,
+ *,
+ dataset_type: DatasetType,
+ universe: DimensionUniverse,
+ attacher: DimensionDataAttacher | None = None,
+ ) -> DatasetRef:
+ """Convert serialized object to a `DatasetRef`.
+
+ Parameters
+ ----------
+ id : `DatasetId`
+ UUID identifying the dataset.
+ dataset_type : `DatasetType`
+ `DatasetType` record corresponding to the dataset type name in the
+ serialized object.
+ universe : `DimensionUniverse`
+ Dimension universe for the dataset.
+ attacher : `DimensionDataAttacher`, optional
+ If provided, will be used to add dimension records to the
+ deserialized `DatasetRef` instance.
+
+ Returns
+ -------
+ ref : `DatasetRef`
+ The deserialized object.
+ """
+ assert dataset_type.name == self.dataset_type_name, (
+ "Given DatasetType does not match the serialized dataset type name"
+ )
+ simple_data_id = SerializedDataCoordinate(dataId=self.data_id)
+ data_id = DataCoordinate.from_simple(simple=simple_data_id, universe=universe)
+ if attacher:
+ data_ids = attacher.attach(dataset_type.dimensions, [data_id])
+ data_id = data_ids[0]
+ return DatasetRef(
+ id=id,
+ run=self.run,
+ datasetType=dataset_type,
+ dataId=data_id,
+ )
+
+ @staticmethod
+ def from_dataset_ref(ref: DatasetRef) -> MinimalistSerializableDatasetRef:
+ """Serialize a ``DatasetRef` to a simplified format.
+
+ Parameters
+ ----------
+ ref : `DatasetRef`
+ `DatasetRef` object to serialize.
+ """
+ return MinimalistSerializableDatasetRef(
+ dataset_type_name=ref.datasetType.name, run=ref.run, data_id=dict(ref.dataId.mapping)
+ )
+

  class SerializedDatasetRefContainer(pydantic.BaseModel):
  """Serializable model for a collection of DatasetRef.
@@ -938,6 +997,9 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
  compact_refs: dict[uuid.UUID, MinimalistSerializableDatasetRef]
  """Minimal dataset ref information indexed by UUID."""

+ dimension_records: SerializableDimensionData | None = None
+ """Dimension record information"""
+
  def __len__(self) -> int:
  """Return the number of datasets in the container."""
  return len(self.compact_refs)
@@ -957,19 +1019,32 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
  universe: DimensionUniverse | None = None
  dataset_types: dict[str, SerializedDatasetType] = {}
  compact_refs: dict[uuid.UUID, MinimalistSerializableDatasetRef] = {}
+ data_ids: list[DataCoordinate] = []
+ dimensions: list[DimensionGroup] = []
  for ref in refs:
- simple_ref = ref.to_simple()
- dataset_type = simple_ref.datasetType
- assert dataset_type is not None # For mypy
  if universe is None:
  universe = ref.datasetType.dimensions.universe
- if (name := dataset_type.name) not in dataset_types:
- dataset_types[name] = dataset_type
- data_id = simple_ref.dataId
- assert data_id is not None # For mypy
- compact_refs[simple_ref.id] = MinimalistSerializableDatasetRef(
- dataset_type_name=name, run=simple_ref.run, data_id=data_id.dataId
+ if (name := ref.datasetType.name) not in dataset_types:
+ dataset_types[name] = ref.datasetType.to_simple()
+ compact_refs[ref.id] = MinimalistSerializableDatasetRef.from_dataset_ref(ref)
+ if ref.dataId.hasRecords():
+ dimensions.append(ref.datasetType.dimensions)
+ data_ids.append(ref.dataId)
+
+ # Extract dimension record metadata if present.
+ dimension_records = None
+ if data_ids and len(compact_refs) == len(data_ids):
+ dimension_group = DimensionGroup.union(*dimensions, universe=universe)
+
+ # Records were attached to all refs. Store them.
+ extractor = DimensionDataExtractor.from_dimension_group(
+ dimension_group,
+ ignore_cached=False,
+ include_skypix=False,
  )
+ extractor.update(data_ids)
+ dimension_records = SerializableDimensionData.from_record_sets(extractor.records.values())
+
  if universe:
  universe_version = universe.version
  universe_namespace = universe.namespace
@@ -982,6 +1057,7 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
  universe_namespace=universe_namespace,
  dataset_types=dataset_types,
  compact_refs=compact_refs,
+ dimension_records=dimension_records,
  )

  def to_refs(self, universe: DimensionUniverse) -> list[DatasetRef]:
@@ -1019,15 +1095,22 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
  name: DatasetType.from_simple(dtype, universe=universe)
  for name, dtype in self.dataset_types.items()
  }
+
+ # Dimension records can be attached if available.
+ # We assume that all dimension information was stored.
+ attacher = None
+ if self.dimension_records:
+ attacher = DimensionDataAttacher(
+ deserializers=self.dimension_records.make_deserializers(universe)
+ )
+
  refs: list[DatasetRef] = []
  for id_, minimal in self.compact_refs.items():
- simple_data_id = SerializedDataCoordinate(dataId=minimal.data_id)
- data_id = DataCoordinate.from_simple(simple=simple_data_id, universe=universe)
- ref = DatasetRef(
- id=id_,
- run=minimal.run,
- datasetType=dataset_types[minimal.dataset_type_name],
- dataId=data_id,
+ ref = minimal.to_dataset_ref(
+ id_,
+ dataset_type=dataset_types[minimal.dataset_type_name],
+ universe=universe,
+ attacher=attacher,
  )
  refs.append(ref)
  return refs
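
The net effect of these hunks is that per-ref (de)serialization now lives on MinimalistSerializableDatasetRef itself, and SerializedDatasetRefContainerV1 can carry the dimension records needed to rebuild expanded data IDs on the receiving side. A rough sketch of the new helper pair, assuming a pre-existing butler and resolved ref (and importing from the private module, since the class is internal); without an attacher the rebuilt data ID carries no dimension records:

from lsst.daf.butler._dataset_ref import MinimalistSerializableDatasetRef

# `butler` (a Butler) and `ref` (a resolved DatasetRef) are assumed to exist.
minimal = MinimalistSerializableDatasetRef.from_dataset_ref(ref)

# The minimal form stores only the dataset type *name*, so the full
# DatasetType and the dimension universe must be supplied when rebuilding.
rebuilt = minimal.to_dataset_ref(
    ref.id,
    dataset_type=ref.datasetType,
    universe=butler.dimensions,
)
print(rebuilt == ref)  # expected True: same UUID, dataset type, run, and data ID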

lsst/daf/butler/_file_dataset.py
@@ -27,15 +27,21 @@

  from __future__ import annotations

- __all__ = ["FileDataset"]
+ __all__ = ("FileDataset", "SerializedFileDataset")

+ import uuid
+ from collections.abc import Callable
  from dataclasses import dataclass
- from typing import Any
+ from typing import Any, TypeAlias
+
+ import pydantic

  from lsst.resources import ResourcePath, ResourcePathExpression

- from ._dataset_ref import DatasetRef
+ from ._dataset_ref import DatasetRef, MinimalistSerializableDatasetRef
+ from ._dataset_type import DatasetType
  from ._formatter import FormatterParameter
+ from .dimensions import DimensionUniverse


  @dataclass
@@ -87,3 +93,72 @@ class FileDataset:
  if not isinstance(other, type(self)):
  return NotImplemented
  return str(self.path) < str(other.path)
+
+ def to_simple(self) -> SerializedFileDataset:
+ """
+ Convert this instance to a simplified, JSON-serializable object.
+
+ Returns
+ -------
+ serialized : `SerializedFileDataset`
+ Serializable representation of this `FileDataset` instance.
+ """
+ if self.formatter is None:
+ formatter = None
+ elif isinstance(self.formatter, str):
+ formatter = self.formatter
+ else:
+ formatter = self.formatter.name()
+
+ refs = {ref.id: MinimalistSerializableDatasetRef.from_dataset_ref(ref) for ref in self.refs}
+
+ return SerializedFileDataset(
+ refs=refs,
+ path=str(self.path),
+ formatter=formatter,
+ )
+
+ @staticmethod
+ def from_simple(
+ dataset: SerializedFileDataset, *, dataset_type_loader: DatasetTypeLoader, universe: DimensionUniverse
+ ) -> FileDataset:
+ """
+ Deserialize a `SerializedFileDataset` into a `FileDataset`.
+
+ Parameters
+ ----------
+ dataset : `SerializedFileDataset`
+ Object to deserialize.
+ dataset_type_loader : `Callable` [[ `str` ], `DatasetType` ]
+ Function that takes a string dataset type name as its
+ only parameter, and returns an instance of `DatasetType`.
+ Used to deserialize the `DatasetRef` instances contained
+ in the serialized `FileDataset`.
+ universe : `DimensionUniverse`
+ Dimension universe associated with the `Butler` instance that
+ created the serialized `FileDataset` instance.
+
+ Returns
+ -------
+ file_dataset : `FileDataset`
+ Deserialized equivalent of the input dataset.
+ """
+ refs = [
+ ref.to_dataset_ref(id, universe=universe, dataset_type=dataset_type_loader(ref.dataset_type_name))
+ for id, ref in dataset.refs.items()
+ ]
+ return FileDataset(path=dataset.path, refs=refs, formatter=dataset.formatter)
+
+
+ DatasetTypeLoader: TypeAlias = Callable[[str], DatasetType]
+ """Type signature for a function that takes a string dataset type name as its
+ only parameter, and returns an instance of `DatasetType`.
+ """
+
+
+ class SerializedFileDataset(pydantic.BaseModel):
+ """Serializable format of `FileDataset` object."""
+
+ refs: dict[uuid.UUID, MinimalistSerializableDatasetRef]
+ path: str
+ formatter: str | None = None
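
With these additions a FileDataset (a path, its resolved refs, and an optional formatter name) gains a JSON-serializable pydantic companion. A hedged sketch of the round trip, assuming an existing butler and a populated file_dataset; Butler.get_dataset_type is used here as one possible dataset_type_loader, but any Callable[[str], DatasetType] should work:

from lsst.daf.butler import FileDataset
from lsst.daf.butler._file_dataset import SerializedFileDataset

# `butler` and `file_dataset` (a FileDataset with resolved refs) are assumed to exist.
blob = file_dataset.to_simple().model_dump_json()  # SerializedFileDataset is a pydantic model

restored = FileDataset.from_simple(
    SerializedFileDataset.model_validate_json(blob),
    dataset_type_loader=butler.get_dataset_type,
    universe=butler.dimensions,
)
print(restored.path, len(restored.refs))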

lsst/daf/butler/_limited_butler.py
@@ -31,11 +31,13 @@ __all__ = ("LimitedButler",)

  import logging
  from abc import ABC, abstractmethod
- from collections.abc import Iterable
+ from collections.abc import Iterable, Iterator
+ from contextlib import contextmanager
  from typing import Any, ClassVar

  from lsst.resources import ResourcePath

+ from ._butler_metrics import ButlerMetrics
  from ._dataset_provenance import DatasetProvenance
  from ._dataset_ref import DatasetRef
  from ._deferredDatasetHandle import DeferredDatasetHandle
@@ -138,7 +140,8 @@ class LimitedButler(ABC):
  to use a resolved `DatasetRef`. Subclasses can support more options.
  """
  log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
- return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)
+ with self._metrics.instrument_get(log, msg="Retrieved dataset"):
+ return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

  def getDeferred(
  self,
@@ -415,6 +418,30 @@ class LimitedButler(ABC):
  """
  raise NotImplementedError()

+ @contextmanager
+ def record_metrics(self, metrics: ButlerMetrics | None = None) -> Iterator[ButlerMetrics]:
+ """Enable new metrics recording context.
+
+ Parameters
+ ----------
+ metrics : `lsst.daf.butler.ButlerMetrics`
+ Optional override metrics object. If given, this will be the
+ same object returned by the context manager.
+
+ Yields
+ ------
+ metrics : `lsst.daf.butler.ButlerMetrics`
+ Metrics recorded within this context. This temporarily replaces
+ any existing metrics object associated with this butler.
+ """
+ old_metrics = self._metrics
+ new_metrics = metrics if metrics is not None else ButlerMetrics()
+ try:
+ self._metrics = new_metrics
+ yield new_metrics
+ finally:
+ self._metrics = old_metrics
+
  @property
  @abstractmethod
  def dimensions(self) -> DimensionUniverse:
@@ -430,3 +457,8 @@ class LimitedButler(ABC):
  """An object that maps known storage class names to objects that fully
  describe them (`StorageClassFactory`).
  """
+
+ _metrics: ButlerMetrics
+ """An object for recording metrics associated with this butler.
+ (`ButlerMetrics`)
+ """

lsst/daf/butler/_quantum_backed.py
@@ -43,6 +43,7 @@ import pydantic
  from lsst.resources import ResourcePath, ResourcePathExpression

  from ._butler_config import ButlerConfig
+ from ._butler_metrics import ButlerMetrics
  from ._config import Config
  from ._dataset_provenance import DatasetProvenance
  from ._dataset_ref import DatasetId, DatasetRef
@@ -118,6 +119,8 @@ class QuantumBackedButler(LimitedButler):
  Object managing all storage class definitions.
  dataset_types : `~collections.abc.Mapping` [`str`, `DatasetType`]
  The registry dataset type definitions, indexed by name.
+ metrics : `lsst.daf.butler.ButlerMetrics` or `None`, optional
+ Metrics object for tracking butler statistics.

  Notes
  -----
@@ -164,6 +167,7 @@
  datastore: Datastore,
  storageClasses: StorageClassFactory,
  dataset_types: Mapping[str, DatasetType] | None = None,
+ metrics: ButlerMetrics | None = None,
  ):
  self._dimensions = dimensions
  self._predicted_inputs = set(predicted_inputs)
@@ -175,6 +179,7 @@
  self._datastore = datastore
  self.storageClasses = storageClasses
  self._dataset_types: Mapping[str, DatasetType] = {}
+ self._metrics = metrics if metrics is not None else ButlerMetrics()
  if dataset_types is not None:
  self._dataset_types = dataset_types
  self._datastore.set_retrieve_dataset_type_method(self._retrieve_dataset_type)
@@ -190,6 +195,7 @@
  BridgeManagerClass: type[DatastoreRegistryBridgeManager] = MonolithicDatastoreRegistryBridgeManager,
  search_paths: list[str] | None = None,
  dataset_types: Mapping[str, DatasetType] | None = None,
+ metrics: ButlerMetrics | None = None,
  ) -> QuantumBackedButler:
  """Construct a new `QuantumBackedButler` from repository configuration
  and helper types.
@@ -219,6 +225,8 @@
  dataset_types : `~collections.abc.Mapping` [`str`, `DatasetType`], \
  optional
  Mapping of the dataset type name to its registry definition.
+ metrics : `lsst.daf.butler.ButlerMetrics` or `None`, optional
+ Metrics object for gathering butler statistics.
  """
  predicted_inputs = [ref.id for ref in itertools.chain.from_iterable(quantum.inputs.values())]
  predicted_inputs += [ref.id for ref in quantum.initInputs.values()]
@@ -234,6 +242,7 @@
  BridgeManagerClass=BridgeManagerClass,
  search_paths=search_paths,
  dataset_types=dataset_types,
+ metrics=metrics,
  )

  @classmethod
@@ -249,6 +258,7 @@
  BridgeManagerClass: type[DatastoreRegistryBridgeManager] = MonolithicDatastoreRegistryBridgeManager,
  search_paths: list[str] | None = None,
  dataset_types: Mapping[str, DatasetType] | None = None,
+ metrics: ButlerMetrics | None = None,
  ) -> QuantumBackedButler:
  """Construct a new `QuantumBackedButler` from sets of input and output
  dataset IDs.
@@ -281,6 +291,8 @@
  dataset_types : `~collections.abc.Mapping` [`str`, `DatasetType`], \
  optional
  Mapping of the dataset type name to its registry definition.
+ metrics : `lsst.daf.butler.ButlerMetrics` or `None`, optional
+ Metrics object for gathering butler statistics.
  """
  return cls._initialize(
  config=config,
@@ -293,6 +305,7 @@
  BridgeManagerClass=BridgeManagerClass,
  search_paths=search_paths,
  dataset_types=dataset_types,
+ metrics=metrics,
  )

  @classmethod
@@ -309,6 +322,7 @@
  BridgeManagerClass: type[DatastoreRegistryBridgeManager] = MonolithicDatastoreRegistryBridgeManager,
  search_paths: list[str] | None = None,
  dataset_types: Mapping[str, DatasetType] | None = None,
+ metrics: ButlerMetrics | None = None,
  ) -> QuantumBackedButler:
  """Initialize quantum-backed butler.

@@ -341,6 +355,8 @@
  Additional search paths for butler configuration.
  dataset_types : `~collections.abc.Mapping` [`str`, `DatasetType`]
  Mapping of the dataset type name to its registry definition.
+ metrics : `lsst.daf.butler.ButlerMetrics` or `None`, optional
+ Metrics object for gathering butler statistics.
  """
  butler_config = ButlerConfig(config, searchPaths=search_paths)
  butler_root = butler_config.get("root", butler_config.configDir)
@@ -373,6 +389,7 @@
  datastore,
  storageClasses=storageClasses,
  dataset_types=dataset_types,
+ metrics=metrics,
  )

  def _retrieve_dataset_type(self, name: str) -> DatasetType | None:
@@ -459,8 +476,9 @@
  # Docstring inherited.
  if ref.id not in self._predicted_outputs:
  raise RuntimeError("Cannot `put` dataset that was not predicted as an output.")
- self._datastore.put(obj, ref, provenance=provenance)
- self._actual_output_refs.add(ref)
+ with self._metrics.instrument_put(log=_LOG, msg="Put QBB dataset"):
+ self._datastore.put(obj, ref, provenance=provenance)
+ self._actual_output_refs.add(ref)
  return ref

  def pruneDatasets(