lsst-daf-butler 29.0.1rc1__py3-none-any.whl → 29.1.0rc1__py3-none-any.whl

This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (99)
  1. lsst/daf/butler/__init__.py +1 -0
  2. lsst/daf/butler/_butler.py +57 -10
  3. lsst/daf/butler/_butler_collections.py +4 -0
  4. lsst/daf/butler/_butler_instance_options.py +3 -0
  5. lsst/daf/butler/_butler_metrics.py +117 -0
  6. lsst/daf/butler/_config.py +1 -1
  7. lsst/daf/butler/_dataset_ref.py +99 -16
  8. lsst/daf/butler/_file_dataset.py +78 -3
  9. lsst/daf/butler/_limited_butler.py +34 -2
  10. lsst/daf/butler/_quantum_backed.py +23 -4
  11. lsst/daf/butler/arrow_utils.py +7 -9
  12. lsst/daf/butler/cli/butler.py +1 -1
  13. lsst/daf/butler/cli/cmd/_remove_runs.py +2 -0
  14. lsst/daf/butler/cli/cmd/commands.py +25 -1
  15. lsst/daf/butler/cli/utils.py +32 -4
  16. lsst/daf/butler/column_spec.py +77 -34
  17. lsst/daf/butler/configs/datastores/formatters.yaml +1 -0
  18. lsst/daf/butler/configs/storageClasses.yaml +2 -0
  19. lsst/daf/butler/datastore/_datastore.py +30 -4
  20. lsst/daf/butler/datastore/generic_base.py +2 -2
  21. lsst/daf/butler/datastores/chainedDatastore.py +63 -92
  22. lsst/daf/butler/datastores/fileDatastore.py +371 -97
  23. lsst/daf/butler/datastores/inMemoryDatastore.py +33 -5
  24. lsst/daf/butler/dimensions/_coordinate.py +4 -15
  25. lsst/daf/butler/dimensions/_group.py +15 -5
  26. lsst/daf/butler/dimensions/_record_set.py +469 -4
  27. lsst/daf/butler/dimensions/_record_table.py +1 -1
  28. lsst/daf/butler/dimensions/_records.py +127 -6
  29. lsst/daf/butler/dimensions/_universe.py +12 -8
  30. lsst/daf/butler/dimensions/record_cache.py +1 -2
  31. lsst/daf/butler/direct_butler/_direct_butler.py +406 -225
  32. lsst/daf/butler/direct_query_driver/_driver.py +30 -11
  33. lsst/daf/butler/direct_query_driver/_query_builder.py +74 -17
  34. lsst/daf/butler/direct_query_driver/_sql_column_visitor.py +28 -1
  35. lsst/daf/butler/pydantic_utils.py +26 -0
  36. lsst/daf/butler/queries/_expression_strings.py +24 -0
  37. lsst/daf/butler/queries/_identifiers.py +4 -1
  38. lsst/daf/butler/queries/_query.py +48 -1
  39. lsst/daf/butler/queries/expression_factory.py +16 -0
  40. lsst/daf/butler/queries/overlaps.py +1 -1
  41. lsst/daf/butler/{direct_query_driver/_predicate_constraints_summary.py → queries/predicate_constraints_summary.py} +2 -2
  42. lsst/daf/butler/queries/tree/_column_expression.py +39 -0
  43. lsst/daf/butler/queries/tree/_column_set.py +1 -1
  44. lsst/daf/butler/queries/tree/_predicate.py +19 -9
  45. lsst/daf/butler/registry/bridge/ephemeral.py +16 -6
  46. lsst/daf/butler/registry/bridge/monolithic.py +78 -37
  47. lsst/daf/butler/registry/collections/_base.py +23 -6
  48. lsst/daf/butler/registry/connectionString.py +5 -10
  49. lsst/daf/butler/registry/databases/postgresql.py +50 -0
  50. lsst/daf/butler/registry/databases/sqlite.py +46 -0
  51. lsst/daf/butler/registry/datasets/byDimensions/_manager.py +77 -64
  52. lsst/daf/butler/registry/datasets/byDimensions/summaries.py +4 -4
  53. lsst/daf/butler/registry/dimensions/static.py +20 -8
  54. lsst/daf/butler/registry/interfaces/_bridge.py +13 -1
  55. lsst/daf/butler/registry/interfaces/_database.py +21 -0
  56. lsst/daf/butler/registry/interfaces/_datasets.py +4 -16
  57. lsst/daf/butler/registry/interfaces/_dimensions.py +7 -2
  58. lsst/daf/butler/registry/queries/expressions/_predicate.py +35 -19
  59. lsst/daf/butler/registry/queries/expressions/check.py +29 -10
  60. lsst/daf/butler/registry/queries/expressions/normalForm.py +15 -0
  61. lsst/daf/butler/registry/queries/expressions/parser/exprTree.py +136 -23
  62. lsst/daf/butler/registry/queries/expressions/parser/parserLex.py +10 -1
  63. lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py +47 -24
  64. lsst/daf/butler/registry/queries/expressions/parser/treeVisitor.py +49 -10
  65. lsst/daf/butler/registry/sql_registry.py +17 -45
  66. lsst/daf/butler/registry/tests/_registry.py +60 -32
  67. lsst/daf/butler/remote_butler/_http_connection.py +15 -3
  68. lsst/daf/butler/remote_butler/_query_driver.py +5 -7
  69. lsst/daf/butler/remote_butler/_registry.py +3 -2
  70. lsst/daf/butler/remote_butler/_remote_butler.py +50 -27
  71. lsst/daf/butler/remote_butler/server/_config.py +68 -13
  72. lsst/daf/butler/remote_butler/server/_dependencies.py +68 -3
  73. lsst/daf/butler/remote_butler/server/_gafaelfawr.py +125 -0
  74. lsst/daf/butler/remote_butler/server/_server.py +11 -4
  75. lsst/daf/butler/remote_butler/server/_telemetry.py +105 -0
  76. lsst/daf/butler/remote_butler/server/handlers/_external.py +10 -2
  77. lsst/daf/butler/remote_butler/server/handlers/_query_serialization.py +5 -7
  78. lsst/daf/butler/remote_butler/server/handlers/_query_streaming.py +7 -3
  79. lsst/daf/butler/script/ingest_zip.py +13 -1
  80. lsst/daf/butler/script/queryCollections.py +185 -29
  81. lsst/daf/butler/script/removeRuns.py +2 -5
  82. lsst/daf/butler/script/retrieveArtifacts.py +1 -0
  83. lsst/daf/butler/script/transferDatasets.py +5 -0
  84. lsst/daf/butler/tests/butler_queries.py +236 -23
  85. lsst/daf/butler/tests/cliCmdTestBase.py +1 -1
  86. lsst/daf/butler/tests/hybrid_butler.py +37 -8
  87. lsst/daf/butler/tests/hybrid_butler_registry.py +15 -2
  88. lsst/daf/butler/tests/server.py +28 -3
  89. lsst/daf/butler/version.py +1 -1
  90. {lsst_daf_butler-29.0.1rc1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/METADATA +1 -1
  91. {lsst_daf_butler-29.0.1rc1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/RECORD +99 -96
  92. {lsst_daf_butler-29.0.1rc1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/WHEEL +1 -1
  93. {lsst_daf_butler-29.0.1rc1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/entry_points.txt +0 -0
  94. {lsst_daf_butler-29.0.1rc1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/licenses/COPYRIGHT +0 -0
  95. {lsst_daf_butler-29.0.1rc1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/licenses/LICENSE +0 -0
  96. {lsst_daf_butler-29.0.1rc1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/licenses/bsd_license.txt +0 -0
  97. {lsst_daf_butler-29.0.1rc1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/licenses/gpl-v3.0.txt +0 -0
  98. {lsst_daf_butler-29.0.1rc1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/top_level.txt +0 -0
  99. {lsst_daf_butler-29.0.1rc1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/zip-safe +0 -0
lsst/daf/butler/__init__.py
@@ -38,6 +38,7 @@ from . import ddl, time_utils
 from ._butler import *
 from ._butler_collections import *
 from ._butler_config import *
+from ._butler_metrics import *
 from ._butler_repo_index import *
 from ._collection_type import CollectionType
 from ._column_categorization import *
lsst/daf/butler/_butler.py
@@ -46,6 +46,7 @@ from lsst.utils.logging import getLogger
 from ._butler_collections import ButlerCollections
 from ._butler_config import ButlerConfig, ButlerType
 from ._butler_instance_options import ButlerInstanceOptions
+from ._butler_metrics import ButlerMetrics
 from ._butler_repo_index import ButlerRepoIndex
 from ._config import Config, ConfigSubset
 from ._exceptions import EmptyQueryResultError, InvalidQueryError
@@ -89,6 +90,10 @@ class SpecificButlerDataset:
     dataset: DatasetRef | None
 
 
+class _DeprecatedDefault:
+    """Default value for a deprecated parameter."""
+
+
 class Butler(LimitedButler):  # numpydoc ignore=PR02
     """Interface for data butler and factory for Butler instances.
 
@@ -154,6 +159,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         writeable: bool | None = None,
         inferDefaults: bool = True,
         without_datastore: bool = False,
+        metrics: ButlerMetrics | None = None,
         **kwargs: Any,
     ) -> Butler:
         if cls is Butler:
@@ -165,6 +171,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
                 writeable=writeable,
                 inferDefaults=inferDefaults,
                 without_datastore=without_datastore,
+                metrics=metrics,
                 **kwargs,
             )
 
@@ -183,6 +190,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         writeable: bool | None = None,
         inferDefaults: bool = True,
         without_datastore: bool = False,
+        metrics: ButlerMetrics | None = None,
         **kwargs: Any,
     ) -> Butler:
         """Create butler instance from configuration.
@@ -230,6 +238,8 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         without_datastore : `bool`, optional
             If `True` do not attach a datastore to this butler. Any attempts
             to use a datastore will fail.
+        metrics : `ButlerMetrics` or `None`, optional
+            Metrics object to record butler usage statistics.
         **kwargs : `Any`
             Default data ID key-value pairs. These may only identify
             "governor" dimensions like ``instrument`` and ``skymap``.
@@ -300,6 +310,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         # passing the "butler" parameter to its constructor. This has
         # been moved out of the constructor into Butler.clone().
         butler = kwargs.pop("butler", None)
+        metrics = metrics if metrics is not None else ButlerMetrics()
         if butler is not None:
             if not isinstance(butler, Butler):
                 raise TypeError("'butler' parameter must be a Butler instance")
@@ -307,10 +318,17 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
                 raise TypeError(
                     "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
                 )
-            return butler.clone(collections=collections, run=run, inferDefaults=inferDefaults, dataId=kwargs)
+            return butler.clone(
+                collections=collections, run=run, inferDefaults=inferDefaults, metrics=metrics, dataId=kwargs
+            )
 
         options = ButlerInstanceOptions(
-            collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs
+            collections=collections,
+            run=run,
+            writeable=writeable,
+            inferDefaults=inferDefaults,
+            metrics=metrics,
+            kwargs=kwargs,
         )
 
         # Load the Butler configuration. This may involve searching the
@@ -1274,7 +1292,13 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         raise NotImplementedError()
 
     @abstractmethod
-    def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
+    def removeRuns(
+        self,
+        names: Iterable[str],
+        unstore: bool | type[_DeprecatedDefault] = _DeprecatedDefault,
+        *,
+        unlink_from_chains: bool = False,
+    ) -> None:
         """Remove one or more `~CollectionType.RUN` collections and the
         datasets within them.
 
@@ -1287,7 +1311,13 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             they are present, and attempt to rollback the registry deletions if
             datastore deletions fail (which may not always be possible). If
             `False`, datastore records for these datasets are still removed,
-            but any artifacts (e.g. files) will not be.
+            but any artifacts (e.g. files) will not be. This parameter is now
+            deprecated and no longer has any effect. Files are always deleted
+            from datastores unless they were ingested using full URIs.
+        unlink_from_chains : `bool`, optional
+            If `True` remove the RUN collection from any chains prior to
+            removing the RUN. If `False` the removal will fail if any chains
+            still refer to the RUN.
 
         Raises
         ------
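Usage note: a minimal sketch of the new keyword-only argument against the signature above; the repository path and RUN collection names are illustrative, not taken from this diff.

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/repo/example", writeable=True)  # illustrative path
    # With unlink_from_chains=True the RUNs are first detached from any
    # CHAINED collections; otherwise the removal fails if a chain still
    # refers to one of them.
    butler.removeRuns(["u/someone/run1", "u/someone/run2"], unlink_from_chains=True)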
@@ -1360,7 +1390,14 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         raise NotImplementedError()
 
     @abstractmethod
-    def ingest_zip(self, zip_file: ResourcePathExpression, transfer: str = "auto") -> None:
+    def ingest_zip(
+        self,
+        zip_file: ResourcePathExpression,
+        transfer: str = "auto",
+        *,
+        transfer_dimensions: bool = False,
+        dry_run: bool = False,
+    ) -> None:
         """Ingest a Zip file into this butler.
 
         The Zip file must have been created by `retrieve_artifacts_zip`.
@@ -1371,10 +1408,17 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             Path to the Zip file.
         transfer : `str`, optional
             Method to use to transfer the Zip into the datastore.
+        transfer_dimensions : `bool`, optional
+            If `True`, dimension record data associated with the new datasets
+            will be transferred from the Zip file, if present.
+        dry_run : `bool`, optional
+            If `True` the ingest will be processed without any modifications
+            made to the target butler and as if the target butler did not
+            have any of the datasets.
 
         Notes
         -----
-        Run collections are created as needed.
+        Run collections and dataset types are created as needed.
         """
         raise NotImplementedError()
 
@@ -1741,8 +1785,8 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             warn_limit = True
         with self.query() as query:
             result = (
-                query.where(data_id, where, bind=bind, **kwargs)
-                .data_ids(dimensions)
+                query.data_ids(dimensions)
+                .where(data_id, where, bind=bind, **kwargs)
                 .order_by(*ensure_iterable(order_by))
                 .limit(query_limit)
             )
@@ -1979,8 +2023,8 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             warn_limit = True
         with self.query() as query:
             result = (
-                query.where(data_id, where, bind=bind, **kwargs)
-                .dimension_records(element)
+                query.dimension_records(element)
+                .where(data_id, where, bind=bind, **kwargs)
                 .order_by(*ensure_iterable(order_by))
                 .limit(query_limit)
            )
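Usage note: both hunks reorder the chained calls so that the result type (data_ids or dimension_records) is selected before the where() constraint is applied. A sketch of the equivalent public-API pattern, given an existing butler; the element name and constraint are illustrative.

    with butler.query() as query:
        records = list(
            query.dimension_records("visit")
            .where("instrument = 'LSSTCam'")  # illustrative constraint
            .order_by("visit")
            .limit(10)
        )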
@@ -2123,6 +2167,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         run: str | None | EllipsisType = ...,
         inferDefaults: bool | EllipsisType = ...,
         dataId: dict[str, str] | EllipsisType = ...,
+        metrics: ButlerMetrics | None = None,
     ) -> Butler:
         """Return a new Butler instance connected to the same repository
         as this one, optionally overriding ``collections``, ``run``,
@@ -2142,5 +2187,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         dataId : `str`
             Same as ``kwargs`` passed to the constructor. If omitted, copies
             values from original object.
+        metrics : `ButlerMetrics` or `None`, optional
+            Metrics object to record butler statistics.
         """
         raise NotImplementedError()
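Usage note: taken together, the _butler.py changes thread a single ButlerMetrics object through construction and cloning. A minimal sketch, with an illustrative repository path and run name:

    from lsst.daf.butler import Butler, ButlerMetrics

    metrics = ButlerMetrics()
    butler = Butler.from_config("/repo/example", metrics=metrics)
    # A clone may share the same metrics object, so both instances
    # accumulate into one set of counters.
    clone = butler.clone(run="u/someone/run", metrics=metrics)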
lsst/daf/butler/_butler_collections.py
@@ -108,6 +108,10 @@ class ButlerCollections(ABC, Sequence):
         """Collection defaults associated with this butler."""
         raise NotImplementedError("Defaults must be implemented by a subclass")
 
+    def __str__(self) -> str:
+        """Return string representation."""
+        return f"{self.__class__.__name__}(defaults={self.defaults})"
+
     @abstractmethod
     def extend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None:
         """Add children to the end of a CHAINED collection.
lsst/daf/butler/_butler_instance_options.py
@@ -30,6 +30,8 @@ __all__ = ("ButlerInstanceOptions",)
 import dataclasses
 from typing import Any
 
+from ._butler_metrics import ButlerMetrics
+
 
 @dataclasses.dataclass(frozen=True)
 class ButlerInstanceOptions:
@@ -43,4 +45,5 @@ class ButlerInstanceOptions:
     run: str | None = None
     writeable: bool | None = None
     inferDefaults: bool = True
+    metrics: ButlerMetrics = dataclasses.field(default_factory=ButlerMetrics)
     kwargs: dict[str, Any] = dataclasses.field(default_factory=dict)
lsst/daf/butler/_butler_metrics.py (new file)
@@ -0,0 +1,117 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+from collections.abc import Callable, Iterator
+from contextlib import contextmanager
+
+from pydantic import BaseModel
+
+from lsst.utils.logging import LsstLoggers
+from lsst.utils.timer import time_this
+
+
+class ButlerMetrics(BaseModel):
+    """Metrics collected during Butler operations."""
+
+    time_in_put: float = 0.0
+    """Wall-clock time, in seconds, spent in put()."""
+
+    time_in_get: float = 0.0
+    """Wall-clock time, in seconds, spent in get()."""
+
+    n_get: int = 0
+    """Number of datasets retrieved with get()."""
+
+    n_put: int = 0
+    """Number of datasets stored with put()."""
+
+    def reset(self) -> None:
+        """Reset all metrics."""
+        self.time_in_put = 0.0
+        self.time_in_get = 0.0
+        self.n_get = 0
+        self.n_put = 0
+
+    def increment_get(self, duration: float) -> None:
+        """Increment time for get().
+
+        Parameters
+        ----------
+        duration : `float`
+            Duration to add to the get() statistics.
+        """
+        self.time_in_get += duration
+        self.n_get += 1
+
+    def increment_put(self, duration: float) -> None:
+        """Increment time for put().
+
+        Parameters
+        ----------
+        duration : `float`
+            Duration to add to the put() statistics.
+        """
+        self.time_in_put += duration
+        self.n_put += 1
+
+    @contextmanager
+    def _timer(
+        self, handler: Callable[[float], None], log: LsstLoggers | None = None, msg: str | None = None
+    ) -> Iterator[None]:
+        with time_this(log=log, msg=msg) as timer:
+            yield
+        handler(timer.duration)
+
+    @contextmanager
+    def instrument_get(self, log: LsstLoggers | None = None, msg: str | None = None) -> Iterator[None]:
+        """Run code and increment get statistics.
+
+        Parameters
+        ----------
+        log : `logging.Logger` or `None`
+            Logger to use for any timing information.
+        msg : `str` or `None`
+            Any message to be included in log output.
+        """
+        with self._timer(self.increment_get, log=log, msg=msg):
+            yield
+
+    @contextmanager
+    def instrument_put(self, log: LsstLoggers | None = None, msg: str | None = None) -> Iterator[None]:
+        """Run code and increment put statistics.
+
+        Parameters
+        ----------
+        log : `logging.Logger` or `None`
+            Logger to use for any timing information.
+        msg : `str` or `None`
+            Any message to be included in log output.
+        """
+        with self._timer(self.increment_put, log=log, msg=msg):
+            yield
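Usage note: ButlerMetrics is re-exported from the package top level via the new star import in __init__.py. A sketch of direct use:

    from lsst.daf.butler import ButlerMetrics

    metrics = ButlerMetrics()
    with metrics.instrument_put(msg="Stored dataset"):
        ...  # work timed and counted as one put()
    with metrics.instrument_get():
        ...  # work timed and counted as one get()
    print(metrics.n_put, metrics.time_in_put, metrics.n_get, metrics.time_in_get)
    metrics.reset()  # zero every counter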
lsst/daf/butler/_config.py
@@ -1254,7 +1254,7 @@ class ConfigSubset(Config):
 
     Global defaults, at lowest priority, are found in the ``config``
     directory of the butler source tree. Additional defaults can be
-    defined using the environment variable ``$DAF_BUTLER_CONFIG_PATHS``
+    defined using the environment variable ``$DAF_BUTLER_CONFIG_PATH``
     which is a PATH-like variable where paths at the front of the list
     have priority over those later.
 
lsst/daf/butler/_dataset_ref.py
@@ -66,8 +66,11 @@ from ._named import NamedKeyDict
 from .datastore.stored_file_info import StoredDatastoreItemInfo
 from .dimensions import (
     DataCoordinate,
+    DimensionDataAttacher,
+    DimensionDataExtractor,
     DimensionGroup,
     DimensionUniverse,
+    SerializableDimensionData,
     SerializedDataCoordinate,
     SerializedDataId,
 )
@@ -907,6 +910,62 @@ class MinimalistSerializableDatasetRef(pydantic.BaseModel):
     data_id: SerializedDataId
     """Data coordinate of this dataset."""
 
+    def to_dataset_ref(
+        self,
+        id: DatasetId,
+        *,
+        dataset_type: DatasetType,
+        universe: DimensionUniverse,
+        attacher: DimensionDataAttacher | None = None,
+    ) -> DatasetRef:
+        """Convert serialized object to a `DatasetRef`.
+
+        Parameters
+        ----------
+        id : `DatasetId`
+            UUID identifying the dataset.
+        dataset_type : `DatasetType`
+            `DatasetType` record corresponding to the dataset type name in the
+            serialized object.
+        universe : `DimensionUniverse`
+            Dimension universe for the dataset.
+        attacher : `DimensionDataAttacher`, optional
+            If provided, will be used to add dimension records to the
+            deserialized `DatasetRef` instance.
+
+        Returns
+        -------
+        ref : `DatasetRef`
+            The deserialized object.
+        """
+        assert dataset_type.name == self.dataset_type_name, (
+            "Given DatasetType does not match the serialized dataset type name"
+        )
+        simple_data_id = SerializedDataCoordinate(dataId=self.data_id)
+        data_id = DataCoordinate.from_simple(simple=simple_data_id, universe=universe)
+        if attacher:
+            data_ids = attacher.attach(dataset_type.dimensions, [data_id])
+            data_id = data_ids[0]
+        return DatasetRef(
+            id=id,
+            run=self.run,
+            datasetType=dataset_type,
+            dataId=data_id,
+        )
+
+    @staticmethod
+    def from_dataset_ref(ref: DatasetRef) -> MinimalistSerializableDatasetRef:
+        """Serialize a `DatasetRef` to a simplified format.
+
+        Parameters
+        ----------
+        ref : `DatasetRef`
+            `DatasetRef` object to serialize.
+        """
+        return MinimalistSerializableDatasetRef(
+            dataset_type_name=ref.datasetType.name, run=ref.run, data_id=dict(ref.dataId.mapping)
+        )
+
 
 class SerializedDatasetRefContainer(pydantic.BaseModel):
     """Serializable model for a collection of DatasetRef.
@@ -938,6 +997,9 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
     compact_refs: dict[uuid.UUID, MinimalistSerializableDatasetRef]
     """Minimal dataset ref information indexed by UUID."""
 
+    dimension_records: SerializableDimensionData | None = None
+    """Dimension record information"""
+
     def __len__(self) -> int:
         """Return the number of datasets in the container."""
         return len(self.compact_refs)
@@ -957,19 +1019,32 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
         universe: DimensionUniverse | None = None
         dataset_types: dict[str, SerializedDatasetType] = {}
         compact_refs: dict[uuid.UUID, MinimalistSerializableDatasetRef] = {}
+        data_ids: list[DataCoordinate] = []
+        dimensions: list[DimensionGroup] = []
         for ref in refs:
-            simple_ref = ref.to_simple()
-            dataset_type = simple_ref.datasetType
-            assert dataset_type is not None  # For mypy
             if universe is None:
                 universe = ref.datasetType.dimensions.universe
-            if (name := dataset_type.name) not in dataset_types:
-                dataset_types[name] = dataset_type
-            data_id = simple_ref.dataId
-            assert data_id is not None  # For mypy
-            compact_refs[simple_ref.id] = MinimalistSerializableDatasetRef(
-                dataset_type_name=name, run=simple_ref.run, data_id=data_id.dataId
+            if (name := ref.datasetType.name) not in dataset_types:
+                dataset_types[name] = ref.datasetType.to_simple()
+            compact_refs[ref.id] = MinimalistSerializableDatasetRef.from_dataset_ref(ref)
+            if ref.dataId.hasRecords():
+                dimensions.append(ref.datasetType.dimensions)
+                data_ids.append(ref.dataId)
+
+        # Extract dimension record metadata if present.
+        dimension_records = None
+        if data_ids and len(compact_refs) == len(data_ids):
+            dimension_group = DimensionGroup.union(*dimensions, universe=universe)
+
+            # Records were attached to all refs. Store them.
+            extractor = DimensionDataExtractor.from_dimension_group(
+                dimension_group,
+                ignore_cached=False,
+                include_skypix=False,
             )
+            extractor.update(data_ids)
+            dimension_records = SerializableDimensionData.from_record_sets(extractor.records.values())
+
         if universe:
             universe_version = universe.version
             universe_namespace = universe.namespace
@@ -982,6 +1057,7 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
             universe_namespace=universe_namespace,
             dataset_types=dataset_types,
             compact_refs=compact_refs,
+            dimension_records=dimension_records,
         )
 
     def to_refs(self, universe: DimensionUniverse) -> list[DatasetRef]:
@@ -1019,15 +1095,22 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
             name: DatasetType.from_simple(dtype, universe=universe)
             for name, dtype in self.dataset_types.items()
         }
+
+        # Dimension records can be attached if available.
+        # We assume that all dimension information was stored.
+        attacher = None
+        if self.dimension_records:
+            attacher = DimensionDataAttacher(
+                deserializers=self.dimension_records.make_deserializers(universe)
+            )
+
         refs: list[DatasetRef] = []
         for id_, minimal in self.compact_refs.items():
-            simple_data_id = SerializedDataCoordinate(dataId=minimal.data_id)
-            data_id = DataCoordinate.from_simple(simple=simple_data_id, universe=universe)
-            ref = DatasetRef(
-                id=id_,
-                run=minimal.run,
-                datasetType=dataset_types[minimal.dataset_type_name],
-                dataId=data_id,
+            ref = minimal.to_dataset_ref(
+                id_,
+                dataset_type=dataset_types[minimal.dataset_type_name],
+                universe=universe,
+                attacher=attacher,
             )
             refs.append(ref)
         return refs
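Usage note: with the new dimension_records field, a container built from refs whose data IDs have records attached (ref.dataId.hasRecords()) now restores those records on deserialization. A hedged sketch of the round trip, assuming SerializedDatasetRefContainerV1 is reachable via the package's star imports and refs is a list of record-expanded DatasetRef objects:

    from lsst.daf.butler import SerializedDatasetRefContainerV1

    container = SerializedDatasetRefContainerV1.from_refs(refs)
    blob = container.model_dump_json()  # standard pydantic v2 serialization
    restored = SerializedDatasetRefContainerV1.model_validate_json(blob).to_refs(
        universe=butler.dimensions
    )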
lsst/daf/butler/_file_dataset.py
@@ -27,15 +27,21 @@
 
 from __future__ import annotations
 
-__all__ = ["FileDataset"]
+__all__ = ("FileDataset", "SerializedFileDataset")
 
+import uuid
+from collections.abc import Callable
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, TypeAlias
+
+import pydantic
 
 from lsst.resources import ResourcePath, ResourcePathExpression
 
-from ._dataset_ref import DatasetRef
+from ._dataset_ref import DatasetRef, MinimalistSerializableDatasetRef
+from ._dataset_type import DatasetType
 from ._formatter import FormatterParameter
+from .dimensions import DimensionUniverse
 
 
 @dataclass
@@ -87,3 +93,72 @@ class FileDataset:
         if not isinstance(other, type(self)):
             return NotImplemented
         return str(self.path) < str(other.path)
+
+    def to_simple(self) -> SerializedFileDataset:
+        """
+        Convert this instance to a simplified, JSON-serializable object.
+
+        Returns
+        -------
+        serialized : `SerializedFileDataset`
+            Serializable representation of this `FileDataset` instance.
+        """
+        if self.formatter is None:
+            formatter = None
+        elif isinstance(self.formatter, str):
+            formatter = self.formatter
+        else:
+            formatter = self.formatter.name()
+
+        refs = {ref.id: MinimalistSerializableDatasetRef.from_dataset_ref(ref) for ref in self.refs}
+
+        return SerializedFileDataset(
+            refs=refs,
+            path=str(self.path),
+            formatter=formatter,
+        )
+
+    @staticmethod
+    def from_simple(
+        dataset: SerializedFileDataset, *, dataset_type_loader: DatasetTypeLoader, universe: DimensionUniverse
+    ) -> FileDataset:
+        """
+        Deserialize a `SerializedFileDataset` into a `FileDataset`.
+
+        Parameters
+        ----------
+        dataset : `SerializedFileDataset`
+            Object to deserialize.
+        dataset_type_loader : `Callable` [[ `str` ], `DatasetType` ]
+            Function that takes a string dataset type name as its
+            only parameter, and returns an instance of `DatasetType`.
+            Used to deserialize the `DatasetRef` instances contained
+            in the serialized `FileDataset`.
+        universe : `DimensionUniverse`
+            Dimension universe associated with the `Butler` instance that
+            created the serialized `FileDataset` instance.
+
+        Returns
+        -------
+        file_dataset : `FileDataset`
+            Deserialized equivalent of the input dataset.
+        """
+        refs = [
+            ref.to_dataset_ref(id, universe=universe, dataset_type=dataset_type_loader(ref.dataset_type_name))
+            for id, ref in dataset.refs.items()
+        ]
+        return FileDataset(path=dataset.path, refs=refs, formatter=dataset.formatter)
+
+
+DatasetTypeLoader: TypeAlias = Callable[[str], DatasetType]
+"""Type signature for a function that takes a string dataset type name as its
+only parameter, and returns an instance of `DatasetType`.
+"""
+
+
+class SerializedFileDataset(pydantic.BaseModel):
+    """Serializable format of `FileDataset` object."""
+
+    refs: dict[uuid.UUID, MinimalistSerializableDatasetRef]
+    path: str
+    formatter: str | None = None
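Usage note: a sketch of the new round trip. Butler.get_dataset_type matches the DatasetTypeLoader signature; ref stands in for an existing resolved DatasetRef and butler for an existing Butler.

    from lsst.daf.butler import FileDataset, SerializedFileDataset

    dataset = FileDataset(path="file:///data/example.fits", refs=[ref])
    blob = dataset.to_simple().model_dump_json()
    restored = FileDataset.from_simple(
        SerializedFileDataset.model_validate_json(blob),
        dataset_type_loader=butler.get_dataset_type,
        universe=butler.dimensions,
    )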
lsst/daf/butler/_limited_butler.py
@@ -31,11 +31,13 @@ __all__ = ("LimitedButler",)
 
 import logging
 from abc import ABC, abstractmethod
-from collections.abc import Iterable
+from collections.abc import Iterable, Iterator
+from contextlib import contextmanager
 from typing import Any, ClassVar
 
 from lsst.resources import ResourcePath
 
+from ._butler_metrics import ButlerMetrics
 from ._dataset_provenance import DatasetProvenance
 from ._dataset_ref import DatasetRef
 from ._deferredDatasetHandle import DeferredDatasetHandle
@@ -138,7 +140,8 @@ class LimitedButler(ABC):
         to use a resolved `DatasetRef`. Subclasses can support more options.
         """
         log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
-        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)
+        with self._metrics.instrument_get(log, msg="Retrieved dataset"):
+            return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)
 
     def getDeferred(
         self,
@@ -415,6 +418,30 @@ class LimitedButler(ABC):
         """
         raise NotImplementedError()
 
+    @contextmanager
+    def record_metrics(self, metrics: ButlerMetrics | None = None) -> Iterator[ButlerMetrics]:
+        """Enable new metrics recording context.
+
+        Parameters
+        ----------
+        metrics : `lsst.daf.butler.ButlerMetrics`
+            Optional override metrics object. If given, this will be the
+            same object returned by the context manager.
+
+        Yields
+        ------
+        metrics : `lsst.daf.butler.ButlerMetrics`
+            Metrics recorded within this context. This temporarily replaces
+            any existing metrics object associated with this butler.
+        """
+        old_metrics = self._metrics
+        new_metrics = metrics if metrics is not None else ButlerMetrics()
+        try:
+            self._metrics = new_metrics
+            yield new_metrics
+        finally:
+            self._metrics = old_metrics
+
     @property
     @abstractmethod
     def dimensions(self) -> DimensionUniverse:
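Usage note: a sketch of the context manager in use; butler, ref, and out_ref are placeholders for an existing butler and resolved refs.

    with butler.record_metrics() as metrics:
        data = butler.get(ref)        # counted in n_get / time_in_get
        butler.put(data, out_ref)     # counted in n_put / time_in_put
    print(f"{metrics.n_get} get(s) took {metrics.time_in_get:.2f}s")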
@@ -430,3 +457,8 @@ class LimitedButler(ABC):
     """An object that maps known storage class names to objects that fully
     describe them (`StorageClassFactory`).
     """
+
+    _metrics: ButlerMetrics
+    """An object for recording metrics associated with this butler.
+    (`ButlerMetrics`)
+    """