lsst_daf_butler-29.2025.4500-py3-none-any.whl → lsst_daf_butler-29.2025.4800-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. lsst/daf/butler/__init__.py +0 -3
  2. lsst/daf/butler/_butler.py +52 -5
  3. lsst/daf/butler/_dataset_provenance.py +44 -9
  4. lsst/daf/butler/_labeled_butler_factory.py +47 -30
  5. lsst/daf/butler/_quantum_backed.py +12 -2
  6. lsst/daf/butler/_utilities/thread_safe_cache.py +13 -0
  7. lsst/daf/butler/cli/cmd/commands.py +6 -3
  8. lsst/daf/butler/configs/datastores/formatters.yaml +3 -2
  9. lsst/daf/butler/configs/datastores/writeRecipes.yaml +6 -6
  10. lsst/daf/butler/configs/storageClasses.yaml +2 -0
  11. lsst/daf/butler/delegates/arrowtable.py +3 -1
  12. lsst/daf/butler/dimensions/_schema.py +2 -24
  13. lsst/daf/butler/direct_butler/_direct_butler.py +54 -36
  14. lsst/daf/butler/formatters/logs.py +2 -2
  15. lsst/daf/butler/logging.py +289 -109
  16. lsst/daf/butler/queries/_expression_strings.py +3 -3
  17. lsst/daf/butler/{registry/queries → queries}/expressions/__init__.py +0 -2
  18. lsst/daf/butler/queries/expressions/categorize.py +59 -0
  19. lsst/daf/butler/registry/_registry.py +1 -2
  20. lsst/daf/butler/registry/_registry_base.py +1 -2
  21. lsst/daf/butler/registry/collections/nameKey.py +20 -3
  22. lsst/daf/butler/registry/collections/synthIntKey.py +20 -6
  23. lsst/daf/butler/registry/datasets/byDimensions/_dataset_type_cache.py +12 -52
  24. lsst/daf/butler/registry/datasets/byDimensions/_manager.py +97 -87
  25. lsst/daf/butler/registry/datasets/byDimensions/tables.py +17 -12
  26. lsst/daf/butler/registry/interfaces/_collections.py +45 -8
  27. lsst/daf/butler/registry/interfaces/_datasets.py +10 -8
  28. lsst/daf/butler/registry/interfaces/_obscore.py +0 -5
  29. lsst/daf/butler/registry/managers.py +0 -22
  30. lsst/daf/butler/registry/obscore/_manager.py +0 -9
  31. lsst/daf/butler/registry/queries/__init__.py +0 -5
  32. lsst/daf/butler/registry/queries/_query_datasets.py +2 -2
  33. lsst/daf/butler/registry/sql_registry.py +35 -10
  34. lsst/daf/butler/registry/tests/_database.py +2 -2
  35. lsst/daf/butler/registry/tests/_registry.py +110 -3
  36. lsst/daf/butler/remote_butler/_remote_butler.py +17 -1
  37. lsst/daf/butler/remote_butler/server/handlers/_external.py +11 -0
  38. lsst/daf/butler/remote_butler/server_models.py +8 -0
  39. lsst/daf/butler/script/_associate.py +13 -15
  40. lsst/daf/butler/script/_pruneDatasets.py +32 -31
  41. lsst/daf/butler/script/butlerImport.py +11 -12
  42. lsst/daf/butler/script/certifyCalibrations.py +19 -19
  43. lsst/daf/butler/script/collectionChain.py +27 -28
  44. lsst/daf/butler/script/configValidate.py +11 -9
  45. lsst/daf/butler/script/exportCalibs.py +60 -61
  46. lsst/daf/butler/script/ingest_files.py +13 -14
  47. lsst/daf/butler/script/ingest_zip.py +2 -3
  48. lsst/daf/butler/script/queryCollections.py +141 -138
  49. lsst/daf/butler/script/queryDataIds.py +80 -77
  50. lsst/daf/butler/script/queryDatasetTypes.py +24 -21
  51. lsst/daf/butler/script/queryDatasets.py +4 -14
  52. lsst/daf/butler/script/queryDimensionRecords.py +61 -60
  53. lsst/daf/butler/script/register_dataset_type.py +15 -14
  54. lsst/daf/butler/script/removeCollections.py +39 -34
  55. lsst/daf/butler/script/removeDatasetType.py +2 -2
  56. lsst/daf/butler/script/removeRuns.py +3 -5
  57. lsst/daf/butler/script/retrieveArtifacts.py +30 -27
  58. lsst/daf/butler/script/transferDatasets.py +29 -28
  59. lsst/daf/butler/tests/hybrid_butler.py +8 -1
  60. lsst/daf/butler/tests/server.py +4 -5
  61. lsst/daf/butler/version.py +1 -1
  62. {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/METADATA +1 -2
  63. {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/RECORD +80 -93
  64. lsst/daf/butler/_column_categorization.py +0 -83
  65. lsst/daf/butler/_column_tags.py +0 -210
  66. lsst/daf/butler/_column_type_info.py +0 -180
  67. lsst/daf/butler/registry/queries/_query_backend.py +0 -529
  68. lsst/daf/butler/registry/queries/_query_context.py +0 -474
  69. lsst/daf/butler/registry/queries/_readers.py +0 -348
  70. lsst/daf/butler/registry/queries/_sql_query_backend.py +0 -184
  71. lsst/daf/butler/registry/queries/_sql_query_context.py +0 -555
  72. lsst/daf/butler/registry/queries/butler_sql_engine.py +0 -226
  73. lsst/daf/butler/registry/queries/expressions/_predicate.py +0 -538
  74. lsst/daf/butler/registry/queries/expressions/categorize.py +0 -339
  75. lsst/daf/butler/registry/queries/expressions/check.py +0 -540
  76. lsst/daf/butler/registry/queries/expressions/normalForm.py +0 -1186
  77. lsst/daf/butler/registry/queries/find_first_dataset.py +0 -102
  78. /lsst/daf/butler/{registry/queries → queries}/expressions/parser/__init__.py +0 -0
  79. /lsst/daf/butler/{registry/queries → queries}/expressions/parser/exprTree.py +0 -0
  80. /lsst/daf/butler/{registry/queries → queries}/expressions/parser/parser.py +0 -0
  81. /lsst/daf/butler/{registry/queries → queries}/expressions/parser/parserLex.py +0 -0
  82. /lsst/daf/butler/{registry/queries → queries}/expressions/parser/parserYacc.py +0 -0
  83. /lsst/daf/butler/{registry/queries → queries}/expressions/parser/ply/__init__.py +0 -0
  84. /lsst/daf/butler/{registry/queries → queries}/expressions/parser/ply/lex.py +0 -0
  85. /lsst/daf/butler/{registry/queries → queries}/expressions/parser/ply/yacc.py +0 -0
  86. /lsst/daf/butler/{registry/queries → queries}/expressions/parser/treeVisitor.py +0 -0
  87. {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/WHEEL +0 -0
  88. {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/entry_points.txt +0 -0
  89. {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/licenses/COPYRIGHT +0 -0
  90. {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/licenses/LICENSE +0 -0
  91. {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/licenses/bsd_license.txt +0 -0
  92. {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/licenses/gpl-v3.0.txt +0 -0
  93. {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/top_level.txt +0 -0
  94. {lsst_daf_butler-29.2025.4500.dist-info → lsst_daf_butler-29.2025.4800.dist-info}/zip-safe +0 -0
lsst/daf/butler/__init__.py
@@ -41,9 +41,6 @@ from ._butler_config import *
 from ._butler_metrics import *
 from ._butler_repo_index import *
 from ._collection_type import CollectionType
-from ._column_categorization import *
-from ._column_tags import *
-from ._column_type_info import *
 from ._config import *
 from ._config_support import LookupKey
 from ._dataset_association import *
lsst/daf/butler/_butler.py
@@ -36,7 +36,7 @@ from abc import abstractmethod
 from collections.abc import Collection, Iterable, Iterator, Mapping, Sequence
 from contextlib import AbstractContextManager
 from types import EllipsisType
-from typing import TYPE_CHECKING, Any, TextIO
+from typing import TYPE_CHECKING, Any, Literal, Self, TextIO

 from lsst.resources import ResourcePath, ResourcePathExpression
 from lsst.utils import doImportType
@@ -94,7 +94,7 @@ class _DeprecatedDefault:
     """Default value for a deprecated parameter."""


-class Butler(LimitedButler):  # numpydoc ignore=PR02
+class Butler(LimitedButler, AbstractContextManager):  # numpydoc ignore=PR02
     """Interface for data butler and factory for Butler instances.

     Parameters
@@ -358,6 +358,16 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             case _:
                 raise TypeError(f"Unknown Butler type '{butler_type}'")

+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Literal[False]:
+        try:
+            self.close()
+        except Exception:
+            _LOG.exception("An exception occured during Butler.close()")
+        return False
+
     @staticmethod
     def makeRepo(
         root: ResourcePathExpression,
@@ -506,9 +516,10 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         # Create Registry and populate tables
         registryConfig = RegistryConfig(config.get("registry"))
         dimensionConfig = DimensionConfig(dimensionConfig)
-        _RegistryFactory(registryConfig).create_from_config(
+        registry = _RegistryFactory(registryConfig).create_from_config(
             dimensionConfig=dimensionConfig, butlerRoot=root_uri
         )
+        registry.close()

         _LOG.verbose("Wrote new Butler configuration file to %s", configURI)
@@ -1012,7 +1023,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
     @abstractmethod
     def get_dataset(
         self,
-        id: DatasetId,
+        id: DatasetId | str,
         *,
         storage_class: str | StorageClass | None = None,
         dimension_records: bool = False,
@@ -1023,7 +1034,8 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         Parameters
         ----------
         id : `DatasetId`
-            The unique identifier for the dataset.
+            The unique identifier for the dataset, as an instance of
+            `uuid.UUID` or a string containing a hexadecimal number.
         storage_class : `str` or `StorageClass` or `None`
             A storage class to use when creating the returned entry. If given
             it must be compatible with the default storage class.
@@ -1040,6 +1052,26 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         """
         raise NotImplementedError()

+    @abstractmethod
+    def get_many_datasets(self, ids: Iterable[DatasetId | str]) -> list[DatasetRef]:
+        """Retrieve a list of dataset entries.
+
+        Parameters
+        ----------
+        ids : `~collections.abc.Iterable` [ `DatasetId` or `str` ]
+            The unique identifiers for the datasets, as instances of
+            `uuid.UUID` or strings containing a hexadecimal number.
+
+        Returns
+        -------
+        refs : `list` [ `DatasetRef` ]
+            A list containing a `DatasetRef` for each of the given dataset IDs.
+            If a dataset was not found, no error is thrown -- it is just not
+            included in the list. The returned datasets are in no particular
+            order.
+        """
+        raise NotImplementedError()
+
     @abstractmethod
     def find_dataset(
         self,
@@ -2201,3 +2233,18 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             Metrics object to record butler statistics.
         """
         raise NotImplementedError()
+
+    @abstractmethod
+    def close(self) -> None:
+        """Release all resources associated with this Butler instance. The
+        instance may no longer be used after this is called.
+
+        Notes
+        -----
+        Instead of calling ``close()``directly, you can use the Butler object
+        as a context manager. For example::
+            with Butler(...) as butler:
+                butler.get(...)
+            # butler is closed after exiting the block.
+        """
+        raise NotImplementedError()
lsst/daf/butler/_dataset_provenance.py
@@ -112,6 +112,8 @@ class DatasetProvenance(pydantic.BaseModel):
         sep: str = ".",
         simple_types: bool = False,
         use_upper: bool | None = None,
+        max_inputs: int | None = None,
+        store_minimalist_inputs: bool = False,
     ) -> dict[str, _PROV_TYPES]:
         """Return provenance as a flattened dictionary.

@@ -137,6 +139,13 @@ class DatasetProvenance(pydantic.BaseModel):
             character of the prefix (defined by whether `str.isupper()` returns
             true, else they will be lower case). If `False` the case will be
             lower case, and if `True` the case will be upper case.
+        max_inputs : `int` or `None`, optional
+            Maximum number of inputs to be recorded in provenance. `None`
+            results in all inputs being recorded. If the number of inputs
+            exceeds this value no input provenance will be recorded.
+        store_minimalist_inputs : `bool`, optional
+            If `True` only the ID of the input is stored along with explicit
+            extras. If `False` the run and dataset type are also recorded.

         Returns
         -------
@@ -155,7 +164,13 @@ class DatasetProvenance(pydantic.BaseModel):

         Each input dataset will have the ``id``, ``run``, and ``datasettype``
         keys as defined above (but no ``dataid`` key) with an ``input N``
-        prefix where ``N`` starts counting at 0.
+        prefix where ``N`` starts counting at 0. It is possible to drop
+        the ``datasettype`` and ``run`` to save space by using the
+        ``store_minimalist_inputs`` flag.
+
+        If there are too many inputs (see the ``max_inputs`` parameters)
+        no inputs will be recorded. The number of inputs is always recorded
+        to indicate that the inputs were dropped.

         The quantum ID, if present, will use key ``quantum``.
@@ -171,6 +186,7 @@ class DatasetProvenance(pydantic.BaseModel):
             "lsst.butler.dataid.detector": 10,
             "lsst.butler.dataid.instrument": "LSSTCam",
             "lsst.butler.quantum": "d93a735b-08f0-477d-bc95-2cc32d6d898b",
+            "lsst.butler.n_inputs": 2,
             "lsst.butler.input.0.id": "3dfd7ba5-5e35-4565-9d87-4b33880ed06c",
             "lsst.butler.input.0.run": "other_run",
             "lsst.butler.input.0.datasettype": "astropy_parquet",
@@ -206,12 +222,28 @@ class DatasetProvenance(pydantic.BaseModel):
         if self.quantum_id is not None:
             prov[_make_key("quantum")] = self.quantum_id if not simple_types else str(self.quantum_id)

-        for i, input in enumerate(self.inputs):
+        # Record the number of inputs so that people can determine how many
+        # there were even if they were dropped because they exceeded the
+        # allowed maximum. Do not record the count if we have a null provenance
+        # state with no ref and no inputs.
+        if ref is not None or len(self.inputs) > 0:
+            prov[_make_key("n_inputs")] = len(self.inputs)
+
+        # Remove all inputs if the maximum is found. Truncating to the
+        # maximum (or auto switching to minimalist mode and increasing the
+        # maximum by 3) is not preferred.
+        inputs = (
+            self.inputs
+            if max_inputs is None or (max_inputs is not None and len(self.inputs) <= max_inputs)
+            else []
+        )
+        for i, input in enumerate(inputs):
             prov[_make_key("input", i, "id")] = input.id if not simple_types else str(input.id)
-            if input.run is not None:  # for mypy
-                prov[_make_key("input", i, "run")] = input.run
-            if input.datasetType is not None:  # for mypy
-                prov[_make_key("input", i, "datasettype")] = input.datasetType.name
+            if not store_minimalist_inputs:
+                if input.run is not None:  # for mypy
+                    prov[_make_key("input", i, "run")] = input.run
+                if input.datasetType is not None:  # for mypy
+                    prov[_make_key("input", i, "datasettype")] = input.datasetType.name

             if input.id in self.extras:
                 for xk, xv in self.extras[input.id].items():
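Together the two new parameters let callers bound the size of the flattened provenance. A sketch, assuming `provenance` is a populated DatasetProvenance and `ref` is the output DatasetRef:

    prov = provenance.to_flat_dict(
        ref,
        prefix="lsst.butler",
        sep=".",
        simple_types=True,
        max_inputs=2000,  # past this count, all per-input keys are dropped
        store_minimalist_inputs=True,  # record only input.N.id (plus extras)
    )

    # n_inputs is written even when the inputs themselves were dropped, so a
    # reader can detect truncation.
    assert prov["lsst.butler.n_inputs"] == len(provenance.inputs)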
@@ -369,7 +401,9 @@ class DatasetProvenance(pydantic.BaseModel):
         # Prefix will always include the separator if it is defined.
         prefix += sep

-        core_provenance = tuple(f"{prefix}{k}".lower() for k in ("run", "id", "datasettype", "quantum"))
+        core_provenance = tuple(
+            f"{prefix}{k}".lower() for k in ("run", "id", "datasettype", "quantum", "n_inputs")
+        )

         # Need to escape the prefix and separator for regex usage.
         esc_sep = re.escape(sep)
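Since ``n_inputs`` now sits in the core-provenance tuple, the key-matching machinery built on it should treat it like the other reserved keys when stripping provenance. A sketch under that assumption, using the lower-case key form:

    meta = {
        "lsst.butler.run": "some_run",
        "lsst.butler.n_inputs": 2,
        "unrelated.key": 1,
    }
    DatasetProvenance.strip_provenance_from_flat_dict(meta)
    assert meta == {"unrelated.key": 1}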
@@ -445,7 +479,7 @@ class DatasetProvenance(pydantic.BaseModel):

         quantum_id = None
         ref_id = None
-        input_ids = {}
+        input_ids: dict[int, uuid.UUID] = {}
         extras: dict[int, dict[str, Any]] = {}

         for k, standard in prov_keys.items():
@@ -475,8 +509,9 @@ class DatasetProvenance(pydantic.BaseModel):

         provenance = cls(quantum_id=quantum_id)

+        input_refs = {ref.id: ref for ref in butler.get_many_datasets(input_ids.values())}
         for i in sorted(input_ids):
-            input_ref = butler.get_dataset(input_ids[i])
+            input_ref = input_refs.get(input_ids[i])
             if input_ref is None:
                 raise ValueError(f"Input dataset ({input_ids[i]}) is not known to this butler.")
             provenance.add_input(input_ref)
lsst/daf/butler/_labeled_butler_factory.py
@@ -25,9 +25,11 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

+from __future__ import annotations
+
 __all__ = ("LabeledButlerFactory", "LabeledButlerFactoryProtocol")

-from collections.abc import Callable, Mapping
+from collections.abc import Mapping
 from typing import Protocol

 from lsst.resources import ResourcePathExpression
@@ -38,10 +40,6 @@ from ._butler_repo_index import ButlerRepoIndex
 from ._utilities.named_locks import NamedLocks
 from ._utilities.thread_safe_cache import ThreadSafeCache

-_FactoryFunction = Callable[[str | None], Butler]
-"""Function that takes an access token string or `None`, and returns a Butler
-instance."""
-

 class LabeledButlerFactoryProtocol(Protocol):
     """Callable to retrieve a butler from a label."""
@@ -84,7 +82,7 @@ class LabeledButlerFactory:
         else:
             self._repositories = dict(repositories)

-        self._factories = ThreadSafeCache[str, _FactoryFunction]()
+        self._factories = ThreadSafeCache[str, _ButlerFactory]()
         self._initialization_locks = NamedLocks()

         # This may be overridden by unit tests.
@@ -138,10 +136,18 @@ class LabeledButlerFactory:
         based on the end user instead of the service. See
         https://gafaelfawr.lsst.io/user-guide/gafaelfawringress.html#requesting-delegated-tokens
         """
-        factory = self._get_or_create_butler_factory_function(label)
-        return factory(access_token)
+        factory = self._get_or_create_butler_factory(label)
+        return factory.create_butler(access_token)
+
+    def close(self) -> None:
+        """Reset the factory cache, and release any resources associated with
+        the cached instances.
+        """
+        factories = self._factories.clear()
+        for factory in factories.values():
+            factory.close()

-    def _get_or_create_butler_factory_function(self, label: str) -> _FactoryFunction:
+    def _get_or_create_butler_factory(self, label: str) -> _ButlerFactory:
         # We maintain a separate lock per label. We only want to instantiate
         # one factory function per label, because creating the factory sets up
         # shared state that should only exist once per repository. However, we
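A usage sketch of the new close() method, assuming the existing create_butler(label=..., access_token=...) entry point and a label-to-config-URI mapping passed to the constructor:

    from lsst.daf.butler import LabeledButlerFactory

    factory = LabeledButlerFactory({"main": "/path/to/repo"})
    try:
        # DirectButler repositories ignore the access token.
        butler = factory.create_butler(label="main", access_token=None)
        ...
    finally:
        # Drop all cached per-label factories and release their resources
        # (e.g. the template DirectButler's database connections).
        factory.close()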
@@ -154,16 +160,16 @@ class LabeledButlerFactory:
             factory = self._create_butler_factory_function(label)
             return self._factories.set_or_get(label, factory)

-    def _create_butler_factory_function(self, label: str) -> _FactoryFunction:
+    def _create_butler_factory_function(self, label: str) -> _ButlerFactory:
         config_uri = self._get_config_uri(label)
         config = ButlerConfig(config_uri)
         butler_type = config.get_butler_type()

         match butler_type:
             case ButlerType.DIRECT:
-                return _create_direct_butler_factory(config, self._preload_unsafe_direct_butler_caches)
+                return _DirectButlerFactory(config, self._preload_unsafe_direct_butler_caches)
             case ButlerType.REMOTE:
-                return _create_remote_butler_factory(config)
+                return _RemoteButlerFactory(config)
             case _:
                 raise TypeError(f"Unknown butler type '{butler_type}' for label '{label}'")
@@ -177,34 +183,45 @@ class LabeledButlerFactory:
         return config_uri


-def _create_direct_butler_factory(config: ButlerConfig, preload_unsafe_caches: bool) -> _FactoryFunction:
-    import lsst.daf.butler.direct_butler
+class _ButlerFactory(Protocol):
+    def create_butler(self, access_token: str | None) -> Butler: ...
+    def close(self) -> None: ...
+
+
+class _DirectButlerFactory(_ButlerFactory):
+    def __init__(self, config: ButlerConfig, preload_unsafe_caches: bool) -> None:
+        import lsst.daf.butler.direct_butler

-    # Create a 'template' Butler that will be cloned when callers request an
-    # instance.
-    butler = Butler.from_config(config)
-    assert isinstance(butler, lsst.daf.butler.direct_butler.DirectButler)
+        # Create a 'template' Butler that will be cloned when callers request
+        # an instance.
+        self._butler = Butler.from_config(config)
+        assert isinstance(self._butler, lsst.daf.butler.direct_butler.DirectButler)

-    # Load caches so that data is available in cloned instances without
-    # needing to refetch it from the database for every instance.
-    butler._preload_cache(load_dimension_record_cache=preload_unsafe_caches)
+        # Load caches so that data is available in cloned instances without
+        # needing to refetch it from the database for every instance.
+        self._butler._preload_cache(load_dimension_record_cache=preload_unsafe_caches)

-    def create_butler(access_token: str | None) -> Butler:
+    def create_butler(self, access_token: str | None) -> Butler:
         # Access token is ignored because DirectButler does not use Gafaelfawr
         # authentication.
-        return butler.clone()
+        return self._butler.clone()

-    return create_butler
+    def close(self) -> None:
+        self._butler.close()


-def _create_remote_butler_factory(config: ButlerConfig) -> _FactoryFunction:
-    import lsst.daf.butler.remote_butler._factory
+class _RemoteButlerFactory(_ButlerFactory):
+    def __init__(self, config: ButlerConfig) -> None:
+        import lsst.daf.butler.remote_butler._factory

-    factory = lsst.daf.butler.remote_butler._factory.RemoteButlerFactory.create_factory_from_config(config)
+        self._factory = lsst.daf.butler.remote_butler._factory.RemoteButlerFactory.create_factory_from_config(
+            config
+        )

-    def create_butler(access_token: str | None) -> Butler:
+    def create_butler(self, access_token: str | None) -> Butler:
         if access_token is None:
             raise ValueError("Access token is required to connect to a Butler server")
-        return factory.create_butler_for_access_token(access_token)
+        return self._factory.create_butler_for_access_token(access_token)

-    return create_butler
+    def close(self) -> None:
+        pass
lsst/daf/butler/_quantum_backed.py
@@ -55,7 +55,7 @@ from .datastore import Datastore
 from .datastore.record_data import DatastoreRecordData, SerializedDatastoreRecordData
 from .datastores.file_datastore.retrieve_artifacts import retrieve_and_zip
 from .dimensions import DimensionUniverse
-from .registry.interfaces import DatastoreRegistryBridgeManager, OpaqueTableStorageManager
+from .registry.interfaces import Database, DatastoreRegistryBridgeManager, OpaqueTableStorageManager

 if TYPE_CHECKING:
     from ._butler import Butler
@@ -83,6 +83,9 @@ class QuantumBackedButler(LimitedButler):
         The registry dataset type definitions, indexed by name.
     metrics : `lsst.daf.butler.ButlerMetrics` or `None`, optional
         Metrics object for tracking butler statistics.
+    database : `Database`, optional
+        Database instance used by datastore. Not required -- only provided
+        to allow database connections to be closed during cleanup.

     Notes
     -----
@@ -130,6 +133,7 @@ class QuantumBackedButler(LimitedButler):
         storageClasses: StorageClassFactory,
         dataset_types: Mapping[str, DatasetType] | None = None,
         metrics: ButlerMetrics | None = None,
+        database: Database | None = None,
     ):
         self._dimensions = dimensions
         self._predicted_inputs = set(predicted_inputs)
@@ -142,6 +146,7 @@ class QuantumBackedButler(LimitedButler):
         self.storageClasses = storageClasses
         self._dataset_types: Mapping[str, DatasetType] = {}
         self._metrics = metrics if metrics is not None else ButlerMetrics()
+        self._database = database
         if dataset_types is not None:
             self._dataset_types = dataset_types
         self._datastore.set_retrieve_dataset_type_method(self._retrieve_dataset_type)
@@ -321,7 +326,7 @@ class QuantumBackedButler(LimitedButler):
             Metrics object for gathering butler statistics.
         """
         butler_config = ButlerConfig(config, searchPaths=search_paths)
-        datastore, _ = instantiate_standalone_datastore(
+        datastore, database = instantiate_standalone_datastore(
             butler_config, dimensions, filename, OpaqueManagerClass, BridgeManagerClass
         )
@@ -342,8 +347,13 @@ class QuantumBackedButler(LimitedButler):
             storageClasses=storageClasses,
             dataset_types=dataset_types,
             metrics=metrics,
+            database=database,
         )

+    def close(self) -> None:
+        if self._database is not None:
+            self._database.dispose()
+
     def _retrieve_dataset_type(self, name: str) -> DatasetType | None:
         """Return DatasetType defined in registry given dataset type name."""
         return self._dataset_types.get(name)
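close() only disposes of the database handle when one was provided, so it is safe to call unconditionally; contextlib.closing gives the same try/finally shape the Butler context manager provides. A sketch, where make_quantum_backed_butler() is a hypothetical stand-in for however the instance is actually constructed:

    from contextlib import closing

    with closing(make_quantum_backed_butler()) as qbb:
        ...  # use the QuantumBackedButler

    # closing() guarantees qbb.close() runs, which calls Database.dispose()
    # when a Database was supplied at construction time.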
lsst/daf/butler/_utilities/thread_safe_cache.py
@@ -76,3 +76,16 @@ class ThreadSafeCache(Generic[TKey, TValue]):
         """
         with self._mutex:
             return self._cache.setdefault(key, value)
+
+    def clear(self) -> dict[TKey, TValue]:
+        """Clear the cache.
+
+        Returns
+        -------
+        old_cache : `dict`
+            The values that were contained in the cache prior to clearing it.
+        """
+        with self._mutex:
+            old = self._cache
+            self._cache = {}
+            return old
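clear() swaps the whole dictionary out while holding the lock and returns it, so the caller can inspect or release the evicted values without racing concurrent writers; LabeledButlerFactory.close() above relies on exactly this. A sketch using this internal utility:

    from lsst.daf.butler._utilities.thread_safe_cache import ThreadSafeCache

    cache: ThreadSafeCache[str, int] = ThreadSafeCache()
    cache.set_or_get("a", 1)

    old = cache.clear()  # atomically detach the current contents
    assert old == {"a": 1}
    assert cache.set_or_get("a", 2) == 2  # the cache now starts empty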
lsst/daf/butler/cli/cmd/commands.py
@@ -33,6 +33,7 @@ from typing import Any
 import click

 from ... import script
+from ..._butler import Butler
 from ..opt import (
     collection_argument,
     collection_type_option,
@@ -487,9 +488,11 @@ def remove_dataset_type(*args: Any, **kwargs: Any) -> None:
 @options_file_option()
 def query_datasets(**kwargs: Any) -> None:
     """List the datasets in a repository."""
-    for table in script.QueryDatasets(**kwargs).getTables():
-        print("")
-        table.pprint_all()
+    repo = kwargs.pop("repo")
+    with Butler.from_config(repo, writeable=False) as butler:
+        for table in script.QueryDatasets(butler=butler, **kwargs).getTables():
+            print("")
+            table.pprint_all()
     print("")
lsst/daf/butler/configs/datastores/formatters.yaml
@@ -93,8 +93,9 @@ MultipleCellCoadd: lsst.cell_coadds.CellCoaddFitsFormatter
 NNModelPackagePayload: lsst.meas.transiNet.modelPackages.NNModelPackageFormatter
 Timespan: lsst.daf.butler.formatters.json.JsonFormatter
 RegionTimeInfo: lsst.daf.butler.formatters.json.JsonFormatter
-QPEnsemble: lsst.meas.pz.qp_formatter.QPFormatter
-PZModel: lsst.meas.pz.model_formatter.ModelFormatter
+QPEnsemble: lsst.meas.photoz.base.qp_formatter.QPFormatter
+PZModel: lsst.meas.photoz.base.model_formatter.ModelFormatter
+PhotozModel: lsst.meas.photoz.base.model_formatter.ModelFormatter
 VisitBackgroundModel: lsst.daf.butler.formatters.json.JsonFormatter
 VignettingCorrection: lsst.ts.observatory.control.utils.extras.vignetting_storage.VignettingCorrectionFormatter
 SSPAuxiliaryFile: lsst.pipe.tasks.sspAuxiliaryFile.SSPAuxiliaryFileFormatter
lsst/daf/butler/configs/datastores/writeRecipes.yaml
@@ -12,19 +12,19 @@ lsst.obs.base.formatters.fitsExposure.StandardFitsImageFormatterBase: &StandardF
     variance:
       <<: *losslessOptions

-  # Basic lossy (quantizing) compression
-  lossyBasic: &lossyBasic
-    image: &lossyBasicOptions
+  # Lossy (quantizing) compression with noise sigma subdivided into 16.
+  lossy16:
+    image: &lossy16Options
       algorithm: RICE_1
       quantization:
         dither: SUBTRACTIVE_DITHER_2
         scaling: STDEV_MASKED
-        mask_planes: ["NO_DATA"]
-        level: 10.0
+        mask_planes: ["NO_DATA", "INTRP"]
+        level: 16.0
     mask:
       <<: *losslessOptions
     variance:
-      <<: *lossyBasicOptions
+      <<: *lossy16Options

   # Set the default
   default:
lsst/daf/butler/configs/storageClasses.yaml
@@ -433,6 +433,8 @@ storageClasses:
     pytype: qp.Ensemble
   PZModel:
     pytype: rail.core.model.Model
+  PhotozModel:
+    pytype: rail.core.model.Model
   VisitBackgroundModel:
     pytype: lsst.drp.tasks.fit_visit_background.VisitBackgroundModel
   VignettingCorrection:
lsst/daf/butler/delegates/arrowtable.py
@@ -229,7 +229,9 @@ def _add_arrow_provenance(
     type_string = _checkArrowCompatibleType(in_memory_dataset)
     if type_string == "astropy":
         provenance = provenance if provenance is not None else DatasetProvenance()
-        prov_dict = provenance.to_flat_dict(ref, prefix="LSST.BUTLER", sep=".", simple_types=True)
+        prov_dict = provenance.to_flat_dict(
+            ref, prefix="LSST.BUTLER", sep=".", simple_types=True, max_inputs=2000
+        )

         # Strip any previous provenance.
         DatasetProvenance.strip_provenance_from_flat_dict(in_memory_dataset.meta)
lsst/daf/butler/dimensions/_schema.py
@@ -29,20 +29,17 @@ from __future__ import annotations
 __all__ = ("DimensionRecordSchema", "addDimensionForeignKey")

 import copy
-from collections.abc import Mapping, Set
+from collections.abc import Set
 from typing import TYPE_CHECKING

-from lsst.utils.classes import cached_getter, immutable
+from lsst.utils.classes import immutable

 from .. import arrow_utils, ddl
-from .._column_tags import DimensionKeyColumnTag, DimensionRecordColumnTag
 from .._named import NamedValueAbstractSet, NamedValueSet
 from ..column_spec import RegionColumnSpec, TimespanColumnSpec
 from ..timespan_database_representation import TimespanDatabaseRepresentation

 if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
-    from lsst.daf.relation import ColumnTag
-
     from ._elements import Dimension, DimensionElement, KeyColumnSpec, MetadataColumnSpec
     from ._group import DimensionGroup
@@ -387,25 +384,6 @@ class DimensionElementFields:
         lines.append("    timespan: lsst.daf.butler.Timespan")
         return "\n".join(lines)

-    @property
-    @cached_getter
-    def columns(self) -> Mapping[ColumnTag, str]:
-        """A mapping from `ColumnTag` to field name for all fields in this
-        element's records (`~collections.abc.Mapping`).
-        """
-        result: dict[ColumnTag, str] = {}
-        for dimension_name, field_name in zip(
-            self.element.dimensions.names, self.dimensions.names, strict=True
-        ):
-            result[DimensionKeyColumnTag(dimension_name)] = field_name
-        for field_name in self.facts.names:
-            result[DimensionRecordColumnTag(self.element.name, field_name)] = field_name
-        if self.element.spatial:
-            result[DimensionRecordColumnTag(self.element.name, "region")] = "region"
-        if self.element.temporal:
-            result[DimensionRecordColumnTag(self.element.name, "timespan")] = "timespan"
-        return result
-
     element: DimensionElement
     """The dimension element these fields correspond to.