lsst-daf-butler 29.0.1__py3-none-any.whl → 29.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/daf/butler/__init__.py +1 -0
- lsst/daf/butler/_butler.py +57 -10
- lsst/daf/butler/_butler_collections.py +4 -0
- lsst/daf/butler/_butler_instance_options.py +3 -0
- lsst/daf/butler/_butler_metrics.py +117 -0
- lsst/daf/butler/_config.py +1 -1
- lsst/daf/butler/_dataset_ref.py +99 -16
- lsst/daf/butler/_file_dataset.py +78 -3
- lsst/daf/butler/_limited_butler.py +34 -2
- lsst/daf/butler/_quantum_backed.py +23 -4
- lsst/daf/butler/arrow_utils.py +7 -9
- lsst/daf/butler/cli/butler.py +1 -1
- lsst/daf/butler/cli/cmd/_remove_runs.py +2 -0
- lsst/daf/butler/cli/cmd/commands.py +25 -1
- lsst/daf/butler/cli/utils.py +32 -4
- lsst/daf/butler/column_spec.py +77 -34
- lsst/daf/butler/configs/datastores/formatters.yaml +1 -0
- lsst/daf/butler/configs/storageClasses.yaml +2 -0
- lsst/daf/butler/datastore/_datastore.py +30 -4
- lsst/daf/butler/datastore/generic_base.py +2 -2
- lsst/daf/butler/datastores/chainedDatastore.py +63 -92
- lsst/daf/butler/datastores/fileDatastore.py +371 -97
- lsst/daf/butler/datastores/inMemoryDatastore.py +33 -5
- lsst/daf/butler/dimensions/_coordinate.py +4 -15
- lsst/daf/butler/dimensions/_group.py +15 -5
- lsst/daf/butler/dimensions/_record_set.py +469 -4
- lsst/daf/butler/dimensions/_record_table.py +1 -1
- lsst/daf/butler/dimensions/_records.py +127 -6
- lsst/daf/butler/dimensions/_universe.py +12 -8
- lsst/daf/butler/dimensions/record_cache.py +1 -2
- lsst/daf/butler/direct_butler/_direct_butler.py +406 -225
- lsst/daf/butler/direct_query_driver/_driver.py +30 -11
- lsst/daf/butler/direct_query_driver/_query_builder.py +74 -17
- lsst/daf/butler/direct_query_driver/_sql_column_visitor.py +28 -1
- lsst/daf/butler/pydantic_utils.py +26 -0
- lsst/daf/butler/queries/_expression_strings.py +24 -0
- lsst/daf/butler/queries/_identifiers.py +4 -1
- lsst/daf/butler/queries/_query.py +48 -1
- lsst/daf/butler/queries/expression_factory.py +16 -0
- lsst/daf/butler/queries/overlaps.py +1 -1
- lsst/daf/butler/{direct_query_driver/_predicate_constraints_summary.py → queries/predicate_constraints_summary.py} +2 -2
- lsst/daf/butler/queries/tree/_column_expression.py +39 -0
- lsst/daf/butler/queries/tree/_column_set.py +1 -1
- lsst/daf/butler/queries/tree/_predicate.py +19 -9
- lsst/daf/butler/registry/bridge/ephemeral.py +16 -6
- lsst/daf/butler/registry/bridge/monolithic.py +78 -37
- lsst/daf/butler/registry/collections/_base.py +23 -6
- lsst/daf/butler/registry/connectionString.py +5 -10
- lsst/daf/butler/registry/databases/postgresql.py +50 -0
- lsst/daf/butler/registry/databases/sqlite.py +46 -0
- lsst/daf/butler/registry/datasets/byDimensions/_manager.py +77 -64
- lsst/daf/butler/registry/datasets/byDimensions/summaries.py +4 -4
- lsst/daf/butler/registry/dimensions/static.py +20 -8
- lsst/daf/butler/registry/interfaces/_bridge.py +13 -1
- lsst/daf/butler/registry/interfaces/_database.py +21 -0
- lsst/daf/butler/registry/interfaces/_datasets.py +4 -16
- lsst/daf/butler/registry/interfaces/_dimensions.py +7 -2
- lsst/daf/butler/registry/queries/expressions/_predicate.py +35 -19
- lsst/daf/butler/registry/queries/expressions/check.py +29 -10
- lsst/daf/butler/registry/queries/expressions/normalForm.py +15 -0
- lsst/daf/butler/registry/queries/expressions/parser/exprTree.py +136 -23
- lsst/daf/butler/registry/queries/expressions/parser/parserLex.py +10 -1
- lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py +47 -24
- lsst/daf/butler/registry/queries/expressions/parser/treeVisitor.py +49 -10
- lsst/daf/butler/registry/sql_registry.py +17 -45
- lsst/daf/butler/registry/tests/_registry.py +60 -32
- lsst/daf/butler/remote_butler/_http_connection.py +15 -3
- lsst/daf/butler/remote_butler/_query_driver.py +5 -7
- lsst/daf/butler/remote_butler/_registry.py +3 -2
- lsst/daf/butler/remote_butler/_remote_butler.py +50 -27
- lsst/daf/butler/remote_butler/server/_config.py +68 -13
- lsst/daf/butler/remote_butler/server/_dependencies.py +68 -3
- lsst/daf/butler/remote_butler/server/_gafaelfawr.py +125 -0
- lsst/daf/butler/remote_butler/server/_server.py +11 -4
- lsst/daf/butler/remote_butler/server/_telemetry.py +105 -0
- lsst/daf/butler/remote_butler/server/handlers/_external.py +10 -2
- lsst/daf/butler/remote_butler/server/handlers/_query_serialization.py +5 -7
- lsst/daf/butler/remote_butler/server/handlers/_query_streaming.py +7 -3
- lsst/daf/butler/script/ingest_zip.py +13 -1
- lsst/daf/butler/script/queryCollections.py +185 -29
- lsst/daf/butler/script/removeRuns.py +2 -5
- lsst/daf/butler/script/retrieveArtifacts.py +1 -0
- lsst/daf/butler/script/transferDatasets.py +5 -0
- lsst/daf/butler/tests/butler_queries.py +236 -23
- lsst/daf/butler/tests/cliCmdTestBase.py +1 -1
- lsst/daf/butler/tests/hybrid_butler.py +37 -8
- lsst/daf/butler/tests/hybrid_butler_registry.py +15 -2
- lsst/daf/butler/tests/server.py +28 -3
- lsst/daf/butler/version.py +1 -1
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/METADATA +1 -1
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/RECORD +99 -96
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/WHEEL +1 -1
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/entry_points.txt +0 -0
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/licenses/LICENSE +0 -0
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/top_level.txt +0 -0
- {lsst_daf_butler-29.0.1.dist-info → lsst_daf_butler-29.1.0rc1.dist-info}/zip-safe +0 -0
lsst/daf/butler/__init__.py
CHANGED
@@ -38,6 +38,7 @@ from . import ddl, time_utils
 from ._butler import *
 from ._butler_collections import *
 from ._butler_config import *
+from ._butler_metrics import *
 from ._butler_repo_index import *
 from ._collection_type import CollectionType
 from ._column_categorization import *
lsst/daf/butler/_butler.py
CHANGED
@@ -46,6 +46,7 @@ from lsst.utils.logging import getLogger
 from ._butler_collections import ButlerCollections
 from ._butler_config import ButlerConfig, ButlerType
 from ._butler_instance_options import ButlerInstanceOptions
+from ._butler_metrics import ButlerMetrics
 from ._butler_repo_index import ButlerRepoIndex
 from ._config import Config, ConfigSubset
 from ._exceptions import EmptyQueryResultError, InvalidQueryError
@@ -89,6 +90,10 @@ class SpecificButlerDataset:
     dataset: DatasetRef | None


+class _DeprecatedDefault:
+    """Default value for a deprecated parameter."""
+
+
 class Butler(LimitedButler):  # numpydoc ignore=PR02
     """Interface for data butler and factory for Butler instances.

@@ -154,6 +159,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         writeable: bool | None = None,
         inferDefaults: bool = True,
         without_datastore: bool = False,
+        metrics: ButlerMetrics | None = None,
         **kwargs: Any,
     ) -> Butler:
         if cls is Butler:
@@ -165,6 +171,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
                 writeable=writeable,
                 inferDefaults=inferDefaults,
                 without_datastore=without_datastore,
+                metrics=metrics,
                 **kwargs,
             )

@@ -183,6 +190,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         writeable: bool | None = None,
         inferDefaults: bool = True,
         without_datastore: bool = False,
+        metrics: ButlerMetrics | None = None,
         **kwargs: Any,
     ) -> Butler:
         """Create butler instance from configuration.
@@ -230,6 +238,8 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         without_datastore : `bool`, optional
             If `True` do not attach a datastore to this butler. Any attempts
             to use a datastore will fail.
+        metrics : `ButlerMetrics` or `None`, optional
+            Metrics object to record butler usage statistics.
         **kwargs : `Any`
             Default data ID key-value pairs. These may only identify
             "governor" dimensions like ``instrument`` and ``skymap``.
@@ -300,6 +310,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         # passing the "butler" parameter to its constructor. This has
        # been moved out of the constructor into Butler.clone().
         butler = kwargs.pop("butler", None)
+        metrics = metrics if metrics is not None else ButlerMetrics()
         if butler is not None:
             if not isinstance(butler, Butler):
                 raise TypeError("'butler' parameter must be a Butler instance")
@@ -307,10 +318,17 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             raise TypeError(
                 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
             )
-            return butler.clone(
+            return butler.clone(
+                collections=collections, run=run, inferDefaults=inferDefaults, metrics=metrics, dataId=kwargs
+            )

         options = ButlerInstanceOptions(
-            collections=collections,
+            collections=collections,
+            run=run,
+            writeable=writeable,
+            inferDefaults=inferDefaults,
+            metrics=metrics,
+            kwargs=kwargs,
         )

         # Load the Butler configuration. This may involve searching the
@@ -1274,7 +1292,13 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         raise NotImplementedError()

     @abstractmethod
-    def removeRuns(
+    def removeRuns(
+        self,
+        names: Iterable[str],
+        unstore: bool | type[_DeprecatedDefault] = _DeprecatedDefault,
+        *,
+        unlink_from_chains: bool = False,
+    ) -> None:
         """Remove one or more `~CollectionType.RUN` collections and the
         datasets within them.

@@ -1287,7 +1311,13 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             they are present, and attempt to rollback the registry deletions if
             datastore deletions fail (which may not always be possible). If
             `False`, datastore records for these datasets are still removed,
-            but any artifacts (e.g. files) will not be.
+            but any artifacts (e.g. files) will not be. This parameter is now
+            deprecated and no longer has any effect. Files are always deleted
+            from datastores unless they were ingested using full URIs.
+        unlink_from_chains : `bool`, optional
+            If `True` remove the RUN collection from any chains prior to
+            removing the RUN. If `False` the removal will fail if any chains
+            still refer to the RUN.

         Raises
         ------
@@ -1360,7 +1390,14 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         raise NotImplementedError()

     @abstractmethod
-    def ingest_zip(
+    def ingest_zip(
+        self,
+        zip_file: ResourcePathExpression,
+        transfer: str = "auto",
+        *,
+        transfer_dimensions: bool = False,
+        dry_run: bool = False,
+    ) -> None:
         """Ingest a Zip file into this butler.

         The Zip file must have been created by `retrieve_artifacts_zip`.
@@ -1371,10 +1408,17 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             Path to the Zip file.
         transfer : `str`, optional
             Method to use to transfer the Zip into the datastore.
+        transfer_dimensions : `bool`, optional
+            If `True`, dimension record data associated with the new datasets
+            will be transferred from the Zip file, if present.
+        dry_run : `bool`, optional
+            If `True` the ingest will be processed without any modifications
+            made to the target butler and as if the target butler did not
+            have any of the datasets.

         Notes
         -----
-        Run collections are created as needed.
+        Run collections and dataset types are created as needed.
         """
         raise NotImplementedError()

@@ -1741,8 +1785,8 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             warn_limit = True
         with self.query() as query:
             result = (
-                query.
-                .
+                query.data_ids(dimensions)
+                .where(data_id, where, bind=bind, **kwargs)
                 .order_by(*ensure_iterable(order_by))
                 .limit(query_limit)
             )
@@ -1979,8 +2023,8 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
             warn_limit = True
         with self.query() as query:
             result = (
-                query.
-                .
+                query.dimension_records(element)
+                .where(data_id, where, bind=bind, **kwargs)
                 .order_by(*ensure_iterable(order_by))
                 .limit(query_limit)
             )
@@ -2123,6 +2167,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         run: str | None | EllipsisType = ...,
         inferDefaults: bool | EllipsisType = ...,
         dataId: dict[str, str] | EllipsisType = ...,
+        metrics: ButlerMetrics | None = None,
     ) -> Butler:
         """Return a new Butler instance connected to the same repository
         as this one, optionally overriding ``collections``, ``run``,
@@ -2142,5 +2187,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         dataId : `str`
            Same as ``kwargs`` passed to the constructor. If omitted, copies
            values from original object.
+        metrics : `ButlerMetrics` or `None`, optional
+            Metrics object to record butler statistics.
         """
         raise NotImplementedError()
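A minimal usage sketch of the new `metrics` argument and the updated `removeRuns`/`ingest_zip` signatures; the repository path, run names, and Zip path below are placeholders, not values from this diff:

    from lsst.daf.butler import Butler, ButlerMetrics

    metrics = ButlerMetrics()
    butler = Butler.from_config("/repo/example", run="u/demo/run", metrics=metrics)

    # Detach the RUN from any CHAINED collections before deleting it; without
    # unlink_from_chains=True the removal fails if chains still refer to it.
    butler.removeRuns(["u/demo/old_run"], unlink_from_chains=True)

    # Report what an ingest would do without modifying the target butler.
    butler.ingest_zip("/tmp/artifacts.zip", transfer="auto", dry_run=True)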
lsst/daf/butler/_butler_collections.py
CHANGED
@@ -108,6 +108,10 @@ class ButlerCollections(ABC, Sequence):
         """Collection defaults associated with this butler."""
         raise NotImplementedError("Defaults must be implemented by a subclass")

+    def __str__(self) -> str:
+        """Return string representation."""
+        return f"{self.__class__.__name__}(defaults={self.defaults})"
+
     @abstractmethod
     def extend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None:
         """Add children to the end of a CHAINED collection.
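A one-line illustration of the new default `__str__` (sketch; assumes `butler` is an existing `Butler` instance):

    # Prints the concrete subclass name plus its collection defaults,
    # e.g. "...Collections(defaults=...)".
    print(str(butler.collections))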
lsst/daf/butler/_butler_instance_options.py
CHANGED
@@ -30,6 +30,8 @@ __all__ = ("ButlerInstanceOptions",)
 import dataclasses
 from typing import Any

+from ._butler_metrics import ButlerMetrics
+

 @dataclasses.dataclass(frozen=True)
 class ButlerInstanceOptions:
@@ -43,4 +45,5 @@ class ButlerInstanceOptions:
     run: str | None = None
     writeable: bool | None = None
     inferDefaults: bool = True
+    metrics: ButlerMetrics = dataclasses.field(default_factory=ButlerMetrics)
     kwargs: dict[str, Any] = dataclasses.field(default_factory=dict)
lsst/daf/butler/_butler_metrics.py
ADDED
@@ -0,0 +1,117 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+from collections.abc import Callable, Iterator
+from contextlib import contextmanager
+
+from pydantic import BaseModel
+
+from lsst.utils.logging import LsstLoggers
+from lsst.utils.timer import time_this
+
+
+class ButlerMetrics(BaseModel):
+    """Metrics collected during Butler operations."""
+
+    time_in_put: float = 0.0
+    """Wall-clock time, in seconds, spent in put()."""
+
+    time_in_get: float = 0.0
+    """Wall-clock time, in seconds, spent in get()."""
+
+    n_get: int = 0
+    """Number of datasets retrieved with get()."""
+
+    n_put: int = 0
+    """Number of datasets stored with put()."""
+
+    def reset(self) -> None:
+        """Reset all metrics."""
+        self.time_in_put = 0.0
+        self.time_in_get = 0.0
+        self.n_get = 0
+        self.n_put = 0
+
+    def increment_get(self, duration: float) -> None:
+        """Increment time for get().
+
+        Parameters
+        ----------
+        duration : `float`
+            Duration to add to the get() statistics.
+        """
+        self.time_in_get += duration
+        self.n_get += 1
+
+    def increment_put(self, duration: float) -> None:
+        """Increment time for put().
+
+        Parameters
+        ----------
+        duration : `float`
+            Duration to add to the put() statistics.
+        """
+        self.time_in_put += duration
+        self.n_put += 1
+
+    @contextmanager
+    def _timer(
+        self, handler: Callable[[float], None], log: LsstLoggers | None = None, msg: str | None = None
+    ) -> Iterator[None]:
+        with time_this(log=log, msg=msg) as timer:
+            yield
+        handler(timer.duration)
+
+    @contextmanager
+    def instrument_get(self, log: LsstLoggers | None = None, msg: str | None = None) -> Iterator[None]:
+        """Run code and increment get statistics.
+
+        Parameters
+        ----------
+        log : `logging.Logger` or `None`
+            Logger to use for any timing information.
+        msg : `str` or `None`
+            Any message to be included in log output.
+        """
+        with self._timer(self.increment_get, log=log, msg=msg):
+            yield
+
+    @contextmanager
+    def instrument_put(self, log: LsstLoggers | None = None, msg: str | None = None) -> Iterator[None]:
+        """Run code and increment put statistics.
+
+        Parameters
+        ----------
+        log : `logging.Logger` or `None`
+            Logger to use for any timing information.
+        msg : `str` or `None`
+            Any message to be included in log output.
+        """
+        with self._timer(self.increment_put, log=log, msg=msg):
+            yield
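The new file above is self-contained, so its behaviour can be sketched directly (the message strings are arbitrary):

    from lsst.daf.butler import ButlerMetrics

    metrics = ButlerMetrics()
    with metrics.instrument_put(msg="stored dataset"):
        ...  # a datastore put() would run here
    with metrics.instrument_get(msg="retrieved dataset"):
        ...  # a datastore get() would run here

    assert metrics.n_put == 1 and metrics.n_get == 1
    print(f"put: {metrics.time_in_put:.3f}s, get: {metrics.time_in_get:.3f}s")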
lsst/daf/butler/_config.py
CHANGED
@@ -1254,7 +1254,7 @@ class ConfigSubset(Config):

     Global defaults, at lowest priority, are found in the ``config``
     directory of the butler source tree. Additional defaults can be
-    defined using the environment variable ``$
+    defined using the environment variable ``$DAF_BUTLER_CONFIG_PATH``
     which is a PATH-like variable where paths at the front of the list
     have priority over those later.

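Since ``$DAF_BUTLER_CONFIG_PATH`` is PATH-like, it takes colon-separated entries with earlier entries winning; a sketch with placeholder paths:

    import os

    # Overrides in the first directory take priority over the second.
    os.environ["DAF_BUTLER_CONFIG_PATH"] = "/home/user/butler_overrides:/site/butler_defaults"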
lsst/daf/butler/_dataset_ref.py
CHANGED
@@ -66,8 +66,11 @@ from ._named import NamedKeyDict
 from .datastore.stored_file_info import StoredDatastoreItemInfo
 from .dimensions import (
     DataCoordinate,
+    DimensionDataAttacher,
+    DimensionDataExtractor,
     DimensionGroup,
     DimensionUniverse,
+    SerializableDimensionData,
     SerializedDataCoordinate,
     SerializedDataId,
 )
@@ -907,6 +910,62 @@ class MinimalistSerializableDatasetRef(pydantic.BaseModel):
     data_id: SerializedDataId
     """Data coordinate of this dataset."""

+    def to_dataset_ref(
+        self,
+        id: DatasetId,
+        *,
+        dataset_type: DatasetType,
+        universe: DimensionUniverse,
+        attacher: DimensionDataAttacher | None = None,
+    ) -> DatasetRef:
+        """Convert serialized object to a `DatasetRef`.
+
+        Parameters
+        ----------
+        id : `DatasetId`
+            UUID identifying the dataset.
+        dataset_type : `DatasetType`
+            `DatasetType` record corresponding to the dataset type name in the
+            serialized object.
+        universe : `DimensionUniverse`
+            Dimension universe for the dataset.
+        attacher : `DimensionDataAttacher`, optional
+            If provided, will be used to add dimension records to the
+            deserialized `DatasetRef` instance.
+
+        Returns
+        -------
+        ref : `DatasetRef`
+            The deserialized object.
+        """
+        assert dataset_type.name == self.dataset_type_name, (
+            "Given DatasetType does not match the serialized dataset type name"
+        )
+        simple_data_id = SerializedDataCoordinate(dataId=self.data_id)
+        data_id = DataCoordinate.from_simple(simple=simple_data_id, universe=universe)
+        if attacher:
+            data_ids = attacher.attach(dataset_type.dimensions, [data_id])
+            data_id = data_ids[0]
+        return DatasetRef(
+            id=id,
+            run=self.run,
+            datasetType=dataset_type,
+            dataId=data_id,
+        )
+
+    @staticmethod
+    def from_dataset_ref(ref: DatasetRef) -> MinimalistSerializableDatasetRef:
+        """Serialize a `DatasetRef` to a simplified format.
+
+        Parameters
+        ----------
+        ref : `DatasetRef`
+            `DatasetRef` object to serialize.
+        """
+        return MinimalistSerializableDatasetRef(
+            dataset_type_name=ref.datasetType.name, run=ref.run, data_id=dict(ref.dataId.mapping)
+        )
+

 class SerializedDatasetRefContainer(pydantic.BaseModel):
     """Serializable model for a collection of DatasetRef.
@@ -938,6 +997,9 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
     compact_refs: dict[uuid.UUID, MinimalistSerializableDatasetRef]
     """Minimal dataset ref information indexed by UUID."""

+    dimension_records: SerializableDimensionData | None = None
+    """Dimension record information"""
+
     def __len__(self) -> int:
         """Return the number of datasets in the container."""
         return len(self.compact_refs)
@@ -957,19 +1019,32 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
         universe: DimensionUniverse | None = None
         dataset_types: dict[str, SerializedDatasetType] = {}
         compact_refs: dict[uuid.UUID, MinimalistSerializableDatasetRef] = {}
+        data_ids: list[DataCoordinate] = []
+        dimensions: list[DimensionGroup] = []
         for ref in refs:
-            simple_ref = ref.to_simple()
-            dataset_type = simple_ref.datasetType
-            assert dataset_type is not None  # For mypy
             if universe is None:
                 universe = ref.datasetType.dimensions.universe
-            if (name :=
-            dataset_types[name] =
-
-
-
-
+            if (name := ref.datasetType.name) not in dataset_types:
+                dataset_types[name] = ref.datasetType.to_simple()
+            compact_refs[ref.id] = MinimalistSerializableDatasetRef.from_dataset_ref(ref)
+            if ref.dataId.hasRecords():
+                dimensions.append(ref.datasetType.dimensions)
+                data_ids.append(ref.dataId)
+
+        # Extract dimension record metadata if present.
+        dimension_records = None
+        if data_ids and len(compact_refs) == len(data_ids):
+            dimension_group = DimensionGroup.union(*dimensions, universe=universe)
+
+            # Records were attached to all refs. Store them.
+            extractor = DimensionDataExtractor.from_dimension_group(
+                dimension_group,
+                ignore_cached=False,
+                include_skypix=False,
             )
+            extractor.update(data_ids)
+            dimension_records = SerializableDimensionData.from_record_sets(extractor.records.values())

         if universe:
             universe_version = universe.version
             universe_namespace = universe.namespace
@@ -982,6 +1057,7 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
             universe_namespace=universe_namespace,
             dataset_types=dataset_types,
             compact_refs=compact_refs,
+            dimension_records=dimension_records,
         )

     def to_refs(self, universe: DimensionUniverse) -> list[DatasetRef]:
@@ -1019,15 +1095,22 @@ class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
             name: DatasetType.from_simple(dtype, universe=universe)
             for name, dtype in self.dataset_types.items()
         }
+
+        # Dimension records can be attached if available.
+        # We assume that all dimension information was stored.
+        attacher = None
+        if self.dimension_records:
+            attacher = DimensionDataAttacher(
+                deserializers=self.dimension_records.make_deserializers(universe)
+            )
+
         refs: list[DatasetRef] = []
         for id_, minimal in self.compact_refs.items():
-
-
-
-
-
-            datasetType=dataset_types[minimal.dataset_type_name],
-            dataId=data_id,
+            ref = minimal.to_dataset_ref(
+                id_,
+                dataset_type=dataset_types[minimal.dataset_type_name],
+                universe=universe,
+                attacher=attacher,
             )
             refs.append(ref)
         return refs
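A hedged round-trip sketch for the container changes above. `refs` and `butler` are assumed to exist, and the builder classmethod's name is not visible in this excerpt (`from_refs` is an assumption); `to_refs` and the pydantic base class are shown in the diff:

    # Serialize refs; dimension records are captured when attached to every ref.
    container = SerializedDatasetRefContainerV1.from_refs(refs)
    payload = container.model_dump_json()

    # Deserialize; records are re-attached via the stored dimension_records.
    restored = SerializedDatasetRefContainerV1.model_validate_json(payload)
    round_tripped = restored.to_refs(universe=butler.dimensions)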
lsst/daf/butler/_file_dataset.py
CHANGED
@@ -27,15 +27,21 @@

 from __future__ import annotations

-__all__ =
+__all__ = ("FileDataset", "SerializedFileDataset")

+import uuid
+from collections.abc import Callable
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, TypeAlias
+
+import pydantic

 from lsst.resources import ResourcePath, ResourcePathExpression

-from ._dataset_ref import DatasetRef
+from ._dataset_ref import DatasetRef, MinimalistSerializableDatasetRef
+from ._dataset_type import DatasetType
 from ._formatter import FormatterParameter
+from .dimensions import DimensionUniverse


 @dataclass
@@ -87,3 +93,72 @@ class FileDataset:
         if not isinstance(other, type(self)):
             return NotImplemented
         return str(self.path) < str(other.path)
+
+    def to_simple(self) -> SerializedFileDataset:
+        """
+        Convert this instance to a simplified, JSON-serializable object.
+
+        Returns
+        -------
+        serialized : `SerializedFileDataset`
+            Serializable representation of this `FileDataset` instance.
+        """
+        if self.formatter is None:
+            formatter = None
+        elif isinstance(self.formatter, str):
+            formatter = self.formatter
+        else:
+            formatter = self.formatter.name()
+
+        refs = {ref.id: MinimalistSerializableDatasetRef.from_dataset_ref(ref) for ref in self.refs}
+
+        return SerializedFileDataset(
+            refs=refs,
+            path=str(self.path),
+            formatter=formatter,
+        )
+
+    @staticmethod
+    def from_simple(
+        dataset: SerializedFileDataset, *, dataset_type_loader: DatasetTypeLoader, universe: DimensionUniverse
+    ) -> FileDataset:
+        """
+        Deserialize a `SerializedFileDataset` into a `FileDataset`.
+
+        Parameters
+        ----------
+        dataset : `SerializedFileDataset`
+            Object to deserialize.
+        dataset_type_loader : `Callable` [[ `str` ], `DatasetType` ]
+            Function that takes a string dataset type name as its
+            only parameter, and returns an instance of `DatasetType`.
+            Used to deserialize the `DatasetRef` instances contained
+            in the serialized `FileDataset`.
+        universe : `DimensionUniverse`
+            Dimension universe associated with the `Butler` instance that
+            created the serialized `FileDataset` instance.
+
+        Returns
+        -------
+        file_dataset : `FileDataset`
+            Deserialized equivalent of the input dataset.
+        """
+        refs = [
+            ref.to_dataset_ref(id, universe=universe, dataset_type=dataset_type_loader(ref.dataset_type_name))
+            for id, ref in dataset.refs.items()
+        ]
+        return FileDataset(path=dataset.path, refs=refs, formatter=dataset.formatter)
+
+
+DatasetTypeLoader: TypeAlias = Callable[[str], DatasetType]
+"""Type signature for a function that takes a string dataset type name as its
+only parameter, and returns an instance of `DatasetType`.
+"""
+
+
+class SerializedFileDataset(pydantic.BaseModel):
+    """Serializable format of `FileDataset` object."""
+
+    refs: dict[uuid.UUID, MinimalistSerializableDatasetRef]
+    path: str
+    formatter: str | None = None
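A sketch of the new `FileDataset` serialization API (assumes `butler` and a `FileDataset` named `exported` already exist; `Butler.get_dataset_type` is used here as a plausible `dataset_type_loader`):

    serialized = exported.to_simple()
    payload = serialized.model_dump_json()

    restored = FileDataset.from_simple(
        SerializedFileDataset.model_validate_json(payload),
        dataset_type_loader=butler.get_dataset_type,  # resolves names to DatasetType
        universe=butler.dimensions,
    )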
lsst/daf/butler/_limited_butler.py
CHANGED
@@ -31,11 +31,13 @@ __all__ = ("LimitedButler",)

 import logging
 from abc import ABC, abstractmethod
-from collections.abc import Iterable
+from collections.abc import Iterable, Iterator
+from contextlib import contextmanager
 from typing import Any, ClassVar

 from lsst.resources import ResourcePath

+from ._butler_metrics import ButlerMetrics
 from ._dataset_provenance import DatasetProvenance
 from ._dataset_ref import DatasetRef
 from ._deferredDatasetHandle import DeferredDatasetHandle
@@ -138,7 +140,8 @@ class LimitedButler(ABC):
         to use a resolved `DatasetRef`. Subclasses can support more options.
         """
         log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
-
+        with self._metrics.instrument_get(log, msg="Retrieved dataset"):
+            return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

     def getDeferred(
         self,
@@ -415,6 +418,30 @@ class LimitedButler(ABC):
         """
         raise NotImplementedError()

+    @contextmanager
+    def record_metrics(self, metrics: ButlerMetrics | None = None) -> Iterator[ButlerMetrics]:
+        """Enable new metrics recording context.
+
+        Parameters
+        ----------
+        metrics : `lsst.daf.butler.ButlerMetrics`
+            Optional override metrics object. If given, this will be the
+            same object returned by the context manager.
+
+        Yields
+        ------
+        metrics : `lsst.daf.butler.ButlerMetrics`
+            Metrics recorded within this context. This temporarily replaces
+            any existing metrics object associated with this butler.
+        """
+        old_metrics = self._metrics
+        new_metrics = metrics if metrics is not None else ButlerMetrics()
+        try:
+            self._metrics = new_metrics
+            yield new_metrics
+        finally:
+            self._metrics = old_metrics
+
     @property
     @abstractmethod
     def dimensions(self) -> DimensionUniverse:
@@ -430,3 +457,8 @@
     """An object that maps known storage class names to objects that fully
     describe them (`StorageClassFactory`).
     """
+
+    _metrics: ButlerMetrics
+    """An object for recording metrics associated with this butler.
+    (`ButlerMetrics`)
+    """
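A short sketch of the new `record_metrics` context manager (assumes `butler` and `ref` already exist):

    with butler.record_metrics() as metrics:
        dataset = butler.get(ref)

    print(f"{metrics.n_get} get(s) took {metrics.time_in_get:.3f} s")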