lsst-daf-butler 30.0.0rc3__py3-none-any.whl → 30.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. lsst/daf/butler/_butler.py +19 -3
  2. lsst/daf/butler/_butler_collections.py +4 -4
  3. lsst/daf/butler/_butler_metrics.py +2 -0
  4. lsst/daf/butler/_dataset_provenance.py +1 -1
  5. lsst/daf/butler/_dataset_ref.py +1 -1
  6. lsst/daf/butler/_exceptions.py +2 -2
  7. lsst/daf/butler/_file_dataset.py +2 -1
  8. lsst/daf/butler/_formatter.py +12 -0
  9. lsst/daf/butler/_query_all_datasets.py +2 -0
  10. lsst/daf/butler/cli/cmd/_remove_runs.py +1 -12
  11. lsst/daf/butler/column_spec.py +4 -4
  12. lsst/daf/butler/datastore/_datastore.py +21 -1
  13. lsst/daf/butler/datastore/stored_file_info.py +2 -2
  14. lsst/daf/butler/datastores/chainedDatastore.py +4 -0
  15. lsst/daf/butler/datastores/fileDatastore.py +11 -1
  16. lsst/daf/butler/datastores/file_datastore/get.py +4 -4
  17. lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py +5 -1
  18. lsst/daf/butler/datastores/file_datastore/transfer.py +2 -2
  19. lsst/daf/butler/datastores/inMemoryDatastore.py +8 -0
  20. lsst/daf/butler/ddl.py +2 -2
  21. lsst/daf/butler/dimensions/_coordinate.py +6 -8
  22. lsst/daf/butler/dimensions/_record_set.py +1 -1
  23. lsst/daf/butler/dimensions/_records.py +9 -3
  24. lsst/daf/butler/direct_butler/_direct_butler.py +40 -23
  25. lsst/daf/butler/direct_query_driver/_driver.py +5 -4
  26. lsst/daf/butler/direct_query_driver/_result_page_converter.py +1 -1
  27. lsst/daf/butler/formatters/parquet.py +6 -6
  28. lsst/daf/butler/nonempty_mapping.py +1 -1
  29. lsst/daf/butler/persistence_context.py +8 -5
  30. lsst/daf/butler/queries/_general_query_results.py +1 -1
  31. lsst/daf/butler/queries/driver.py +1 -1
  32. lsst/daf/butler/queries/expression_factory.py +2 -2
  33. lsst/daf/butler/queries/expressions/parser/exprTree.py +1 -1
  34. lsst/daf/butler/queries/expressions/parser/parserYacc.py +1 -1
  35. lsst/daf/butler/queries/overlaps.py +2 -2
  36. lsst/daf/butler/queries/tree/_column_set.py +1 -1
  37. lsst/daf/butler/registry/_collection_record_cache.py +1 -1
  38. lsst/daf/butler/registry/_collection_summary_cache.py +5 -4
  39. lsst/daf/butler/registry/_registry.py +4 -0
  40. lsst/daf/butler/registry/databases/postgresql.py +2 -1
  41. lsst/daf/butler/registry/datasets/byDimensions/_dataset_type_cache.py +1 -1
  42. lsst/daf/butler/registry/datasets/byDimensions/_manager.py +4 -2
  43. lsst/daf/butler/registry/datasets/byDimensions/summaries.py +3 -2
  44. lsst/daf/butler/registry/interfaces/_datasets.py +2 -1
  45. lsst/daf/butler/registry/interfaces/_obscore.py +1 -1
  46. lsst/daf/butler/registry/obscore/_records.py +1 -1
  47. lsst/daf/butler/registry/obscore/_spatial.py +2 -2
  48. lsst/daf/butler/registry/queries/_results.py +2 -2
  49. lsst/daf/butler/registry/sql_registry.py +1 -1
  50. lsst/daf/butler/registry/wildcards.py +5 -5
  51. lsst/daf/butler/remote_butler/_get.py +1 -1
  52. lsst/daf/butler/remote_butler/_remote_butler.py +1 -0
  53. lsst/daf/butler/remote_butler/_remote_file_transfer_source.py +4 -0
  54. lsst/daf/butler/remote_butler/authentication/cadc.py +4 -3
  55. lsst/daf/butler/script/_pruneDatasets.py +4 -2
  56. lsst/daf/butler/script/configValidate.py +2 -2
  57. lsst/daf/butler/script/queryCollections.py +2 -2
  58. lsst/daf/butler/script/removeCollections.py +2 -0
  59. lsst/daf/butler/script/removeRuns.py +2 -0
  60. lsst/daf/butler/tests/cliCmdTestBase.py +2 -0
  61. lsst/daf/butler/tests/cliLogTestBase.py +2 -0
  62. lsst/daf/butler/tests/hybrid_butler.py +6 -1
  63. lsst/daf/butler/tests/registry_data/spatial.py +4 -2
  64. lsst/daf/butler/tests/utils.py +1 -1
  65. lsst/daf/butler/timespan_database_representation.py +3 -3
  66. lsst/daf/butler/version.py +1 -1
  67. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/METADATA +3 -2
  68. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/RECORD +76 -76
  69. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/WHEEL +1 -1
  70. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/entry_points.txt +0 -0
  71. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/COPYRIGHT +0 -0
  72. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/LICENSE +0 -0
  73. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/bsd_license.txt +0 -0
  74. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/gpl-v3.0.txt +0 -0
  75. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/top_level.txt +0 -0
  76. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/zip-safe +0 -0
@@ -138,7 +138,10 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  without_datastore : `bool`, optional
  If `True` do not attach a datastore to this butler. Any attempts
  to use a datastore will fail.
- **kwargs : `Any`
+ metrics : `ButlerMetrics` or `None`
+ External metrics object to be used for tracking butler usage. If `None`
+ a new metrics object is created.
+ **kwargs : `typing.Any`
  Additional keyword arguments passed to a constructor of actual butler
  class.
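The `metrics` argument documented in this hunk lets callers supply a shared `ButlerMetrics` object instead of having each butler create its own. A minimal sketch, assuming `ButlerMetrics` is exported from the top-level `lsst.daf.butler` package (consistent with the `__all__` change further down) and that `/repo/example` is a placeholder repository path:

```python
from lsst.daf.butler import Butler, ButlerMetrics

metrics = ButlerMetrics()

# Hypothetical repository path; any existing butler repo works here.
butler = Butler.from_config("/repo/example", writeable=False, metrics=metrics)

# get()/put() calls made through this butler now accumulate their timing
# statistics on the shared ``metrics`` object rather than a private one.
```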
@@ -240,7 +243,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  to use a datastore will fail.
  metrics : `ButlerMetrics` or `None`, optional
  Metrics object to record butler usage statistics.
- **kwargs : `Any`
+ **kwargs : `typing.Any`
  Default data ID key-value pairs. These may only identify
  "governor" dimensions like ``instrument`` and ``skymap``.

@@ -1390,6 +1393,10 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  raised if any datasets with the same dataset ID already exist
  in the datastore.

+ Returns
+ -------
+ None
+
  Raises
  ------
  TypeError
@@ -1429,6 +1436,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  *,
  transfer_dimensions: bool = False,
  dry_run: bool = False,
+ skip_existing: bool = False,
  ) -> None:
  """Ingest a Zip file into this butler.
@@ -1447,6 +1455,14 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  If `True` the ingest will be processed without any modifications
  made to the target butler and as if the target butler did not
  have any of the datasets.
+ skip_existing : `bool`, optional
+ If `True`, a zip will not be ingested if the dataset entries listed
+ in the index with the same dataset ID already exists in the butler.
+ If `False` (the default), a `ConflictingDefinitionError` will be
+ raised if any datasets with the same dataset ID already exist
+ in the repository. If, somehow, some datasets are known to the
+ butler and some are not, this is currently treated as an error
+ rather than attempting to do a partial ingest.

  Notes
  -----
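The new `skip_existing` flag documented above changes how `Butler.ingest_zip` reacts when the zip's datasets are already present. A hedged sketch, where `/repo/example` and `artifacts.zip` are placeholder paths and the zip was produced by `retrieve_artifacts_zip`:

```python
from lsst.daf.butler import Butler

butler = Butler.from_config("/repo/example", writeable=True)

# With skip_existing=True, re-ingesting a zip whose datasets are all
# already known becomes a no-op instead of raising
# ConflictingDefinitionError. A zip mixing known and unknown datasets
# is still rejected, per the docstring above.
butler.ingest_zip("artifacts.zip", transfer="copy", skip_existing=True)
```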
@@ -2024,7 +2040,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02

  Returns
  -------
- records : `list`[`DimensionRecord`]
+ records : `list` [`DimensionRecord`]
  Dimension records matching the given query parameters.

  Raises
@@ -360,10 +360,10 @@ class ButlerCollections(ABC, Sequence):
  name : `str`
  The name of the collection of interest.
  include_parents : `bool`, optional
- If `True` any parents of this collection will be included.
+ If `True` any parents of this collection will be included.
  include_summary : `bool`, optional
- If `True` dataset type names and governor dimensions of datasets
- stored in this collection will be included in the result.
+ If `True` dataset type names and governor dimensions of datasets
+ stored in this collection will be included in the result.

  Returns
  -------
@@ -464,7 +464,7 @@ class ButlerCollections(ABC, Sequence):

  Returns
  -------
- filtered : `~collections.abc.Mapping` [`str`, `list`[`str`]]
+ filtered : `~collections.abc.Mapping` [`str`, `list` [`str`]]
  Mapping of the dataset type name to its corresponding list of
  collection names.
  """
@@ -27,6 +27,8 @@

  from __future__ import annotations

+ __all__ = ["ButlerMetrics"]
+
  from collections.abc import Callable, Iterator
  from contextlib import contextmanager
  from typing import Concatenate, ParamSpec
@@ -267,7 +267,7 @@ class DatasetProvenance(pydantic.BaseModel):
  use_upper : `bool` or `None`
  If `True` use upper case for provenance keys, if `False` use lower
  case, if `None` match the case of the prefix.
- keys : `tuple` of `str` | `int`
+ *keys : `tuple` of `str` | `int`
  Components of key to combine with prefix and separator.

  Returns
@@ -479,7 +479,7 @@ class DatasetRef:

  Parameters
  ----------
- simple : `dict` of [`str`, `Any`]
+ simple : `dict` of [`str`, `typing.Any`]
  The value returned by `to_simple()`.
  universe : `DimensionUniverse`
  The special graph of all known dimensions.
@@ -196,8 +196,8 @@ class ValidationError(RuntimeError):


  class EmptyQueryResultError(Exception):
- """Exception raised when query methods return an empty result and `explain`
- flag is set.
+ """Exception raised when query methods return an empty result and
+ ``explain`` flag is set.

  Parameters
  ----------
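`EmptyQueryResultError` (reworded above) is what the convenience query methods raise when the ``explain`` flag is set and no rows come back. A hedged sketch, assuming `Butler.query_datasets` with its ``explain`` argument and placeholder dataset-type and collection names:

```python
from lsst.daf.butler import EmptyQueryResultError

try:
    refs = butler.query_datasets("raw", collections="HSC/raw/all", explain=True)
except EmptyQueryResultError as exc:
    # The exception message explains why the query matched nothing.
    print(exc)
```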
@@ -129,7 +129,8 @@ class FileDataset:
  ----------
  dataset : `SerializedFileDataset`
  Object to deserialize.
- dataset_type_loader : `Callable` [[ `str` ], `DatasetType` ]
+ dataset_type_loader : `~collections.abc.Callable` \
+ [[ `str` ], `DatasetType` ]
  Function that takes a string dataset type name as its
  only parameter, and returns an instance of `DatasetType`.
  Used to deserialize the `DatasetRef` instances contained
@@ -910,6 +910,10 @@ class FormatterV2:
  provenance : `DatasetProvenance` | `None`, optional
  Provenance to attach to the file being written.

+ Returns
+ -------
+ None
+
  Raises
  ------
  FormatterNotImplementedError
@@ -1137,6 +1141,10 @@ class FormatterV2:
  location : `Location`
  Location from which to extract a file extension.

+ Returns
+ -------
+ None
+
  Raises
  ------
  ValueError
@@ -1583,6 +1591,10 @@ class Formatter(metaclass=ABCMeta):
  location : `Location`
  Location from which to extract a file extension.

+ Returns
+ -------
+ None
+
  Raises
  ------
  NotImplementedError
@@ -151,6 +151,8 @@ def _filter_collections_and_dataset_types(

  Parameters
  ----------
+ butler
+ Butler repository to use.
  collections
  List of collection names or collection search globs.
  dataset_type_query
@@ -114,18 +114,7 @@ def remove_runs(context: click.Context, confirm: bool, force: bool, **kwargs: An

  This command can be used to remove RUN collections and the datasets within
  them.
-
- Parameters
- ----------
- context : `click.Context`
- Context provided by Click.
- confirm : `bool`
- Confirmation for removal of the run.
- force : `bool`
- Force removal.
- **kwargs : `dict` [`str`, `str`]
- The parameters to pass to `~lsst.daf.butler.script.removeRuns`.
- """
+ """ # numpydoc ignore=PR01
  result = script.removeRuns(**kwargs)
  canRemoveRuns = len(result.runs)
  if not canRemoveRuns:
@@ -109,12 +109,12 @@ class ColumnValueSerializer(ABC):

  Parameters
  ----------
- value : `Any`
+ value : `typing.Any`
  Column value to be serialized.

  Returns
  -------
- value : `Any`
+ value : `typing.Any`
  Column value in serializable format.
  """
  raise NotImplementedError
@@ -125,12 +125,12 @@ class ColumnValueSerializer(ABC):

  Parameters
  ----------
- value : `Any`
+ value : `typing.Any`
  Serialized column value.

  Returns
  -------
- value : `Any`
+ value : `typing.Any`
  Deserialized column value.
  """
  raise NotImplementedError
@@ -284,6 +284,14 @@ class DatasetRefURIs(abc.Sequence):
  def __repr__(self) -> str:
  return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"

+ def iter_all(self) -> Iterator[ResourcePath]:
+ """Iterate over all URIs without regard to whether they are primary
+ or component.
+ """
+ if self.primaryURI is not None:
+ yield self.primaryURI
+ yield from self.componentURIs.values()
+

  class Datastore(FileTransferSource, metaclass=ABCMeta):
  """Datastore interface.
@@ -536,7 +544,7 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):

  Returns
  -------
- exists : `dict`[`DatasetRef`, `bool`]
+ exists : `dict` [`DatasetRef`, `bool`]
  Mapping of dataset to boolean indicating whether the dataset
  is known to the datastore.
  """
@@ -825,6 +833,10 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):
  in an external system or if the file is to be compressed in place.
  It is up to the datastore whether this parameter is relevant.

+ Returns
+ -------
+ None
+
  Raises
  ------
  NotImplementedError
@@ -1143,6 +1155,10 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):
  Determine whether errors should be ignored. When multiple
  refs are being trashed there will be no per-ref check.

+ Returns
+ -------
+ None
+
  Raises
  ------
  FileNotFoundError
@@ -1278,6 +1294,10 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):
  Entity to compare with configuration retrieved using the
  specified lookup key.

+ Returns
+ -------
+ None
+
  Raises
  ------
  DatastoreValidationError
@@ -423,8 +423,8 @@ def make_datastore_path_relative(path: str) -> str:
  path : `str`
  The file path from a `StoredFileInfo`.

- Return
- ------
+ Returns
+ -------
  normalized_path : `str`
  The original path, if it was relative. Otherwise, a version of it that
  was converted to a relative path, stripping URI scheme and netloc from
@@ -1077,6 +1077,10 @@ class ChainedDatastore(Datastore):
  If `True`, output a log message for every validation error
  detected.

+ Returns
+ -------
+ None
+
  Raises
  ------
  DatastoreValidationError
@@ -2152,7 +2152,13 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):

  return artifact_map

- def ingest_zip(self, zip_path: ResourcePath, transfer: str | None, *, dry_run: bool = False) -> None:
+ def ingest_zip(
+ self,
+ zip_path: ResourcePath,
+ transfer: str | None,
+ *,
+ dry_run: bool = False,
+ ) -> None:
  """Ingest an indexed Zip file and contents.

  The Zip file must have an index file as created by `retrieveArtifacts`.
@@ -2976,6 +2982,10 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
  If `True`, output a log message for every validation error
  detected.

+ Returns
+ -------
+ None
+
  Raises
  ------
  DatastoreValidationError
@@ -97,12 +97,12 @@ def generate_datastore_get_information(

  Parameters
  ----------
- fileLocations : `list`[`DatasetLocationInformation`]
+ fileLocations : `list` [`DatasetLocationInformation`]
  List of file locations for this artifact and their associated datastore
  records.
  ref : `DatasetRef`
  The registry information associated with this artifact.
- parameters : `Mapping`[`str`, `Any`]
+ parameters : `~collections.abc.Mapping` [`str`, `typing.Any`]
  `StorageClass` and `Formatter` parameters.
  readStorageClass : `StorageClass` | `None`, optional
  The StorageClass to use when ultimately returning the resulting object
@@ -255,12 +255,12 @@ def get_dataset_as_python_object_from_get_info(

  Parameters
  ----------
- allGetInfo : `list`[`DatastoreFileGetInformation`]
+ allGetInfo : `list` [`DatastoreFileGetInformation`]
  Pre-processed information about each file associated with this
  artifact.
  ref : `DatasetRef`
  The registry information associated with this artifact.
- parameters : `Mapping`[`str`, `Any`]
+ parameters : `~collections.abc.Mapping` [`str`, `typing.Any`]
  `StorageClass` and `Formatter` parameters.
  cache_manager : `AbstractDatastoreCacheManager`
  The cache manager to use for caching retrieved files.
@@ -274,7 +274,11 @@ class ZipIndex(BaseModel):
  Path to the Zip file.
  """
  with zip_path.open("rb") as fd, zipfile.ZipFile(fd) as zf:
- json_data = zf.read(cls.index_name)
+ return cls.from_open_zip(zf)
+
+ @classmethod
+ def from_open_zip(cls, zf: zipfile.ZipFile) -> Self:
+ json_data = zf.read(cls.index_name)
  return cls.model_validate_json(json_data)
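The new `ZipIndex.from_open_zip` classmethod above lets callers read the index from a zip handle they already have open, instead of re-opening the file by path. A hedged sketch, assuming `ZipIndex` lives in `lsst.daf.butler.datastores.file_datastore.retrieve_artifacts` and `artifacts.zip` is a placeholder path to a butler-produced zip:

```python
import zipfile

from lsst.daf.butler.datastores.file_datastore.retrieve_artifacts import ZipIndex

# Placeholder path; any zip with an embedded butler index is suitable.
with zipfile.ZipFile("artifacts.zip") as zf:
    index = ZipIndex.from_open_zip(zf)  # parses the embedded index JSON

# ``index`` is the validated pydantic model describing the zip contents.
```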
@@ -55,8 +55,8 @@ def retrieve_file_transfer_records(
  Cache mapping datastore artifact to existence. Updated by
  this method with details of all artifacts tested.

- Return
- ------
+ Returns
+ -------
  files : `FileTransferMap`
  A dictionary from `DatasetId` to a list of `FileTransferRecord`,
  containing information about the files that were found for these
@@ -590,6 +590,10 @@ class InMemoryDatastore(GenericBaseDatastore[StoredMemoryItemInfo]):
  ignore_errors : `bool`, optional
  Indicate that errors should be ignored.

+ Returns
+ -------
+ None
+
  Raises
  ------
  FileNotFoundError
@@ -721,6 +725,10 @@ class InMemoryDatastore(GenericBaseDatastore[StoredMemoryItemInfo]):
  If `True`, output a log message for every validation error
  detected.

+ Returns
+ -------
+ None
+
  Raises
  ------
  DatastoreValidationError
lsst/daf/butler/ddl.py CHANGED
@@ -537,7 +537,7 @@ class IndexSpec:
  ----------
  *columns : `str`
  Names of the columns to index.
- **kwargs : `Any`
+ **kwargs : `typing.Any`
  Additional keyword arguments to pass directly to
  `sqlalchemy.schema.Index` constructor. This could be used to provide
  backend-specific options, e.g. to create a ``GIST`` index in PostgreSQL
@@ -556,7 +556,7 @@ class IndexSpec:

  kwargs: dict[str, Any]
  """Additional keyword arguments passed directly to
- `sqlalchemy.schema.Index` constructor (`dict` [ `str`, `Any` ]).
+ `sqlalchemy.schema.Index` constructor (`dict` [ `str`, `typing.Any` ]).
  """
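The `IndexSpec` docstring above notes that ``**kwargs`` are forwarded unchanged to `sqlalchemy.schema.Index`; a hedged illustration of the PostgreSQL ``GIST`` case it mentions (the column names here are only illustrative):

```python
from lsst.daf.butler.ddl import IndexSpec

# Plain composite index over two columns.
plain = IndexSpec("instrument", "visit")

# Backend-specific option passed through to sqlalchemy.schema.Index:
# request a GIST index when the table is created in PostgreSQL.
spatial = IndexSpec("region", postgresql_using="gist")
```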
@@ -35,8 +35,6 @@ from __future__ import annotations
  __all__ = (
  "DataCoordinate",
  "DataId",
- "DataIdKey",
- "DataIdValue",
  "SerializedDataCoordinate",
  "SerializedDataId",
  )
@@ -55,7 +53,7 @@ from .._timespan import Timespan
  from ..json import from_json_pydantic, to_json_pydantic
  from ..persistence_context import PersistenceContextVars
  from ._group import DimensionGroup
- from ._records import DataIdKey, DataIdValue, DimensionRecord, SerializedDimensionRecord
+ from ._records import DataIdValue, DimensionRecord, SerializedDimensionRecord

  if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
  from ..registry import Registry
@@ -559,11 +557,11 @@ class DataCoordinate:
  Returns
  -------
  state : `bool`
- If `True`, `__getitem__`, `get`, and `__contains__` (but not
- `keys`!) will act as though the mapping includes key-value pairs
- for implied dimensions, and the `full` property may be used. If
+ If `True`, ``__getitem__``, `get`, and ``__contains__`` (but not
+ ``keys``!) will act as though the mapping includes key-value pairs
+ for implied dimensions, and the ``full`` property may be used. If
  `False`, these operations only include key-value pairs for required
- dimensions, and accessing `full` is an error. Always `True` if
+ dimensions, and accessing ``full`` is an error. Always `True` if
  there are no implied dimensions.
  """
  raise NotImplementedError()
@@ -718,7 +716,7 @@ class DataCoordinate:

  Parameters
  ----------
- simple : `dict` of [`str`, `Any`]
+ simple : `dict` of [`str`, `typing.Any`]
  The `dict` returned by `to_simple()`.
  universe : `DimensionUniverse`
  Object that manages all known dimensions.
@@ -97,7 +97,7 @@ def fail_record_lookup(

  Returns
  -------
- record : `DimensionRecord`
+ record : `DimensionRecord`
  Never returned; this function always raises `LookupError`.
  """
  raise LookupError(
@@ -27,7 +27,13 @@

  from __future__ import annotations

- __all__ = ("DimensionRecord", "SerializedDimensionRecord", "SerializedKeyValueDimensionRecord")
+ __all__ = (
+ "DataIdKey",
+ "DataIdValue",
+ "DimensionRecord",
+ "SerializedDimensionRecord",
+ "SerializedKeyValueDimensionRecord",
+ )

  import itertools
  from collections.abc import Callable, Hashable
@@ -451,8 +457,8 @@ class DimensionRecord:
  registry : `lsst.daf.butler.Registry`, optional
  Registry from which a universe can be extracted. Can be `None`
  if universe is provided explicitly.
- cacheKey : `Hashable` or `None`
- If this is not None, it will be used as a key for any cached
+ cacheKey : `collections.abc.Hashable` or `None`
+ If this is not `None`, it will be used as a key for any cached
  reconstruction instead of calculating a value from the serialized
  format.
@@ -884,6 +884,8 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  if isinstance(datasetRefOrType, DatasetRef):
  if collections is not None:
  warnings.warn("Collections should not be specified with DatasetRef", stacklevel=3)
+ if predict and not datasetRefOrType.dataId.hasRecords():
+ return datasetRefOrType.expanded(self.registry.expandDataId(datasetRefOrType.dataId))
  # May need to retrieve datastore records if requested.
  if datastore_records and datasetRefOrType._datastore_records is None:
  datasetRefOrType = self._registry.get_datastore_records(datasetRefOrType)
@@ -936,6 +938,7 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  run = self.run
  if run is None:
  raise TypeError("Cannot predict dataset ID/location with run=None.")
+ dataId = self.registry.expandDataId(dataId)
  return DatasetRef(datasetType, dataId, run=run)
  else:
  if collections is None:
@@ -1655,29 +1658,9 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  *,
  transfer_dimensions: bool = False,
  dry_run: bool = False,
+ skip_existing: bool = False,
  ) -> None:
- """Ingest a Zip file into this butler.
-
- The Zip file must have been created by `retrieve_artifacts_zip`.
-
- Parameters
- ----------
- zip_file : `lsst.resources.ResourcePathExpression`
- Path to the Zip file.
- transfer : `str`, optional
- Method to use to transfer the Zip into the datastore.
- transfer_dimensions : `bool`, optional
- If `True`, dimension record data associated with the new datasets
- will be transferred from the Zip, if present.
- dry_run : `bool`, optional
- If `True` the ingest will be processed without any modifications
- made to the target butler and as if the target butler did not
- have any of the datasets.
-
- Notes
- -----
- Run collections and dataset types are created as needed.
- """
+ # Docstring inherited.
  if not self.isWriteable():
  raise TypeError("Butler is read-only.")
@@ -1703,6 +1686,29 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  datasets.append(dataset)
  processed_ids.update(unprocessed)

+ new_datasets, existing_datasets = self._partition_datasets_by_known(datasets)
+ if existing_datasets:
+ if skip_existing:
+ _LOG.info(
+ "Skipping %d datasets from zip file %s which already exist in the repository.",
+ len(existing_datasets),
+ zip_file,
+ )
+ else:
+ raise ConflictingDefinitionError(
+ f"Datastore already contains {len(existing_datasets)} of the given datasets."
+ f" Example: {existing_datasets[0]}"
+ )
+ if new_datasets:
+ # Can not yet support partial zip ingests where a zip contains
+ # some datasets that are already in another zip.
+ raise ValueError(
+ f"The given zip file from {zip_file} contains {len(new_datasets)} datasets not known "
+ f"to this butler but also contains {len(existing_datasets)} datasets already known to "
+ "this butler. Currently butler can not ingest zip files with overlapping content."
+ )
+ return
+
  # Ingest doesn't create the RUN collections so we have to do that
  # here.
  #
@@ -1721,7 +1727,18 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  datasets, progress, dry_run=dry_run, transfer_dimensions=transfer_dimensions
  )

- with self.transaction():
+ # Calculate some statistics based on the given list of datasets.
+ n_datasets = 0
+ for d in datasets:
+ n_datasets += len(d.refs)
+ srefs = "s" if n_datasets != 1 else ""
+
+ with (
+ self._metrics.instrument_ingest(
+ n_datasets, _LOG, msg=f"Ingesting zip file {zip_file} with {n_datasets} dataset{srefs}"
+ ),
+ self.transaction(),
+ ):
  # Do not need expanded dataset refs so can ignore the return value.
  self._ingest_file_datasets(datasets, import_info, progress, dry_run=dry_run)
@@ -610,15 +610,15 @@ class DirectQueryDriver(QueryDriver):
  ----------
  tree : `.queries.tree.QueryTree`
  Description of the joins and row filters in the query.
+ allow_duplicate_overlaps : `bool`, optional
+ If set to `True` then query will be allowed to generate
+ non-distinct rows for spatial overlaps.

  Returns
  -------
  tree_analysis : `QueryTreeAnalysis`
  Struct containing additional information need to build the joins
  stage of a query.
- allow_duplicate_overlaps : `bool`, optional
- If set to `True` then query will be allowed to generate
- non-distinct rows for spatial overlaps.

  Notes
  -----
@@ -1313,7 +1313,8 @@ class DirectQueryDriver(QueryDriver):
  Mapping of collection names to collection records, must contain
  records for all collections in ``collection_names`` and all their
  children collections.
- summaries : `~collections.abc.Mapping` [`Any`, `CollectionSummary`]
+ summaries : `~collections.abc.Mapping` [`typing.Any`, \
+ `CollectionSummary`]
  Mapping of collection IDs to collection summaries, must contain
  summaries for all non-chained collections in the collection tree.
@@ -386,7 +386,7 @@ class _GeneralColumnConverter:

  Returns
  -------
- value : `Any`
+ value : `typing.Any`
  Result of the conversion.

  raise NotImplementedError()