lsst-daf-butler 30.0.0rc2__py3-none-any.whl → 30.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. lsst/daf/butler/_butler.py +27 -8
  2. lsst/daf/butler/_butler_collections.py +4 -4
  3. lsst/daf/butler/_butler_metrics.py +51 -2
  4. lsst/daf/butler/_dataset_provenance.py +1 -1
  5. lsst/daf/butler/_dataset_ref.py +1 -1
  6. lsst/daf/butler/_exceptions.py +2 -2
  7. lsst/daf/butler/_file_dataset.py +2 -1
  8. lsst/daf/butler/_formatter.py +14 -7
  9. lsst/daf/butler/_labeled_butler_factory.py +28 -8
  10. lsst/daf/butler/_query_all_datasets.py +2 -0
  11. lsst/daf/butler/_rubin/temporary_for_ingest.py +207 -0
  12. lsst/daf/butler/cli/cmd/_remove_runs.py +1 -12
  13. lsst/daf/butler/column_spec.py +4 -4
  14. lsst/daf/butler/configs/datastores/formatters.yaml +1 -0
  15. lsst/daf/butler/configs/storageClasses.yaml +15 -0
  16. lsst/daf/butler/datastore/_datastore.py +21 -1
  17. lsst/daf/butler/datastore/record_data.py +1 -1
  18. lsst/daf/butler/datastore/stored_file_info.py +2 -2
  19. lsst/daf/butler/datastores/chainedDatastore.py +4 -0
  20. lsst/daf/butler/datastores/fileDatastore.py +26 -13
  21. lsst/daf/butler/datastores/file_datastore/get.py +4 -4
  22. lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py +5 -1
  23. lsst/daf/butler/datastores/file_datastore/transfer.py +2 -2
  24. lsst/daf/butler/datastores/inMemoryDatastore.py +8 -0
  25. lsst/daf/butler/ddl.py +2 -2
  26. lsst/daf/butler/dimensions/_coordinate.py +11 -8
  27. lsst/daf/butler/dimensions/_record_set.py +1 -1
  28. lsst/daf/butler/dimensions/_records.py +9 -3
  29. lsst/daf/butler/direct_butler/_direct_butler.py +85 -51
  30. lsst/daf/butler/direct_query_driver/_driver.py +5 -4
  31. lsst/daf/butler/direct_query_driver/_result_page_converter.py +1 -1
  32. lsst/daf/butler/formatters/parquet.py +6 -6
  33. lsst/daf/butler/logging.py +9 -3
  34. lsst/daf/butler/nonempty_mapping.py +1 -1
  35. lsst/daf/butler/persistence_context.py +8 -5
  36. lsst/daf/butler/queries/_general_query_results.py +1 -1
  37. lsst/daf/butler/queries/driver.py +1 -1
  38. lsst/daf/butler/queries/expression_factory.py +2 -2
  39. lsst/daf/butler/queries/expressions/parser/exprTree.py +1 -1
  40. lsst/daf/butler/queries/expressions/parser/parserYacc.py +1 -1
  41. lsst/daf/butler/queries/overlaps.py +2 -2
  42. lsst/daf/butler/queries/tree/_column_set.py +1 -1
  43. lsst/daf/butler/registry/_collection_record_cache.py +1 -1
  44. lsst/daf/butler/registry/_collection_summary_cache.py +5 -4
  45. lsst/daf/butler/registry/_registry.py +4 -0
  46. lsst/daf/butler/registry/bridge/monolithic.py +17 -13
  47. lsst/daf/butler/registry/databases/postgresql.py +2 -1
  48. lsst/daf/butler/registry/datasets/byDimensions/_dataset_type_cache.py +1 -1
  49. lsst/daf/butler/registry/datasets/byDimensions/_manager.py +53 -47
  50. lsst/daf/butler/registry/datasets/byDimensions/summaries.py +3 -2
  51. lsst/daf/butler/registry/expand_data_ids.py +93 -0
  52. lsst/daf/butler/registry/interfaces/_database.py +6 -1
  53. lsst/daf/butler/registry/interfaces/_datasets.py +2 -1
  54. lsst/daf/butler/registry/interfaces/_obscore.py +1 -1
  55. lsst/daf/butler/registry/obscore/_records.py +1 -1
  56. lsst/daf/butler/registry/obscore/_spatial.py +2 -2
  57. lsst/daf/butler/registry/queries/_results.py +2 -2
  58. lsst/daf/butler/registry/sql_registry.py +3 -25
  59. lsst/daf/butler/registry/wildcards.py +5 -5
  60. lsst/daf/butler/remote_butler/_get.py +1 -1
  61. lsst/daf/butler/remote_butler/_remote_butler.py +6 -1
  62. lsst/daf/butler/remote_butler/_remote_file_transfer_source.py +4 -0
  63. lsst/daf/butler/remote_butler/authentication/cadc.py +4 -3
  64. lsst/daf/butler/script/_pruneDatasets.py +4 -2
  65. lsst/daf/butler/script/configValidate.py +2 -2
  66. lsst/daf/butler/script/queryCollections.py +2 -2
  67. lsst/daf/butler/script/removeCollections.py +2 -0
  68. lsst/daf/butler/script/removeRuns.py +2 -0
  69. lsst/daf/butler/tests/cliCmdTestBase.py +2 -0
  70. lsst/daf/butler/tests/cliLogTestBase.py +2 -0
  71. lsst/daf/butler/tests/hybrid_butler.py +10 -2
  72. lsst/daf/butler/tests/registry_data/lsstcam-subset.yaml +191 -0
  73. lsst/daf/butler/tests/registry_data/spatial.py +4 -2
  74. lsst/daf/butler/tests/testFormatters.py +2 -2
  75. lsst/daf/butler/tests/utils.py +1 -1
  76. lsst/daf/butler/timespan_database_representation.py +3 -3
  77. lsst/daf/butler/transfers/_context.py +7 -6
  78. lsst/daf/butler/version.py +1 -1
  79. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/METADATA +3 -2
  80. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/RECORD +88 -85
  81. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/WHEEL +1 -1
  82. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/entry_points.txt +0 -0
  83. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/COPYRIGHT +0 -0
  84. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/LICENSE +0 -0
  85. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/bsd_license.txt +0 -0
  86. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/gpl-v3.0.txt +0 -0
  87. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/top_level.txt +0 -0
  88. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/zip-safe +0 -0
@@ -109,12 +109,12 @@ class ColumnValueSerializer(ABC):
 
  Parameters
  ----------
- value : `Any`
+ value : `typing.Any`
  Column value to be serialized.
 
  Returns
  -------
- value : `Any`
+ value : `typing.Any`
  Column value in serializable format.
  """
  raise NotImplementedError
@@ -125,12 +125,12 @@ class ColumnValueSerializer(ABC):
 
  Parameters
  ----------
- value : `Any`
+ value : `typing.Any`
  Serialized column value.
 
  Returns
  -------
- value : `Any`
+ value : `typing.Any`
  Deserialized column value.
  """
  raise NotImplementedError
@@ -100,3 +100,4 @@ VisitBackgroundModel: lsst.daf.butler.formatters.json.JsonFormatter
  VignettingCorrection: lsst.ts.observatory.control.utils.extras.vignetting_storage.VignettingCorrectionFormatter
  SSPAuxiliaryFile: lsst.pipe.tasks.sspAuxiliaryFile.SSPAuxiliaryFileFormatter
  VisitGeometry: lsst.daf.butler.formatters.json.JsonFormatter
+ ProvenanceQuantumGraph: lsst.pipe.base.quantum_graph.formatter.ProvenanceFormatter
@@ -443,3 +443,18 @@ storageClasses:
  pytype: lsst.pipe.tasks.sspAuxiliaryFile.SSPAuxiliaryFile
  VisitGeometry:
  pytype: lsst.obs.base.visit_geometry.VisitGeometry
+ ProvenanceQuantumGraph:
+ pytype: lsst.pipe.base.quantum_graph.ProvenanceQuantumGraph
+ parameters:
+ - import_mode # lsst.pipe.base.pipeline_graph.TaskImportMode
+ - quanta # iterable of uuid.UUID; quanta to read
+ - datasets # iterable of uuid.UUID; datasets to read
+ - read_init_quanta # bool, defaults to True; whether to read pre-exec-init info
+ derivedComponents:
+ packages: Packages # ignores node parameters
+
+ # UUID keys can be quantum or data IDs (whichever is passed in via
+ # parameters). Nested lists are attempts to run the quantum (last is
+ # most recent).
+ logs: StructuredDataDict # dict[uuid.UUID, list[ButlerLogRecords]]
+ metadata: StructuredDataDict # dict[uuid.UUID, list[TaskMetadata]]
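The new ProvenanceQuantumGraph storage class above declares read-time parameters (import_mode, quanta, datasets, read_init_quanta) and a packages derived component. A minimal sketch of how a caller might pass those parameters through Butler.get; the repository path, collection, dataset type name, data ID, and UUID are illustrative placeholders, not taken from this diff:

    import uuid
    from lsst.daf.butler import Butler

    # Placeholder repository and collection.
    butler = Butler("/repo/example", collections=["u/someone/prov"])

    # Ask the formatter to materialize only the listed quanta; the dataset
    # type name "provenance_graph" and the UUID are hypothetical.
    graph = butler.get(
        "provenance_graph",
        instrument="LSSTCam",
        parameters={"quanta": [uuid.UUID("00000000-0000-0000-0000-000000000000")]},
    )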
@@ -284,6 +284,14 @@ class DatasetRefURIs(abc.Sequence):
  def __repr__(self) -> str:
  return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"
 
+ def iter_all(self) -> Iterator[ResourcePath]:
+ """Iterate over all URIs without regard to whether they are primary
+ or component.
+ """
+ if self.primaryURI is not None:
+ yield self.primaryURI
+ yield from self.componentURIs.values()
+
 
  class Datastore(FileTransferSource, metaclass=ABCMeta):
  """Datastore interface.
@@ -536,7 +544,7 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):
 
  Returns
  -------
- exists : `dict`[`DatasetRef`, `bool`]
+ exists : `dict` [`DatasetRef`, `bool`]
  Mapping of dataset to boolean indicating whether the dataset
  is known to the datastore.
  """
@@ -825,6 +833,10 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):
  in an external system or if the file is to be compressed in place.
  It is up to the datastore whether this parameter is relevant.
 
+ Returns
+ -------
+ None
+
  Raises
  ------
  NotImplementedError
@@ -1143,6 +1155,10 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):
  Determine whether errors should be ignored. When multiple
  refs are being trashed there will be no per-ref check.
 
+ Returns
+ -------
+ None
+
  Raises
  ------
  FileNotFoundError
@@ -1278,6 +1294,10 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):
  Entity to compare with configuration retrieved using the
  specified lookup key.
 
+ Returns
+ -------
+ None
+
  Raises
  ------
  DatastoreValidationError
@@ -49,7 +49,7 @@ if TYPE_CHECKING:
  # Pydantic requires the possible value types to be explicitly enumerated in
  # order for `uuid.UUID` in particular to work. `typing.Any` does not work
  # here.
- _Record: TypeAlias = dict[str, int | str | uuid.UUID | None]
+ _Record: TypeAlias = dict[str, int | str | None]
 
 
  class SerializedDatastoreRecordData(pydantic.BaseModel):
@@ -423,8 +423,8 @@ def make_datastore_path_relative(path: str) -> str:
  path : `str`
  The file path from a `StoredFileInfo`.
 
- Return
- ------
+ Returns
+ -------
  normalized_path : `str`
  The original path, if it was relative. Otherwise, a version of it that
  was converted to a relative path, stripping URI scheme and netloc from
@@ -1077,6 +1077,10 @@ class ChainedDatastore(Datastore):
  If `True`, output a log message for every validation error
  detected.
 
+ Returns
+ -------
+ None
+
  Raises
  ------
  DatastoreValidationError
@@ -1068,9 +1068,6 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
  # Work out the name we want this ingested file to have
  # inside the datastore
  tgtLocation = self._calculate_ingested_datastore_name(srcUri, ref, formatter)
- if not tgtLocation.uri.dirname().exists():
- log.debug("Folder %s does not exist yet.", tgtLocation.uri.dirname())
- tgtLocation.uri.dirname().mkdir()
 
  # if we are transferring from a local file to a remote location
  # it may be more efficient to get the size and checksum of the
@@ -1311,12 +1308,6 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
  f"and storage class type ({required_pytype})"
  )
 
- uri = location.uri
-
- if not uri.dirname().exists():
- log.debug("Folder %s does not exist yet so creating it.", uri.dirname())
- uri.dirname().mkdir()
-
  if self._transaction is None:
  raise RuntimeError("Attempting to write artifact without transaction enabled")
 
@@ -1332,6 +1323,7 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
 
  # Register a callback to try to delete the uploaded data if
  # something fails below
+ uri = location.uri
  self._transaction.registerUndo("artifactWrite", _removeFileExists, uri)
 
  # Need to record the specified formatter but if this is a V1 formatter
@@ -2160,7 +2152,13 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
 
  return artifact_map
 
- def ingest_zip(self, zip_path: ResourcePath, transfer: str | None, *, dry_run: bool = False) -> None:
+ def ingest_zip(
+ self,
+ zip_path: ResourcePath,
+ transfer: str | None,
+ *,
+ dry_run: bool = False,
+ ) -> None:
  """Ingest an indexed Zip file and contents.
 
  The Zip file must have an index file as created by `retrieveArtifacts`.
@@ -2220,9 +2218,6 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
  else:
  # Name the zip file based on index contents.
  tgtLocation = self.locationFactory.fromPath(index.calculate_zip_file_path_in_store())
- if not tgtLocation.uri.dirname().exists():
- log.debug("Folder %s does not exist yet.", tgtLocation.uri.dirname())
- tgtLocation.uri.dirname().mkdir()
 
  # Transfer the Zip file into the datastore.
  if not dry_run:
@@ -2987,6 +2982,10 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
  If `True`, output a log message for every validation error
  detected.
 
+ Returns
+ -------
+ None
+
  Raises
  ------
  DatastoreValidationError
@@ -3177,6 +3176,20 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
 
  def export_records(self, refs: Iterable[DatasetIdRef]) -> Mapping[str, DatastoreRecordData]:
  # Docstring inherited from the base class.
+
+ # This call to 'bridge.check' filters out "partially deleted" datasets.
+ # Specifically, ones in the unusual edge state that:
+ # 1. They have an entry in the registry dataset tables
+ # 2. They were "trashed" from the datastore, so they are not
+ # present in the "dataset_location" table.)
+ # 3. But the trash has not been "emptied", so there are still entries
+ # in the "opaque" datastore records table.
+ #
+ # As far as I can tell, this can only occur in the case of a concurrent
+ # or aborted call to `Butler.pruneDatasets(unstore=True, purge=False)`.
+ # Datasets (with or without files existing on disk) can persist in
+ # this zombie state indefinitely, until someone manually empties
+ # the trash.
  exported_refs = list(self._bridge.check(refs))
  ids = {ref.id for ref in exported_refs}
  records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = {id: {} for id in ids}
@@ -97,12 +97,12 @@ def generate_datastore_get_information(
 
  Parameters
  ----------
- fileLocations : `list`[`DatasetLocationInformation`]
+ fileLocations : `list` [`DatasetLocationInformation`]
  List of file locations for this artifact and their associated datastore
  records.
  ref : `DatasetRef`
  The registry information associated with this artifact.
- parameters : `Mapping`[`str`, `Any`]
+ parameters : `~collections.abc.Mapping` [`str`, `typing.Any`]
  `StorageClass` and `Formatter` parameters.
  readStorageClass : `StorageClass` | `None`, optional
  The StorageClass to use when ultimately returning the resulting object
@@ -255,12 +255,12 @@ def get_dataset_as_python_object_from_get_info(
 
  Parameters
  ----------
- allGetInfo : `list`[`DatastoreFileGetInformation`]
+ allGetInfo : `list` [`DatastoreFileGetInformation`]
  Pre-processed information about each file associated with this
  artifact.
  ref : `DatasetRef`
  The registry information associated with this artifact.
- parameters : `Mapping`[`str`, `Any`]
+ parameters : `~collections.abc.Mapping` [`str`, `typing.Any`]
  `StorageClass` and `Formatter` parameters.
  cache_manager : `AbstractDatastoreCacheManager`
  The cache manager to use for caching retrieved files.
@@ -274,7 +274,11 @@ class ZipIndex(BaseModel):
  Path to the Zip file.
  """
  with zip_path.open("rb") as fd, zipfile.ZipFile(fd) as zf:
- json_data = zf.read(cls.index_name)
+ return cls.from_open_zip(zf)
+
+ @classmethod
+ def from_open_zip(cls, zf: zipfile.ZipFile) -> Self:
+ json_data = zf.read(cls.index_name)
  return cls.model_validate_json(json_data)
 
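The ZipIndex refactoring above splits index loading into from_open_zip so an already-open zipfile.ZipFile can be reused. A hedged sketch; the import path is inferred from the file list above and the artifact path is a placeholder:

    import zipfile
    from lsst.daf.butler.datastores.file_datastore.retrieve_artifacts import ZipIndex

    # "artifacts.zip" stands in for a Zip produced by retrieveArtifacts.
    with zipfile.ZipFile("artifacts.zip") as zf:
        index = ZipIndex.from_open_zip(zf)  # read and validate the embedded JSON index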
@@ -55,8 +55,8 @@ def retrieve_file_transfer_records(
  Cache mapping datastore artifact to existence. Updated by
  this method with details of all artifacts tested.
 
- Return
- ------
+ Returns
+ -------
  files : `FileTransferMap`
  A dictionary from `DatasetId` to a list of `FileTransferRecord`,
  containing information about the files that were found for these
@@ -590,6 +590,10 @@ class InMemoryDatastore(GenericBaseDatastore[StoredMemoryItemInfo]):
  ignore_errors : `bool`, optional
  Indicate that errors should be ignored.
 
+ Returns
+ -------
+ None
+
  Raises
  ------
  FileNotFoundError
@@ -721,6 +725,10 @@ class InMemoryDatastore(GenericBaseDatastore[StoredMemoryItemInfo]):
  If `True`, output a log message for every validation error
  detected.
 
+ Returns
+ -------
+ None
+
  Raises
  ------
  DatastoreValidationError
lsst/daf/butler/ddl.py CHANGED
@@ -537,7 +537,7 @@ class IndexSpec:
  ----------
  *columns : `str`
  Names of the columns to index.
- **kwargs : `Any`
+ **kwargs : `typing.Any`
  Additional keyword arguments to pass directly to
  `sqlalchemy.schema.Index` constructor. This could be used to provide
  backend-specific options, e.g. to create a ``GIST`` index in PostgreSQL
@@ -556,7 +556,7 @@ class IndexSpec:
 
  kwargs: dict[str, Any]
  """Additional keyword arguments passed directly to
- `sqlalchemy.schema.Index` constructor (`dict` [ `str`, `Any` ]).
+ `sqlalchemy.schema.Index` constructor (`dict` [ `str`, `typing.Any` ]).
  """
 
 
@@ -35,8 +35,6 @@ from __future__ import annotations
  __all__ = (
  "DataCoordinate",
  "DataId",
- "DataIdKey",
- "DataIdValue",
  "SerializedDataCoordinate",
  "SerializedDataId",
  )
@@ -55,7 +53,7 @@ from .._timespan import Timespan
  from ..json import from_json_pydantic, to_json_pydantic
  from ..persistence_context import PersistenceContextVars
  from ._group import DimensionGroup
- from ._records import DataIdKey, DataIdValue, DimensionRecord, SerializedDimensionRecord
+ from ._records import DataIdValue, DimensionRecord, SerializedDimensionRecord
 
  if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
  from ..registry import Registry
@@ -559,11 +557,11 @@ class DataCoordinate:
  Returns
  -------
  state : `bool`
- If `True`, `__getitem__`, `get`, and `__contains__` (but not
- `keys`!) will act as though the mapping includes key-value pairs
- for implied dimensions, and the `full` property may be used. If
+ If `True`, ``__getitem__``, `get`, and ``__contains__`` (but not
+ ``keys``!) will act as though the mapping includes key-value pairs
+ for implied dimensions, and the ``full`` property may be used. If
  `False`, these operations only include key-value pairs for required
- dimensions, and accessing `full` is an error. Always `True` if
+ dimensions, and accessing ``full`` is an error. Always `True` if
  there are no implied dimensions.
  """
  raise NotImplementedError()
@@ -718,7 +716,7 @@ class DataCoordinate:
 
  Parameters
  ----------
- simple : `dict` of [`str`, `Any`]
+ simple : `dict` of [`str`, `typing.Any`]
  The `dict` returned by `to_simple()`.
  universe : `DimensionUniverse`
  Object that manages all known dimensions.
@@ -755,6 +753,11 @@ class DataCoordinate:
  to_json = to_json_pydantic
  from_json: ClassVar[Callable[..., Self]] = cast(Callable[..., Self], classmethod(from_json_pydantic))
 
+ @property
+ def dataId(self) -> Self:
+ """Return this `DataCoordinate` instance, unmodified."""
+ return self
+
 
  DataId = DataCoordinate | Mapping[str, Any]
  """A type-annotation alias for signatures that accept both informal data ID
@@ -97,7 +97,7 @@ def fail_record_lookup(
 
  Returns
  -------
- record : `DimensionRecord`
+ record : `DimensionRecord`
  Never returned; this function always raises `LookupError`.
  """
  raise LookupError(
@@ -27,7 +27,13 @@
 
  from __future__ import annotations
 
- __all__ = ("DimensionRecord", "SerializedDimensionRecord", "SerializedKeyValueDimensionRecord")
+ __all__ = (
+ "DataIdKey",
+ "DataIdValue",
+ "DimensionRecord",
+ "SerializedDimensionRecord",
+ "SerializedKeyValueDimensionRecord",
+ )
 
  import itertools
  from collections.abc import Callable, Hashable
@@ -451,8 +457,8 @@ class DimensionRecord:
  registry : `lsst.daf.butler.Registry`, optional
  Registry from which a universe can be extracted. Can be `None`
  if universe is provided explicitly.
- cacheKey : `Hashable` or `None`
- If this is not None, it will be used as a key for any cached
+ cacheKey : `collections.abc.Hashable` or `None`
+ If this is not `None`, it will be used as a key for any cached
  reconstruction instead of calculating a value from the serialized
  format.
 
@@ -884,6 +884,8 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  if isinstance(datasetRefOrType, DatasetRef):
  if collections is not None:
  warnings.warn("Collections should not be specified with DatasetRef", stacklevel=3)
+ if predict and not datasetRefOrType.dataId.hasRecords():
+ return datasetRefOrType.expanded(self.registry.expandDataId(datasetRefOrType.dataId))
  # May need to retrieve datastore records if requested.
  if datastore_records and datasetRefOrType._datastore_records is None:
  datasetRefOrType = self._registry.get_datastore_records(datasetRefOrType)
@@ -936,6 +938,7 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  run = self.run
  if run is None:
  raise TypeError("Cannot predict dataset ID/location with run=None.")
+ dataId = self.registry.expandDataId(dataId)
  return DatasetRef(datasetType, dataId, run=run)
  else:
  if collections is None:
@@ -1655,29 +1658,9 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  *,
  transfer_dimensions: bool = False,
  dry_run: bool = False,
+ skip_existing: bool = False,
  ) -> None:
- """Ingest a Zip file into this butler.
-
- The Zip file must have been created by `retrieve_artifacts_zip`.
-
- Parameters
- ----------
- zip_file : `lsst.resources.ResourcePathExpression`
- Path to the Zip file.
- transfer : `str`, optional
- Method to use to transfer the Zip into the datastore.
- transfer_dimensions : `bool`, optional
- If `True`, dimension record data associated with the new datasets
- will be transferred from the Zip, if present.
- dry_run : `bool`, optional
- If `True` the ingest will be processed without any modifications
- made to the target butler and as if the target butler did not
- have any of the datasets.
-
- Notes
- -----
- Run collections and dataset types are created as needed.
- """
+ # Docstring inherited.
  if not self.isWriteable():
  raise TypeError("Butler is read-only.")
 
@@ -1703,6 +1686,29 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  datasets.append(dataset)
  processed_ids.update(unprocessed)
 
+ new_datasets, existing_datasets = self._partition_datasets_by_known(datasets)
+ if existing_datasets:
+ if skip_existing:
+ _LOG.info(
+ "Skipping %d datasets from zip file %s which already exist in the repository.",
+ len(existing_datasets),
+ zip_file,
+ )
+ else:
+ raise ConflictingDefinitionError(
+ f"Datastore already contains {len(existing_datasets)} of the given datasets."
+ f" Example: {existing_datasets[0]}"
+ )
+ if new_datasets:
+ # Can not yet support partial zip ingests where a zip contains
+ # some datasets that are already in another zip.
+ raise ValueError(
+ f"The given zip file from {zip_file} contains {len(new_datasets)} datasets not known "
+ f"to this butler but also contains {len(existing_datasets)} datasets already known to "
+ "this butler. Currently butler can not ingest zip files with overlapping content."
+ )
+ return
+
  # Ingest doesn't create the RUN collections so we have to do that
  # here.
  #
@@ -1721,7 +1727,18 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  datasets, progress, dry_run=dry_run, transfer_dimensions=transfer_dimensions
  )
 
- with self.transaction():
+ # Calculate some statistics based on the given list of datasets.
+ n_datasets = 0
+ for d in datasets:
+ n_datasets += len(d.refs)
+ srefs = "s" if n_datasets != 1 else ""
+
+ with (
+ self._metrics.instrument_ingest(
+ n_datasets, _LOG, msg=f"Ingesting zip file {zip_file} with {n_datasets} dataset{srefs}"
+ ),
+ self.transaction(),
+ ):
  # Do not need expanded dataset refs so can ignore the return value.
  self._ingest_file_datasets(datasets, import_info, progress, dry_run=dry_run)
 
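With the skip_existing flag and ingest metrics added above, re-ingesting a Zip whose datasets are already fully present is skipped with a log message instead of raising ConflictingDefinitionError (a mix of known and unknown datasets still raises). A hedged usage sketch, assuming the keyword is also exposed on the public Butler.ingest_zip; the repository path and transfer mode are placeholders:

    from lsst.daf.butler import Butler

    butler = Butler("/repo/example", writeable=True)  # placeholder repository

    # Ingest a Zip created by retrieve_artifacts_zip(); datasets already known
    # to this repository are skipped rather than treated as a conflict.
    butler.ingest_zip("artifacts.zip", transfer="copy", skip_existing=True)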
@@ -1822,12 +1839,25 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  f" Example: {existing_datasets[0]}"
  )
 
+ # Calculate some statistics based on the given list of datasets.
+ n_files = len(datasets)
+ n_datasets = 0
+ for d in datasets:
+ n_datasets += len(d.refs)
+ sfiles = "s" if n_files != 1 else ""
+ srefs = "s" if n_datasets != 1 else ""
+
  # We use `datasets` rather `new_datasets` for the Registry
  # portion of this, to let it confirm that everything matches the
  # existing datasets.
  import_info = self._prepare_ingest_file_datasets(datasets, progress)
 
- with self.transaction():
+ with (
+ self._metrics.instrument_ingest(
+ n_datasets, _LOG, msg=f"Ingesting {n_files} file{sfiles} with {n_datasets} dataset{srefs}"
+ ),
+ self.transaction(),
+ ):
  self._ingest_file_datasets(datasets, import_info, progress)
 
  # Bulk-insert everything into Datastore.
@@ -1982,7 +2012,7 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  doImport(filename) # type: ignore
 
  def transfer_dimension_records_from(
- self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
+ self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef | DataCoordinate]
  ) -> None:
  # Allowed dimensions in the target butler.
  elements = frozenset(element for element in self.dimensions.elements if element.has_own_table)
@@ -2012,16 +2042,13 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  source_butler, data_ids, allowed_elements
  )
 
- can_query = True if isinstance(source_butler, Butler) else False
-
  additional_records: dict[DimensionElement, dict[DataCoordinate, DimensionRecord]] = defaultdict(dict)
  for original_element, record_mapping in primary_records.items():
  # Get dimensions that depend on this dimension.
  populated_by = self.dimensions.get_elements_populated_by(
  self.dimensions[original_element.name] # type: ignore
  )
-
- for data_id in record_mapping.keys():
+ if populated_by:
  for element in populated_by:
  if element not in allowed_elements:
  continue
@@ -2040,28 +2067,32 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  # have to be scanned.
  continue
 
- if not can_query:
- raise RuntimeError(
- f"Transferring populated_by records like {element.name} requires a full Butler."
- )
+ if record_mapping:
+ if not isinstance(source_butler, Butler):
+ raise RuntimeError(
+ f"Transferring populated_by records like {element.name}"
+ " requires a full Butler."
+ )
 
- records = source_butler.query_dimension_records( # type: ignore
- element.name,
- explain=False,
- **data_id.mapping, # type: ignore
- )
- for record in records:
- additional_records[record.definition].setdefault(record.dataId, record)
+ with source_butler.query() as query:
+ records = query.join_data_coordinates(record_mapping.keys()).dimension_records(
+ element.name
+ )
+ for record in records:
+ additional_records[record.definition].setdefault(record.dataId, record)
 
  # The next step is to walk back through the additional records to
  # pick up any missing content (such as visit_definition needing to
  # know the exposure). Want to ensure we do not request records we
  # already have.
  missing_data_ids = set()
- for name, record_mapping in additional_records.items():
+ for record_mapping in additional_records.values():
  for data_id in record_mapping.keys():
- if data_id not in primary_records[name]:
- missing_data_ids.add(data_id)
+ for dimension in data_id.dimensions.required:
+ element = source_butler.dimensions[dimension]
+ dimension_key = data_id.subset(dimension)
+ if dimension_key not in primary_records[element]:
+ missing_data_ids.add(dimension_key)
 
  # Fill out the new records. Assume that these new records do not
  # also need to carry over additional populated_by records.
@@ -2078,19 +2109,19 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  def _extract_dimension_records_from_data_ids(
  self,
  source_butler: LimitedButler | Butler,
- data_ids: set[DataCoordinate],
+ data_ids: Iterable[DataCoordinate],
  allowed_elements: frozenset[DimensionElement],
  ) -> dict[DimensionElement, dict[DataCoordinate, DimensionRecord]]:
  dimension_records: dict[DimensionElement, dict[DataCoordinate, DimensionRecord]] = defaultdict(dict)
 
+ data_ids = set(data_ids)
+ if not all(data_id.hasRecords() for data_id in data_ids):
+ if isinstance(source_butler, Butler):
+ data_ids = source_butler._expand_data_ids(data_ids)
+ else:
+ raise TypeError("Input butler needs to be a full butler to expand DataId.")
+
  for data_id in data_ids:
- # Need an expanded record, if not expanded that we need a full
- # butler with registry (allow mocks with registry too).
- if not data_id.hasRecords():
- if registry := getattr(source_butler, "registry", None):
- data_id = registry.expandDataId(data_id)
- else:
- raise TypeError("Input butler needs to be a full butler to expand DataId.")
  # If this butler doesn't know about a dimension in the source
  # butler things will break later.
  for element_name in data_id.dimensions.elements:
@@ -2569,6 +2600,9 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  """Immediately load caches that are used for common operations."""
  self._registry.preload_cache(load_dimension_record_cache=load_dimension_record_cache)
 
+ def _expand_data_ids(self, data_ids: Iterable[DataCoordinate]) -> list[DataCoordinate]:
+ return self._registry.expand_data_ids(data_ids)
+
  _config: ButlerConfig
  """Configuration for this Butler instance."""
2574
2608