lsst-daf-butler 29.2025.4100__py3-none-any.whl → 29.2025.4300__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. lsst/daf/butler/_dataset_association.py +28 -3
  2. lsst/daf/butler/_registry_shim.py +76 -4
  3. lsst/daf/butler/cli/cmd/_remove_collections.py +15 -0
  4. lsst/daf/butler/configs/datastores/formatters.yaml +1 -0
  5. lsst/daf/butler/configs/storageClasses.yaml +5 -0
  6. lsst/daf/butler/datastores/fileDatastore.py +2 -2
  7. lsst/daf/butler/direct_butler/_direct_butler.py +3 -3
  8. lsst/daf/butler/json.py +2 -2
  9. lsst/daf/butler/queries/_expression_strings.py +1 -1
  10. lsst/daf/butler/registry/_registry.py +39 -20
  11. lsst/daf/butler/registry/_registry_base.py +13 -5
  12. lsst/daf/butler/registry/datasets/byDimensions/_manager.py +65 -16
  13. lsst/daf/butler/registry/interfaces/_datasets.py +21 -1
  14. lsst/daf/butler/registry/queries/_query_common.py +3 -0
  15. lsst/daf/butler/registry/queries/_query_datasets.py +7 -1
  16. lsst/daf/butler/registry/sql_registry.py +41 -187
  17. lsst/daf/butler/registry/tests/_registry.py +120 -6
  18. lsst/daf/butler/remote_butler/_factory.py +2 -2
  19. lsst/daf/butler/remote_butler/_registry.py +4 -0
  20. lsst/daf/butler/remote_butler/_remote_butler.py +3 -1
  21. lsst/daf/butler/remote_butler/authentication/cadc.py +63 -11
  22. lsst/daf/butler/script/removeCollections.py +46 -13
  23. lsst/daf/butler/tests/butler_queries.py +40 -1
  24. lsst/daf/butler/tests/hybrid_butler_registry.py +5 -4
  25. lsst/daf/butler/tests/server_available.py +53 -0
  26. lsst/daf/butler/timespan_database_representation.py +8 -0
  27. lsst/daf/butler/transfers/_context.py +5 -16
  28. lsst/daf/butler/version.py +1 -1
  29. {lsst_daf_butler-29.2025.4100.dist-info → lsst_daf_butler-29.2025.4300.dist-info}/METADATA +1 -1
  30. {lsst_daf_butler-29.2025.4100.dist-info → lsst_daf_butler-29.2025.4300.dist-info}/RECORD +38 -37
  31. {lsst_daf_butler-29.2025.4100.dist-info → lsst_daf_butler-29.2025.4300.dist-info}/WHEEL +0 -0
  32. {lsst_daf_butler-29.2025.4100.dist-info → lsst_daf_butler-29.2025.4300.dist-info}/entry_points.txt +0 -0
  33. {lsst_daf_butler-29.2025.4100.dist-info → lsst_daf_butler-29.2025.4300.dist-info}/licenses/COPYRIGHT +0 -0
  34. {lsst_daf_butler-29.2025.4100.dist-info → lsst_daf_butler-29.2025.4300.dist-info}/licenses/LICENSE +0 -0
  35. {lsst_daf_butler-29.2025.4100.dist-info → lsst_daf_butler-29.2025.4300.dist-info}/licenses/bsd_license.txt +0 -0
  36. {lsst_daf_butler-29.2025.4100.dist-info → lsst_daf_butler-29.2025.4300.dist-info}/licenses/gpl-v3.0.txt +0 -0
  37. {lsst_daf_butler-29.2025.4100.dist-info → lsst_daf_butler-29.2025.4300.dist-info}/top_level.txt +0 -0
  38. {lsst_daf_butler-29.2025.4100.dist-info → lsst_daf_butler-29.2025.4300.dist-info}/zip-safe +0 -0
lsst/daf/butler/_dataset_association.py CHANGED
@@ -29,15 +29,17 @@ from __future__ import annotations
 
 __all__ = ("DatasetAssociation",)
 
-from collections.abc import Iterator
+from collections.abc import Iterator, Mapping
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
 
+from ._collection_type import CollectionType
 from ._dataset_ref import DatasetRef
 from ._dataset_type import DatasetType
 from ._timespan import Timespan
 
 if TYPE_CHECKING:
+    from ._butler_collections import CollectionInfo
     from .queries._general_query_results import GeneralQueryResults
 
 
@@ -66,7 +68,10 @@ class DatasetAssociation:
 
     @classmethod
     def from_query_result(
-        cls, result: GeneralQueryResults, dataset_type: DatasetType
+        cls,
+        result: GeneralQueryResults,
+        dataset_type: DatasetType,
+        collection_info: Mapping[str, CollectionInfo] | None = None,
     ) -> Iterator[DatasetAssociation]:
         """Construct dataset associations from the result of general query.
 
@@ -79,11 +84,31 @@
            "timespan" dataset fields for ``dataset_type``.
        dataset_type : `DatasetType`
            Dataset type, query has to include this dataset type.
+        collection_info : `~collections.abc.Mapping` \
+                [`str`, `CollectionInfo`], optional
+            Mapping from collection name to information about it for all
+            collections that may appear in the query results. If not provided,
+            timespans for `~CollectionType.RUN` and `~CollectionType.TAGGED`
+            collections will be unbounded, instead of `None`; this is actually
+            more consistent with how those timespans are used elsewhere in the
+            query system, but is a change from how `DatasetAssociation` has
+            historically worked.
        """
        timespan_key = f"{dataset_type.name}.timespan"
        collection_key = f"{dataset_type.name}.collection"
        for _, refs, row_dict in result.iter_tuples(dataset_type):
-            yield DatasetAssociation(refs[0], row_dict[collection_key], row_dict[timespan_key])
+            collection = row_dict[collection_key]
+            timespan = row_dict[timespan_key]
+            if (
+                collection_info is not None
+                and collection_info[collection].type is not CollectionType.CALIBRATION
+            ):
+                # This behavior is for backwards compatibility only; in most
+                # contexts it makes sense to consider the timespan of a RUN
+                # or TAGGED collection to be unbounded, not None, and that's
+                # what the query results we're iterating over do.
+                timespan = None
+            yield DatasetAssociation(refs[0], collection, timespan)
 
     def __lt__(self, other: Any) -> bool:
         # Allow sorting of associations
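
For reference, a minimal sketch of how the new collection_info argument might be supplied, mirroring the call pattern that lsst/daf/butler/registry/_registry_base.py adopts further down in this diff; the repository path, dataset type name, and collection pattern are placeholders:

# Hypothetical repo and names; the query calls follow the pattern shown elsewhere in this diff.
from lsst.daf.butler import Butler, DatasetAssociation

butler = Butler("/repo")
dataset_type = butler.get_dataset_type("bias")
collections = butler.collections.query("calib/*")
collection_info = {info.name: info for info in butler.collections.query_info(collections)}
with butler.query() as query:
    query = query.join_dataset_search(dataset_type, collections)
    result = query.general(
        dataset_type.dimensions,
        dataset_fields={dataset_type.name: {"dataset_id", "run", "collection", "timespan"}},
        find_first=False,
    )
    # With collection_info supplied, RUN/TAGGED rows keep the historical timespan=None.
    associations = list(DatasetAssociation.from_query_result(result, dataset_type, collection_info))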
lsst/daf/butler/_registry_shim.py CHANGED
@@ -36,6 +36,7 @@ from typing import TYPE_CHECKING, Any
 from ._collection_type import CollectionType
 from ._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
 from ._dataset_type import DatasetType
+from ._exceptions import CalibrationLookupError
 from ._storage_class import StorageClassFactory
 from ._timespan import Timespan
 from .dimensions import (
@@ -48,7 +49,9 @@ from .dimensions import (
 )
 from .registry._collection_summary import CollectionSummary
 from .registry._defaults import RegistryDefaults
+from .registry._exceptions import NoDefaultCollectionError
 from .registry._registry_base import RegistryBase
+from .registry.queries._query_common import resolve_collections
 
 if TYPE_CHECKING:
     from .direct_butler import DirectButler
@@ -182,13 +185,76 @@ class RegistryShim(RegistryBase):
         *,
         collections: CollectionArgType | None = None,
         timespan: Timespan | None = None,
+        datastore_records: bool = False,
         **kwargs: Any,
     ) -> DatasetRef | None:
         # Docstring inherited from a base class.
-        return self._registry.findDataset(
-            datasetType, dataId, collections=collections, timespan=timespan, **kwargs
+        if not isinstance(datasetType, DatasetType):
+            datasetType = self.getDatasetType(datasetType)
+
+        dataId = DataCoordinate.standardize(
+            dataId,
+            dimensions=datasetType.dimensions,
+            universe=self.dimensions,
+            defaults=self.defaults.dataId,
+            **kwargs,
         )
 
+        with self._butler.query() as query:
+            resolved_collections = resolve_collections(self._butler, collections)
+            if not resolved_collections:
+                if collections is None:
+                    raise NoDefaultCollectionError("No collections provided, and no default collections set")
+                else:
+                    return None
+
+            if datasetType.isCalibration() and timespan is None:
+                # Filter out calibration collections, because with no timespan
+                # we have no way of selecting a dataset from them.
+                collection_info = self._butler.collections.query_info(
+                    resolved_collections, flatten_chains=True
+                )
+                resolved_collections = [
+                    info.name for info in collection_info if info.type != CollectionType.CALIBRATION
+                ]
+                if not resolved_collections:
+                    return None
+
+            result = query.datasets(datasetType, resolved_collections, find_first=True).limit(2)
+            dataset_type_name = result.dataset_type.name
+            # Search only on the 'required' dimensions for the dataset type.
+            # Any extra values provided by the user are ignored.
+            minimal_data_id = DataCoordinate.standardize(
+                dataId.subset(datasetType.dimensions.required).required, universe=self.dimensions
+            )
+            result = result.where(minimal_data_id)
+            if (
+                datasetType.isCalibration()
+                and timespan is not None
+                and (timespan.begin is not None or timespan.end is not None)
+            ):
+                timespan_column = query.expression_factory[dataset_type_name].timespan
+                result = result.where(timespan_column.overlaps(timespan))
+
+            datasets = list(result)
+            if len(datasets) == 1:
+                ref = datasets[0]
+                if dataId.hasRecords():
+                    ref = ref.expanded(dataId)
+                # Propagate storage class from user-provided DatasetType, which
+                # may not match the definition in the database.
+                ref = ref.overrideStorageClass(datasetType.storageClass_name)
+                if datastore_records:
+                    ref = self._registry.get_datastore_records(ref)
+                return ref
+            elif len(datasets) == 0:
+                return None
+            else:
+                raise CalibrationLookupError(
+                    f"Ambiguous calibration lookup for {datasetType} with timespan {timespan}"
+                    f" in collections {resolved_collections}."
+                )
+
    def insertDatasets(
        self,
        datasetType: DatasetType | str,
@@ -200,14 +266,20 @@
         # Docstring inherited from a base class.
         return self._registry.insertDatasets(datasetType, dataIds, run, expand, idGenerationMode)
 
-    def _importDatasets(self, datasets: Iterable[DatasetRef], expand: bool = True) -> list[DatasetRef]:
+    def _importDatasets(
+        self, datasets: Iterable[DatasetRef], expand: bool = True, assume_new: bool = False
+    ) -> list[DatasetRef]:
         # Docstring inherited from a base class.
-        return self._registry._importDatasets(datasets, expand)
+        return self._registry._importDatasets(datasets, expand, assume_new)
 
     def getDataset(self, id: DatasetId) -> DatasetRef | None:
         # Docstring inherited from a base class.
         return self._registry.getDataset(id)
 
+    def _fetch_run_dataset_ids(self, run: str) -> list[DatasetId]:
+        # Docstring inherited.
+        return self._registry._fetch_run_dataset_ids(run)
+
     def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
         # Docstring inherited from a base class.
         self._registry.removeDatasets(refs)
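
A hedged sketch of the user-facing call this reimplementation serves; the repository path, dataset type, data ID values, and collection name are placeholders. Per the hunk above, calibration lookups additionally accept timespan= and raise CalibrationLookupError if the lookup stays ambiguous:

from lsst.daf.butler import Butler

butler = Butler("/repo")  # hypothetical repository
ref = butler.registry.findDataset(
    "calexp",
    instrument="HSC",
    detector=42,
    visit=903334,
    collections=["HSC/runs/example"],
)
print(ref)  # a DatasetRef, or None if nothing matched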
lsst/daf/butler/cli/cmd/_remove_collections.py CHANGED
@@ -41,6 +41,8 @@ from ..utils import ButlerCommand
 noNonRunCollectionsMsg = "No non-RUN collections were found."
 willRemoveCollectionMsg = "The following collections will be removed:"
 removedCollectionsMsg = "Removed collections"
+willRemoveCollectionChainsMsg = "Collections to be removed from their parent collection chains:"
+removedCollectionChainsMsg = "Removed collections from their parent collection chains:"
 canNotRemoveFoundRuns = "The following RUN collections were found but can NOT be removed by this command:"
 didNotRemoveFoundRuns = "Found RUN collections but they can NOT be removed by this command:"
 abortedMsg = "Aborted."
@@ -53,6 +55,11 @@ abortedMsg = "Aborted."
 )
 @confirm_option()
 @options_file_option()
+@click.option(
+    "--remove-from-parents",
+    is_flag=True,
+    help="Forcibly remove the collection even if it is still referenced from collection chains.",
+)
 def remove_collections(**kwargs: Any) -> None:  # numpydoc ignore=PR01
     """Remove one or more non-RUN collections.
 
@@ -73,6 +80,10 @@ def remove_collections(**kwargs: Any) -> None: # numpydoc ignore=PR01
             result.removeCollectionsTable.pprint_all(align="<")
         else:
             print("\n" + noNonRunCollectionsMsg)
+        if len(result.removeChainsTable):
+            print("\n" + willRemoveCollectionChainsMsg)
+            result.removeChainsTable.pprint_all(align="<")
+            print()
         if len(result.runsTable):
             print("\n" + canNotRemoveFoundRuns)
             result.runsTable.pprint_all(align="<")
@@ -86,6 +97,10 @@ def remove_collections(**kwargs: Any) -> None: # numpydoc ignore=PR01
         else:
             print("\n" + removedCollectionsMsg + ":\n")
             result.removeCollectionsTable.pprint_all(align="<")
+        if len(result.removeChainsTable):
+            print("\n" + removedCollectionChainsMsg)
+            result.removeChainsTable.pprint_all(align="<")
+            print()
         if len(result.runsTable):
             print("\n" + didNotRemoveFoundRuns)
             result.runsTable.pprint_all(align="<")
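
Assuming the usual butler remove-collections invocation (command name and positional arguments as in existing releases; the repo path and collection name are placeholders), the new flag would be passed like this:

butler remove-collections /path/to/repo u/example/chain-member --remove-from-parents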
lsst/daf/butler/configs/datastores/formatters.yaml CHANGED
@@ -87,6 +87,7 @@ SpectractorSpectrum: lsst.atmospec.formatters.SpectractorSpectrumFormatter
 SpectractorImage: lsst.atmospec.formatters.SpectractorImageFormatter
 SpectractorFitParameters: lsst.atmospec.formatters.SpectractorFitParametersFormatter
 ScarletModelData: lsst.meas.extensions.scarlet.io.ScarletModelFormatter
+LsstScarletModelData: lsst.meas.extensions.scarlet.io.ScarletModelFormatter
 MetricMeasurementBundle: lsst.daf.butler.formatters.json.JsonFormatter
 MultipleCellCoadd: lsst.cell_coadds.CellCoaddFitsFormatter
 NNModelPackagePayload: lsst.meas.transiNet.modelPackages.NNModelPackageFormatter
lsst/daf/butler/configs/storageClasses.yaml CHANGED
@@ -412,6 +412,11 @@ storageClasses:
     parameters:
     - blend_id
     delegate: lsst.meas.extensions.scarlet.io.ScarletModelDelegate
+  LsstScarletModelData:
+    pytype: lsst.meas.extensions.scarlet.io.LsstScarletModelData
+    parameters:
+    - blend_id
+    delegate: lsst.meas.extensions.scarlet.io.ScarletModelDelegate
   MetricMeasurementBundle:
     pytype: lsst.analysis.tools.interfaces.MetricMeasurementBundle
   MultipleCellCoadd:
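
As a quick check that the new entry is wired up, the storage class should resolve by name through the factory once this configuration is loaded (a sketch; it only needs the YAML above, though instantiating the pytype would also require lsst.meas.extensions.scarlet to be installed):

from lsst.daf.butler import StorageClassFactory

factory = StorageClassFactory()
sc = factory.getStorageClass("LsstScarletModelData")
print(sc.name, sorted(sc.parameters))  # expect the "blend_id" parameter declared above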
lsst/daf/butler/datastores/fileDatastore.py CHANGED
@@ -1874,7 +1874,7 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
 
         # Have to handle trustGetRequest mode by checking for the existence
         # of the missing refs on disk.
-        if missing_refs:
+        if missing_refs and not predict:
             dataset_existence = self._mexists_check_expected(missing_refs, None)
             really_missing = set()
             not_missing = set()
@@ -3218,7 +3218,7 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
             return ref
         dataset_type = self._retrieve_dataset_method(ref.datasetType.name)
         if dataset_type is not None:
-            ref = ref.overrideStorageClass(dataset_type.storageClass)
+            ref = ref.overrideStorageClass(dataset_type.storageClass_name)
         return ref
 
     def get_opaque_table_definitions(self) -> Mapping[str, DatastoreOpaqueTable]:
lsst/daf/butler/direct_butler/_direct_butler.py CHANGED
@@ -1302,7 +1302,7 @@ class DirectButler(Butler): # numpydoc ignore=PR02
 
         data_id, kwargs = self._rewrite_data_id(data_id, parent_type, **kwargs)
 
-        ref = self._registry.findDataset(
+        ref = self.registry.findDataset(
             parent_type,
             data_id,
             collections=collections,
@@ -2107,7 +2107,7 @@ class DirectButler(Butler): # numpydoc ignore=PR02
         dry_run: bool = False,
     ) -> _ImportDatasetsInfo:
         # Docstring inherited.
-        if not self.isWriteable():
+        if not self.isWriteable() and not dry_run:
             raise TypeError("Butler is read-only.")
 
         # Will iterate through the refs multiple times so need to convert
@@ -2312,7 +2312,7 @@ class DirectButler(Butler): # numpydoc ignore=PR02
     ) -> collections.abc.Collection[DatasetRef]:
         # Docstring inherited.
         source_refs = list(source_refs)
-        if not self.isWriteable():
+        if not self.isWriteable() and not dry_run:
             raise TypeError("Butler is read-only.")
 
         progress = Progress("lsst.daf.butler.Butler.transfer_from", level=VERBOSE)
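
A sketch of what the relaxed writeability checks enable: a dry-run transfer against a read-only destination butler. Paths, collection, and dataset type name are placeholders; only the dry_run behavior is taken from this diff:

from lsst.daf.butler import Butler

source = Butler("/repo/source")
dest = Butler("/repo/dest", writeable=False)
refs = source.query_datasets("calexp", collections="HSC/runs/example", limit=10)
# With dry_run=True the read-only check above no longer raises; nothing is written.
would_transfer = dest.transfer_from(source, refs, dry_run=True)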
lsst/daf/butler/json.py CHANGED
@@ -68,7 +68,7 @@ def to_json_pydantic(self: SupportsSimple, minimal: bool = False) -> str:
 
 def from_json_pydantic(
     cls_: type[SupportsSimple],
-    json_str: str,
+    json_str: str | bytes | bytearray,
     universe: DimensionUniverse | None = None,
     registry: Registry | None = None,
 ) -> SupportsSimple:
@@ -78,7 +78,7 @@ def from_json_pydantic(
     ----------
     cls_ : `type` of `SupportsSimple`
         The Python type being created.
-    json_str : `str`
+    json_str : `str` or `bytes` or `bytearray`
         The JSON string representing this object.
     universe : `DimensionUniverse` or `None`, optional
         The universe required to instantiate some models. Required if
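
A small round-trip sketch of the widened signature: bytes produced by encoding to_json output (or received raw from an HTTP client) can now be handed to from_json directly; the repo path and dataset type name are placeholders:

from lsst.daf.butler import Butler, DatasetType

butler = Butler("/repo")
dt = butler.get_dataset_type("bias")
payload: bytes = dt.to_json().encode()  # stand-in for raw bytes from a web response
same = DatasetType.from_json(payload, universe=butler.dimensions)
assert same.name == dt.name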
lsst/daf/butler/queries/_expression_strings.py CHANGED
@@ -241,7 +241,7 @@ class _ConversionVisitor(TreeVisitor[_VisitorResult]):
 
     def visitBind(self, name: str, node: Node) -> _VisitorResult:
         if name not in self.context.bind:
-            raise InvalidQueryError("Name {name!r} is not in the bind map.")
+            raise InvalidQueryError(f"Name {name!r} is not in the bind map.")
         # Logic in visitIdentifier handles binds.
         return self.visitIdentifier(name, node)
 
lsst/daf/butler/registry/_registry.py CHANGED
@@ -631,57 +631,55 @@ class Registry(ABC):
         self,
         datasets: Iterable[DatasetRef],
         expand: bool = True,
+        assume_new: bool = False,
     ) -> list[DatasetRef]:
         """Import one or more datasets into the `Registry`.
 
-        Difference from `insertDatasets` method is that this method accepts
-        `DatasetRef` instances which should already be resolved and have a
-        dataset ID. If registry supports globally-unique dataset IDs (e.g.
-        `uuid.UUID`) then datasets which already exist in the registry will be
-        ignored if imported again.
+        This differs from `insertDatasets` method in that this method accepts
+        `DatasetRef` instances, which already have a dataset ID.
 
         Parameters
         ----------
         datasets : `~collections.abc.Iterable` of `DatasetRef`
             Datasets to be inserted. All `DatasetRef` instances must have
-            identical ``datasetType`` and ``run`` attributes. ``run``
+            identical ``run`` attributes. ``run``
             attribute can be `None` and defaults to ``self.defaults.run``.
             Datasets can specify ``id`` attribute which will be used for
-            inserted datasets. All dataset IDs must have the same type
-            (`int` or `uuid.UUID`), if type of dataset IDs does not match
-            configured backend then IDs will be ignored and new IDs will be
-            generated by backend.
+            inserted datasets.
+            Datasets can be of multiple dataset types, but all the dataset
+            types must have the same set of dimensions.
         expand : `bool`, optional
             If `True` (default), expand data IDs as they are inserted. This is
             necessary in general, but it may be disabled if the caller can
             guarantee this is unnecessary.
+        assume_new : `bool`, optional
+            If `True`, assume datasets are new. If `False`, datasets that are
+            identical to an existing one are ignored.
 
         Returns
         -------
         refs : `list` of `DatasetRef`
-            Resolved `DatasetRef` instances for all given data IDs (in the same
-            order). If any of ``datasets`` has an ID which already exists in
-            the database then it will not be inserted or updated, but a
-            resolved `DatasetRef` will be returned for it in any case.
+            `DatasetRef` instances for all given data IDs (in the same order).
+            If any of ``datasets`` has an ID which already exists in the
+            database then it will not be inserted or updated, but a
+            `DatasetRef` will be returned for it in any case.
 
         Raises
         ------
         lsst.daf.butler.registry.NoDefaultCollectionError
             Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
         lsst.daf.butler.registry.DatasetTypeError
-            Raised if datasets correspond to more than one dataset type or
-            dataset type is not known to registry.
+            Raised if a dataset type is not known to registry.
         lsst.daf.butler.registry.ConflictingDefinitionError
             If a dataset with the same dataset type and data ID as one of those
-            given already exists in ``run``.
+            given already exists in ``run``, or if ``assume_new=True`` and at
+            least one dataset is not new.
         lsst.daf.butler.registry.MissingCollectionError
             Raised if ``run`` does not exist in the registry.
 
         Notes
         -----
-        This method is considered package-private and internal to Butler
-        implementation. Clients outside daf_butler package should not use this
-        method.
+        This method is considered middleware-internal.
         """
         raise NotImplementedError()
 
@@ -702,6 +700,27 @@
         """
         raise NotImplementedError()
 
+    @abstractmethod
+    def _fetch_run_dataset_ids(self, run: str) -> list[DatasetId]:
+        """Return the IDs of all datasets in the given ``RUN``
+        collection.
+
+        Parameters
+        ----------
+        run : `str`
+            Name of the collection.
+
+        Returns
+        -------
+        dataset_ids : `list` [`uuid.UUID`]
+            List of dataset IDs.
+
+        Notes
+        -----
+        This is a middleware-internal interface.
+        """
+        raise NotImplementedError()
+
     @abstractmethod
     def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
         """Remove datasets from the Registry.
lsst/daf/butler/registry/_registry_base.py CHANGED
@@ -231,20 +231,28 @@ class RegistryBase(Registry):
         collectionTypes: Iterable[CollectionType] = CollectionType.all(),
         flattenChains: bool = False,
     ) -> Iterator[DatasetAssociation]:
-        # queryCollections only accepts DatasetType.
         if isinstance(datasetType, str):
             datasetType = self.getDatasetType(datasetType)
-        resolved_collections = self.queryCollections(
-            collections, datasetType=datasetType, collectionTypes=collectionTypes, flattenChains=flattenChains
-        )
         with self._butler.query() as query:
+            resolved_collections = self.queryCollections(
+                collections,
+                datasetType=datasetType,
+                collectionTypes=collectionTypes,
+                flattenChains=flattenChains,
+            )
+            # It's annoyingly difficult to just do the collection query once,
+            # since query_info doesn't accept all the expression types that
+            # queryCollections does. But it's all cached anyway.
+            collection_info = {
+                info.name: info for info in self._butler.collections.query_info(resolved_collections)
+            }
             query = query.join_dataset_search(datasetType, resolved_collections)
             result = query.general(
                 datasetType.dimensions,
                 dataset_fields={datasetType.name: {"dataset_id", "run", "collection", "timespan"}},
                 find_first=False,
             )
-            yield from DatasetAssociation.from_query_result(result, datasetType)
+            yield from DatasetAssociation.from_query_result(result, datasetType, collection_info)
 
     def _resolve_dataset_types(self, dataset_types: object | None) -> list[str]:
         if dataset_types is None:
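
For orientation, the public call whose timespan behavior the collection_info plumbing above preserves; the dataset type name and collection pattern are placeholders:

from lsst.daf.butler import Butler, CollectionType

butler = Butler("/repo")
for assoc in butler.registry.queryDatasetAssociations(
    "bias",
    collections="HSC/calib*",
    collectionTypes={CollectionType.CALIBRATION, CollectionType.RUN},
    flattenChains=True,
):
    # assoc.timespan stays a Timespan only for CALIBRATION collections; None otherwise.
    print(assoc.collection, assoc.ref.id, assoc.timespan)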
lsst/daf/butler/registry/datasets/byDimensions/_manager.py CHANGED
@@ -617,6 +617,14 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
         dataset_type_names = set(get_dataset_type_name(dt) for dt in dataset_types)
         return self._summaries.fetch_summaries(collections, dataset_type_names, self._dataset_type_from_row)
 
+    def fetch_run_dataset_ids(self, run: RunRecord) -> list[DatasetId]:
+        # Docstring inherited.
+        sql = sqlalchemy.select(self._static.dataset.c.id).where(
+            self._static.dataset.c[self._run_key_column] == run.key
+        )
+        with self._db.query(sql) as result:
+            return list(result.scalars())
+
     def ingest_date_dtype(self) -> type:
         """Return type of the ``ingest_date`` column."""
         schema_version = self.newSchemaVersion()
@@ -698,7 +706,7 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
             for dataId, row in zip(data_id_list, rows, strict=True)
         ]
 
-    def import_(self, run: RunRecord, refs: list[DatasetRef]) -> None:
+    def import_(self, run: RunRecord, refs: list[DatasetRef], assume_new: bool = False) -> None:
         # Docstring inherited from DatasetRecordStorageManager.
         if not refs:
             # Just in case an empty mapping is provided we want to avoid
@@ -721,7 +729,6 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
                 "Table cache should have been populated when looking up dataset types"
             )
         tags_table = self._get_tags_table(dynamic_tables)
-
         # Current timestamp, type depends on schema version.
         if self._use_astropy_ingest_date:
             # Astropy `now()` precision should be the same as `now()` which
@@ -729,11 +736,8 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
             timestamp = sqlalchemy.sql.literal(astropy.time.Time.now(), type_=ddl.AstropyTimeNsecTai)
         else:
             timestamp = sqlalchemy.sql.literal(datetime.datetime.now(datetime.UTC))
-
-        # We'll insert all new rows into a temporary table
-        table_spec = makeTagTableSpec(dimensions, type(self._collections), constraints=False)
         collection_fkey_name = self._collections.getCollectionForeignKeyName()
-        tmpRows = [
+        tags_rows = [
             {
                 "dataset_type_id": dataset_type_storage[ref.datasetType.name].dataset_type_id,
                 collection_fkey_name: run.key,
@@ -742,9 +746,29 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
             }
             for ref in refs
         ]
+        if assume_new:
+            self._import_new(run, refs, dataset_type_storage, tags_table, tags_rows, timestamp)
+        else:
+            self._import_guarded(
+                run, refs, dimensions, dataset_type_storage, tags_table, tags_rows, timestamp
+            )
+
+    def _import_guarded(
+        self,
+        run: RunRecord,
+        refs: list[DatasetRef],
+        dimensions: DimensionGroup,
+        dataset_type_storage: dict[str, _DatasetRecordStorage],
+        tags_table: sqlalchemy.Table,
+        tags_rows: list[dict[str, object]],
+        timestamp: sqlalchemy.BindParameter[astropy.time.Time | datetime.datetime],
+    ) -> None:
+        # We'll insert all new rows into a temporary table
+        table_spec = makeTagTableSpec(dimensions, type(self._collections), constraints=False)
+        collection_fkey_name = self._collections.getCollectionForeignKeyName()
         with self._db.transaction(for_temp_tables=True), self._db.temporary_table(table_spec) as tmp_tags:
             # store all incoming data in a temporary table
-            self._db.insert(tmp_tags, *tmpRows)
+            self._db.insert(tmp_tags, *tags_rows)
             # There are some checks that we want to make for consistency
             # of the new datasets with existing ones.
             self._validate_import(dimensions, tags_table, tmp_tags, run)
@@ -764,17 +788,19 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
                     timestamp.label("ingest_date"),
                 ),
             )
-            # Update the summary tables for this collection in case this
-            # is the first time this dataset type or these governor values
-            # will be inserted there.
-            summary = CollectionSummary()
-            summary.add_datasets(refs)
-            self._summaries.update(
-                run, [storage.dataset_type_id for storage in dataset_type_storage.values()], summary
-            )
+            self._update_summaries(run, refs, dataset_type_storage)
             # Copy from temp table into tags table.
             self._db.insert(tags_table, select=tmp_tags.select())
 
+    def _update_summaries(
+        self, run: RunRecord, refs: list[DatasetRef], dataset_type_storage: dict[str, _DatasetRecordStorage]
+    ) -> None:
+        summary = CollectionSummary()
+        summary.add_datasets(refs)
+        self._summaries.update(
+            run, [storage.dataset_type_id for storage in dataset_type_storage.values()], summary
+        )
+
     def _validate_import(
         self,
         dimensions: DimensionGroup,
@@ -899,6 +925,29 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
                 f"but ID {row.new_dataset_id} in new collection {new_collection!r}."
             )
 
+    def _import_new(
+        self,
+        run: RunRecord,
+        refs: list[DatasetRef],
+        dataset_type_storage: dict[str, _DatasetRecordStorage],
+        tags_table: sqlalchemy.Table,
+        tags_rows: list[dict[str, object]],
+        timestamp: sqlalchemy.BindParameter[astropy.time.Time | datetime.datetime],
+    ) -> None:
+        static_rows = [
+            {
+                "id": ref.id,
+                "dataset_type_id": dataset_type_storage[ref.datasetType.name].dataset_type_id,
+                self._run_key_column: run.key,
+                "ingest_date": timestamp.value,
+            }
+            for ref in refs
+        ]
+        with self._db.transaction():
+            self._db.insert(self._static.dataset, *static_rows)
+            self._update_summaries(run, refs, dataset_type_storage)
+            self._db.insert(tags_table, *tags_rows)
+
     def delete(self, datasets: Iterable[DatasetId | DatasetRef]) -> None:
         # Docstring inherited from DatasetRecordStorageManager.
         # Only delete from common dataset table; ON DELETE foreign key clauses
@@ -1425,7 +1474,7 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
             )
             if "timespan" in fields:
                 tags_builder.joins.timespans[fields_key] = self._db.getTimespanRepresentation().fromLiteral(
-                    None
+                    Timespan(None, None)
                 )
         calibs_builder: SqlSelectBuilder | None = None
         if CollectionType.CALIBRATION in collection_types:
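
The distinction behind the changed literal, sketched with the public Timespan class: Timespan(None, None) is the fully unbounded interval (it overlaps everything), whereas a bare None meant no timespan value at all:

from astropy.time import Time, TimeDelta

from lsst.daf.butler import Timespan

begin = Time("2025-01-01T00:00:00", scale="tai")
end = begin + TimeDelta(3600, format="sec")
unbounded = Timespan(None, None)
assert unbounded.overlaps(Timespan(begin, end))  # unbounded overlaps any finite interval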
lsst/daf/butler/registry/interfaces/_datasets.py CHANGED
@@ -384,6 +384,23 @@ class DatasetRecordStorageManager(VersionedExtension):
         """
         raise NotImplementedError()
 
+    @abstractmethod
+    def fetch_run_dataset_ids(self, run: RunRecord) -> list[DatasetId]:
+        """Return the IDs of all datasets in the given ``RUN``
+        collection.
+
+        Parameters
+        ----------
+        run : `RunRecord`
+            Record describing the collection.
+
+        Returns
+        -------
+        dataset_ids : `list` [`uuid.UUID`]
+            List of dataset IDs.
+        """
+        raise NotImplementedError()
+
     @abstractmethod
     def ingest_date_dtype(self) -> type:
         """Return type of the ``ingest_date`` column."""
@@ -424,7 +441,7 @@ class DatasetRecordStorageManager(VersionedExtension):
         raise NotImplementedError()
 
     @abstractmethod
-    def import_(self, run: RunRecord, refs: list[DatasetRef]) -> None:
+    def import_(self, run: RunRecord, refs: list[DatasetRef], assume_new: bool = False) -> None:
         """Insert one or more dataset entries into the database.
 
         Parameters
@@ -435,6 +452,9 @@
         refs : `list` [ `DatasetRef` ]
             List of datasets to be inserted. All of the ``DatasetRef``
             ``run`` attributes must match the ``run`` parameter.
+        assume_new : `bool`, optional
+            If `True`, assume all datasets are new and skip conflict resolution
+            logic.
         """
         raise NotImplementedError()
 
lsst/daf/butler/registry/queries/_query_common.py CHANGED
@@ -58,6 +58,9 @@ class CommonQueryArguments:
     def replaceCollections(self, collections: list[str]) -> CommonQueryArguments:
         return dataclasses.replace(self, collections=collections)
 
+    def replaceDatasetTypes(self, dataset_types: list[str]) -> CommonQueryArguments:
+        return dataclasses.replace(self, dataset_types=dataset_types)
+
 
 _T = TypeVar("_T", bound=QueryResultsBase)
 _U = TypeVar("_U", bound=QueryResultsBase)