lsst-daf-butler 29.1.0rc2__py3-none-any.whl → 29.1.0rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/daf/butler/_limited_butler.py +8 -1
- lsst/daf/butler/cli/utils.py +1 -1
- lsst/daf/butler/datastore/__init__.py +1 -0
- lsst/daf/butler/datastore/_datastore.py +18 -15
- lsst/daf/butler/datastore/_transfer.py +102 -0
- lsst/daf/butler/datastore/stored_file_info.py +34 -0
- lsst/daf/butler/datastores/chainedDatastore.py +53 -7
- lsst/daf/butler/datastores/fileDatastore.py +51 -180
- lsst/daf/butler/datastores/file_datastore/transfer.py +104 -0
- lsst/daf/butler/dimensions/_coordinate.py +3 -0
- lsst/daf/butler/direct_butler/_direct_butler.py +31 -28
- lsst/daf/butler/formatters/parquet.py +7 -3
- lsst/daf/butler/registry/interfaces/_database.py +1 -2
- lsst/daf/butler/registry/obscore/_config.py +5 -0
- lsst/daf/butler/registry/obscore/_records.py +4 -2
- lsst/daf/butler/remote_butler/_http_connection.py +6 -2
- lsst/daf/butler/remote_butler/_remote_butler.py +5 -0
- lsst/daf/butler/remote_butler/_remote_file_transfer_source.py +124 -0
- lsst/daf/butler/remote_butler/server/_factory.py +4 -0
- lsst/daf/butler/remote_butler/server/handlers/_external.py +90 -3
- lsst/daf/butler/remote_butler/server/handlers/_utils.py +15 -1
- lsst/daf/butler/remote_butler/server_models.py +17 -1
- lsst/daf/butler/tests/hybrid_butler.py +5 -1
- lsst/daf/butler/version.py +1 -1
- {lsst_daf_butler-29.1.0rc2.dist-info → lsst_daf_butler-29.1.0rc4.dist-info}/METADATA +1 -1
- {lsst_daf_butler-29.1.0rc2.dist-info → lsst_daf_butler-29.1.0rc4.dist-info}/RECORD +34 -31
- {lsst_daf_butler-29.1.0rc2.dist-info → lsst_daf_butler-29.1.0rc4.dist-info}/WHEEL +0 -0
- {lsst_daf_butler-29.1.0rc2.dist-info → lsst_daf_butler-29.1.0rc4.dist-info}/entry_points.txt +0 -0
- {lsst_daf_butler-29.1.0rc2.dist-info → lsst_daf_butler-29.1.0rc4.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_daf_butler-29.1.0rc2.dist-info → lsst_daf_butler-29.1.0rc4.dist-info}/licenses/LICENSE +0 -0
- {lsst_daf_butler-29.1.0rc2.dist-info → lsst_daf_butler-29.1.0rc4.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_daf_butler-29.1.0rc2.dist-info → lsst_daf_butler-29.1.0rc4.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_daf_butler-29.1.0rc2.dist-info → lsst_daf_butler-29.1.0rc4.dist-info}/top_level.txt +0 -0
- {lsst_daf_butler-29.1.0rc2.dist-info → lsst_daf_butler-29.1.0rc4.dist-info}/zip-safe +0 -0

lsst/daf/butler/datastores/fileDatastore.py

@@ -109,6 +109,9 @@ from lsst.utils.iteration import chunk_iterable
 from lsst.utils.logging import VERBOSE, getLogger
 from lsst.utils.timer import time_this
 
+from ..datastore import FileTransferMap, FileTransferRecord
+from ..datastore.stored_file_info import make_datastore_path_relative
+
 if TYPE_CHECKING:
     from lsst.daf.butler import DatasetProvenance, LookupKey
     from lsst.daf.butler.registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
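
The `FileTransferMap` and `FileTransferRecord` types imported above come from the new `lsst/daf/butler/datastore/_transfer.py` module listed at the top of this diff; their definitions are not shown here. A rough sketch of the shape implied by how they are used later in this diff (field types are guesses, and the names below are illustrative only):

```python
# Illustrative sketch only; the real classes live in
# lsst/daf/butler/datastore/_transfer.py and may differ in detail.
import dataclasses
import uuid
from typing import Any


@dataclasses.dataclass(frozen=True)
class FileTransferRecordSketch:
    """Pairs a datastore record with the location of its file artifact."""

    file_info: Any  # StoredFileInfo in the real code
    location: Any  # Location (from the datastore's LocationFactory) in the real code


# One dataset can be backed by several files (disassembled composites),
# hence a list of records per dataset ID.
FileTransferMapSketch = dict[uuid.UUID, list[FileTransferRecordSketch]]
```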
@@ -1972,12 +1975,12 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
 
         return uris
 
-    @staticmethod
     def _find_missing_records(
-
+        self,
         refs: Iterable[DatasetRef],
         missing_ids: set[DatasetId],
         artifact_existence: dict[ResourcePath, bool] | None = None,
+        warn_for_missing: bool = True,
     ) -> dict[DatasetId, list[StoredFileInfo]]:
         if not missing_ids:
             return {}
@@ -1998,7 +2001,7 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
             # Ask the source datastore where the missing artifacts
             # should be. An execution butler might not know about the
             # artifacts even if they are there.
-            expected =
+            expected = self._get_expected_dataset_locations_info(id_to_ref[missing])
             records[missing] = [info for _, info in expected]
 
         # Call the mexist helper method in case we have not already
@@ -2007,17 +2010,18 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
         # datastore.mexists() itself does not give us access to the
         # derived datastore record.
         log.verbose("Checking existence of %d datasets unknown to datastore", len(records))
-        ref_exists =
+        ref_exists = self._process_mexists_records(
             id_to_ref, records, False, artifact_existence=artifact_existence
         )
 
         # Now go through the records and propagate the ones that exist.
-        location_factory =
+        location_factory = self.locationFactory
         for missing, record_list in records.items():
             # Skip completely if the ref does not exist.
             ref = id_to_ref[missing]
             if not ref_exists[ref]:
-
+                if warn_for_missing:
+                    log.warning("Asked to transfer dataset %s but no file artifacts exist for it.", ref)
                 continue
             # Check for file artifact to decide which parts of a
             # disassembled composite do exist. If there is only a
@@ -2107,7 +2111,7 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
         if missing_ids and not self.trustGetRequest:
             raise ValueError(f"Number of datasets missing from this datastore: {len(missing_ids)}")
 
-        missing_records = self._find_missing_records(
+        missing_records = self._find_missing_records(refs, missing_ids)
         records.update(missing_records)
 
         # One artifact can be used by multiple DatasetRef.
@@ -2784,13 +2788,13 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
     @transactional
     def transfer_from(
         self,
-
+        source_records: FileTransferMap,
         refs: Collection[DatasetRef],
         transfer: str = "auto",
         artifact_existence: dict[ResourcePath, bool] | None = None,
         dry_run: bool = False,
     ) -> tuple[set[DatasetRef], set[DatasetRef]]:
-        log.verbose("Transferring %d datasets
+        log.verbose("Transferring %d datasets to %s", len(refs), self.name)
 
         # Stop early if "direct" transfer mode is requested. That would
         # require that the URI inside the source datastore should be stored
@@ -2805,125 +2809,6 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
         if not refs:
             return set(), set()
 
-        # Potentially can be transferring from a chain.
-        datastores = getattr(source_datastore, "datastores", [source_datastore])
-
-        incompatible: list[Datastore] = []
-        acceptable: list[FileDatastore] = []
-        for current_source in datastores:
-            if not isinstance(current_source, FileDatastore):
-                incompatible.append(current_source)
-            else:
-                acceptable.append(current_source)
-
-        if len(incompatible) == len(datastores):
-            if len(datastores) == 1:
-                raise TypeError(
-                    "Can only transfer to a FileDatastore from another FileDatastore, not"
-                    f" {get_full_type_name(source_datastore)}"
-                )
-            else:
-                types = [get_full_type_name(d) for d in datastores]
-                raise TypeError(
-                    f"ChainedDatastore encountered that had no FileDatastores. Had {','.join(types)}"
-                )
-
-        if len(acceptable) == 1:
-            # No need to filter in advance since there is only one usable
-            # source datastore.
-            return self._transfer_from(
-                acceptable[0], refs, transfer=transfer, artifact_existence=artifact_existence, dry_run=dry_run
-            )
-
-        # To avoid complaints from the transfer that the source does not have
-        # a ref, partition refs by source datastores, and any unknown to both
-        # are sent to any that support trustGetRequest.
-        unassigned_refs: set[DatasetRef] = set(refs)
-        known_refs: list[set[DatasetRef]] = []
-        for datastore in acceptable:
-            known_to_datastore = {ref for ref, known in datastore.knows_these(refs).items() if known}
-            known_refs.append(known_to_datastore)
-            unassigned_refs -= known_to_datastore
-
-        if unassigned_refs:
-            for datastore, refs_known_to_datastore in zip(acceptable, known_refs, strict=True):
-                if datastore.trustGetRequest:
-                    # Have to check each datastore in turn. If we do not do
-                    # this warnings will be issued further down for datasets
-                    # that are in one and not the other. The existence cache
-                    # will prevent repeat checks.
-                    exist_in_store = datastore.mexists(unassigned_refs, artifact_existence=artifact_existence)
-                    present = {ref for ref, exists in exist_in_store.items() if exists}
-                    refs_known_to_datastore.update(present)
-                    # Only transferring once so do not need to check later
-                    # datastores.
-                    unassigned_refs -= present
-                    log.debug(
-                        "Adding %d missing refs to list for transfer from %s", len(present), datastore.name
-                    )
-
-        if unassigned_refs:
-            log.warning(
-                "Encountered %d dataset%s where no file artifacts exist from the "
-                "source datastore and will be skipped.",
-                len(unassigned_refs),
-                "s" if len(unassigned_refs) != 1 else "",
-            )
-
-        # Once we have accepted refs from one datastore, do not need to try to
-        # transfer them again.
-        accepted: set[DatasetRef] = set()
-        rejected: set[DatasetRef] = set()
-        if artifact_existence is None:
-            artifact_existence = {}
-
-        for current_source, refs_to_transfer in zip(acceptable, known_refs, strict=True):
-            # Do not transfer if already transferred.
-            refs_to_transfer -= accepted
-            # No need to retry something that has already been rejected.
-            refs_to_transfer -= rejected
-
-            if not refs_to_transfer:
-                continue
-
-            log.verbose(
-                "Requesting transfer of %d dataset%s from datastore %s to %s",
-                len(refs_to_transfer),
-                "s" if len(refs_to_transfer) != 1 else "",
-                current_source.name,
-                self.name,
-            )
-            current_accepted, current_rejected = self._transfer_from(
-                current_source,
-                refs_to_transfer,
-                transfer=transfer,
-                artifact_existence=artifact_existence,
-                dry_run=dry_run,
-            )
-
-            accepted.update(current_accepted)
-            rejected.update(current_rejected)
-
-        log.verbose(
-            "Finished transfer_from %s to %s with %d accepted, %d rejected, %d requested",
-            source_datastore.name,
-            self.name,
-            len(accepted),
-            len(rejected),
-            len(refs),
-        )
-
-        return accepted, rejected
-
-    @transactional
-    def _transfer_from(
-        self,
-        source_datastore: FileDatastore,
-        refs: Collection[DatasetRef],
-        transfer: str = "auto",
-        artifact_existence: dict[ResourcePath, bool] | None = None,
-        dry_run: bool = False,
-    ) -> tuple[set[DatasetRef], set[DatasetRef]]:
         # Empty existence lookup if none given.
         if artifact_existence is None:
             artifact_existence = {}
@@ -2941,46 +2826,8 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
         # the dataset should be transferred. This will only happen if
         # the detached Butler has had a local ingest.
 
-        # What we really want is all the records in the source datastore
-        # associated with these refs. Or derived ones if they don't exist
-        # in the source.
-        log.verbose("Looking up source datastore records in %s", source_datastore.name)
-        source_records = source_datastore._get_stored_records_associated_with_refs(
-            refs, ignore_datastore_records=True
-        )
-
-        # The source dataset_ids are the keys in these records
-        source_ids = set(source_records)
-        log.debug("Number of datastore records found in source: %d", len(source_ids))
-
-        requested_ids = {ref.id for ref in refs}
-        missing_ids = requested_ids - source_ids
-
-        # Missing IDs can be okay if that datastore has allowed
-        # gets based on file existence. Should we transfer what we can
-        # or complain about it and warn?
-        if missing_ids and not source_datastore.trustGetRequest:
-            raise ValueError(
-                f"Some datasets are missing from source datastore {source_datastore}: {missing_ids}"
-            )
-
-        # Need to map these missing IDs to a DatasetRef so we can guess
-        # the details.
-        if missing_ids:
-            log.info(
-                "Number of expected datasets missing from source datastore records: %d out of %d",
-                len(missing_ids),
-                len(requested_ids),
-            )
-            found_records = self._find_missing_records(
-                source_datastore, refs, missing_ids, artifact_existence
-            )
-            source_records.update(found_records)
-
         # See if we already have these records
-        log.verbose(
-            "Looking up existing datastore records in target %s for %d refs", self.name, len(requested_ids)
-        )
+        log.verbose("Looking up existing datastore records in target %s for %d refs", self.name, len(refs))
         target_records = self._get_stored_records_associated_with_refs(refs, ignore_datastore_records=True)
 
         # The artifacts to register
@@ -3017,8 +2864,9 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
                 continue
 
             # mypy needs to know these are always resolved refs
-            for
-
+            for transfer_info in source_records.get(ref.id, []):
+                info = transfer_info.file_info
+                source_location = transfer_info.location
                 target_location = info.file_location(self.locationFactory)
                 if source_location == target_location and not source_location.pathInStore.isabs():
                     # Artifact is already in the target location.
@@ -3096,14 +2944,45 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
             )
 
         log.verbose(
-            "Finished transfer_from
-            source_datastore.name,
+            "Finished transfer_from to %s with %d accepted, %d rejected",
             self.name,
             len(accepted),
             len(rejected),
         )
         return accepted, rejected
 
+    def get_file_info_for_transfer(self, dataset_ids: Iterable[DatasetId]) -> FileTransferMap:
+        source_records = self._get_stored_records_associated_with_refs(
+            [FakeDatasetRef(id) for id in dataset_ids], ignore_datastore_records=True
+        )
+        return self._convert_stored_file_info_to_file_transfer_record(source_records)
+
+    def locate_missing_files_for_transfer(
+        self, refs: Iterable[DatasetRef], artifact_existence: dict[ResourcePath, bool]
+    ) -> FileTransferMap:
+        missing_ids = {ref.id for ref in refs}
+        # Missing IDs can be okay if that datastore has allowed
+        # gets based on file existence. Should we transfer what we can
+        # or complain about it and warn?
+        if not self.trustGetRequest:
+            return {}
+
+        found_records = self._find_missing_records(
+            refs, missing_ids, artifact_existence, warn_for_missing=False
+        )
+        return self._convert_stored_file_info_to_file_transfer_record(found_records)
+
+    def _convert_stored_file_info_to_file_transfer_record(
+        self, info_map: dict[DatasetId, list[StoredFileInfo]]
+    ) -> FileTransferMap:
+        output: dict[DatasetId, list[FileTransferRecord]] = {}
+        for k, file_info_list in info_map.items():
+            output[k] = [
+                FileTransferRecord(file_info=info, location=info.file_location(self.locationFactory))
+                for info in file_info_list
+            ]
+        return output
+
     @transactional
     def forget(self, refs: Iterable[DatasetRef]) -> None:
         # Docstring inherited.
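
Taken together with the new `transfer_from` signature above, these helpers change the flow from "pass the source datastore" to "pass the source's file records". A hedged sketch of the new calling convention; the wrapper function itself is not part of the package:

```python
from collections.abc import Collection

from lsst.daf.butler import DatasetRef
from lsst.daf.butler.datastore import FileTransferMap, FileTransferSource
from lsst.daf.butler.datastores.fileDatastore import FileDatastore


def copy_artifacts(
    source: FileTransferSource, target: FileDatastore, refs: Collection[DatasetRef]
) -> tuple[set[DatasetRef], set[DatasetRef]]:
    # Step 1: resolve the source file records up front (database lookup).
    records: FileTransferMap = source.get_file_info_for_transfer([ref.id for ref in refs])
    # Step 2: hand the pre-resolved records, not the source datastore itself,
    # to the target datastore.
    return target.transfer_from(records, refs, transfer="auto")
```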
@@ -3357,16 +3236,8 @@ def _to_file_info_payload(
 ) -> FileDatastoreGetPayloadFileInfo:
     location, file_info = info
 
-    # Make sure that we send only relative paths, to avoid leaking
-    # details of our configuration to the client.
-    path = location.pathInStore
-    if path.isabs():
-        relative_path = path.relativeToPathRoot
-    else:
-        relative_path = str(path)
-
     datastoreRecords = file_info.to_simple()
-    datastoreRecords.path =
+    datastoreRecords.path = make_datastore_path_relative(datastoreRecords.path)
 
     return FileDatastoreGetPayloadFileInfo(
         url=location.uri.generate_presigned_get_url(expiration_time_seconds=url_expiration_time_seconds),
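
`make_datastore_path_relative` is added to `lsst/daf/butler/datastore/stored_file_info.py` (also touched in this diff) and its implementation is not shown here. Judging from the inline logic it replaces above, an equivalent sketch, assuming the argument behaves like the `ResourcePath` that was previously inspected:

```python
from lsst.resources import ResourcePath


def make_path_relative_sketch(path: ResourcePath) -> str:
    # Absolute paths are reduced to their path-root-relative form so that
    # datastore configuration details are not leaked to clients; relative
    # paths pass through unchanged.
    return path.relativeToPathRoot if path.isabs() else str(path)
```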

lsst/daf/butler/datastores/file_datastore/transfer.py (new file)

@@ -0,0 +1,104 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+
+from lsst.resources import ResourcePath
+from lsst.utils.logging import getLogger
+
+from ..._dataset_ref import DatasetRef
+from ...datastore import FileTransferMap, FileTransferSource
+
+log = getLogger(__name__)
+
+
+def retrieve_file_transfer_records(
+    source_datastore: FileTransferSource,
+    refs: Iterable[DatasetRef],
+    artifact_existence: dict[ResourcePath, bool],
+) -> FileTransferMap:
+    """Look up the datastore records corresponding to the given datasets.
+
+    Parameters
+    ----------
+    source_datastore : `FileTransferSource`
+        Object used to look up records.
+    refs : `~collections.abc.Iterable` [ `DatasetRef` ]
+        List of datasets to retrieve records for.
+    artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
+        Cache mapping datastore artifact to existence. Updated by
+        this method with details of all artifacts tested.
+
+    Return
+    ------
+    files : `FileTransferMap`
+        A dictionary from `DatasetId` to a list of `FileTransferRecord`,
+        containing information about the files that were found for these
+        artifacts. If files were not found for a given `DatasetRef`, there
+        will be no entry for it in this dictionary.
+
+    Notes
+    -----
+    This will first attempt to look up records using the database, and then
+    fall back to searching the filesystem if the transfer source is configured
+    to do so.
+    """
+    log.verbose("Looking up source datastore records in %s", source_datastore.name)
+    refs_by_id = {ref.id: ref for ref in refs}
+    source_records = source_datastore.get_file_info_for_transfer(refs_by_id.keys())
+
+    log.debug("Number of datastore records found in source: %d", len(source_records))
+
+    # If we couldn't find all of the datasets in the database, continue
+    # searching. Some datastores may have artifacts on disk that do not have
+    # corresponding records in the database.
+    missing_ids = refs_by_id.keys() - source_records.keys()
+    if missing_ids:
+        log.info(
+            "Number of expected datasets missing from source datastore records: %d out of %d",
+            len(missing_ids),
+            len(refs_by_id),
+        )
+        missing_refs = {refs_by_id[id] for id in missing_ids}
+        found_records = source_datastore.locate_missing_files_for_transfer(missing_refs, artifact_existence)
+        source_records |= found_records
+
+        still_missing = len(missing_refs) - len(found_records)
+        if still_missing:
+            for ref in missing_refs:
+                if ref.id not in found_records:
+                    log.warning("Asked to transfer dataset %s but no file artifacts exist for it.", ref)
+            log.warning(
+                "Encountered %d dataset%s where no file artifacts exist from the "
+                "source datastore and will be skipped.",
+                still_missing,
+                "s" if still_missing != 1 else "",
+            )
+
+    return source_records
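
A sketch of how the new helper is consumed, mirroring the `DirectButler.transfer_from` change further down in this diff; `_file_transfer_source` is the butler-internal attribute used there, and the wrapper function here is illustrative only:

```python
from collections.abc import Iterable

from lsst.daf.butler import DatasetRef, LimitedButler
from lsst.daf.butler.datastore import FileTransferMap
from lsst.daf.butler.datastores.file_datastore.transfer import retrieve_file_transfer_records
from lsst.resources import ResourcePath


def lookup_records(source_butler: LimitedButler, refs: Iterable[DatasetRef]) -> FileTransferMap:
    # The cache is filled in with every artifact whose existence gets checked,
    # so later transfer steps can reuse the answers.
    artifact_existence: dict[ResourcePath, bool] = {}
    return retrieve_file_transfer_records(
        source_butler._file_transfer_source, list(refs), artifact_existence
    )
```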

lsst/daf/butler/dimensions/_coordinate.py

@@ -427,6 +427,9 @@ class DataCoordinate:
     @overload
     def get(self, key: str, default: str) -> str: ...
 
+    @overload
+    def get(self, key: str, default: DataIdValue | None) -> DataIdValue | None: ...
+
     def get(self, key: str, default: DataIdValue | None = None) -> DataIdValue | None:
         try:
             return self.__getitem__(key)
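
The practical effect of the extra overload is that non-`str` defaults now type-check; a small sketch, with `data_id` assumed to be a resolved `DataCoordinate`:

```python
from lsst.daf.butler import DataCoordinate


def visit_or_default(data_id: DataCoordinate) -> int | str | None:
    # Previously only get(key) and get(key, default: str) were declared, so an
    # int default like this had no matching overload for static type checkers.
    return data_id.get("visit", 0)
```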

lsst/daf/butler/direct_butler/_direct_butler.py

@@ -75,6 +75,7 @@ from .._storage_class import StorageClass, StorageClassFactory
 from .._timespan import Timespan
 from ..datastore import Datastore, NullDatastore
 from ..datastores.file_datastore.retrieve_artifacts import ZipIndex, retrieve_and_zip
+from ..datastores.file_datastore.transfer import retrieve_file_transfer_records
 from ..dimensions import DataCoordinate, Dimension, DimensionGroup
 from ..direct_query_driver import DirectQueryDriver
 from ..progress import Progress
@@ -1765,7 +1766,6 @@ class DirectButler(Butler): # numpydoc ignore=PR02
         import_info = self._prepare_for_import_refs(
             self,
             refs,
-            skip_missing=False,
             register_dataset_types=True,
             dry_run=dry_run,
             transfer_dimensions=transfer_dimensions,
@@ -2063,7 +2063,6 @@
         source_butler: LimitedButler,
         source_refs: Iterable[DatasetRef],
         *,
-        skip_missing: bool = True,
         register_dataset_types: bool = False,
         transfer_dimensions: bool = False,
         dry_run: bool = False,
@@ -2087,27 +2086,6 @@
             str(self),
         )
 
-        # In some situations the datastore artifact may be missing
-        # and we do not want that registry entry to be imported.
-        # Asking datastore is not sufficient, the records may have been
-        # purged, we have to ask for the (predicted) URI and check
-        # existence explicitly. Execution butler is set up exactly like
-        # this with no datastore records.
-        artifact_existence: dict[ResourcePath, bool] = {}
-        if skip_missing:
-            dataset_existence = source_butler._datastore.mexists(
-                source_refs, artifact_existence=artifact_existence
-            )
-            source_refs = [ref for ref, exists in dataset_existence.items() if exists]
-            filtered_count = len(source_refs)
-            n_missing = original_count - filtered_count
-            _LOG.verbose(
-                "%d dataset%s removed because the artifact does not exist. Now have %d.",
-                n_missing,
-                "" if n_missing == 1 else "s",
-                filtered_count,
-            )
-
         # Importing requires that we group the refs by dimension group and run
         # before doing the import.
         source_dataset_types = set()
@@ -2207,7 +2185,7 @@
         dimension_records = self._extract_all_dimension_records_from_data_ids(
             source_butler, dataIds, elements
         )
-        return _ImportDatasetsInfo(grouped_refs, dimension_records
+        return _ImportDatasetsInfo(grouped_refs, dimension_records)
 
     def _import_dimension_records(
         self,
@@ -2294,15 +2272,40 @@
         dry_run: bool = False,
     ) -> collections.abc.Collection[DatasetRef]:
         # Docstring inherited.
+        source_refs = list(source_refs)
         if not self.isWriteable():
             raise TypeError("Butler is read-only.")
 
         progress = Progress("lsst.daf.butler.Butler.transfer_from", level=VERBOSE)
 
+        artifact_existence: dict[ResourcePath, bool] = {}
+        file_transfer_source = source_butler._file_transfer_source
+        transfer_records = retrieve_file_transfer_records(
+            file_transfer_source, source_refs, artifact_existence
+        )
+        # In some situations the datastore artifact may be missing and we do
+        # not want that registry entry to be imported. For example, this can
+        # happen if a file was removed but the dataset was left in the registry
+        # for provenance, or if a pipeline task didn't create all of the
+        # possible files in a QuantumBackedButler.
+        if skip_missing:
+            original_ids = {ref.id for ref in source_refs}
+            missing_ids = original_ids - transfer_records.keys()
+            if missing_ids:
+                original_count = len(source_refs)
+                source_refs = [ref for ref in source_refs if ref.id not in missing_ids]
+                filtered_count = len(source_refs)
+                n_missing = original_count - filtered_count
+                _LOG.verbose(
+                    "%d dataset%s removed because the artifact does not exist. Now have %d.",
+                    n_missing,
+                    "" if n_missing == 1 else "s",
+                    filtered_count,
+                )
+
         import_info = self._prepare_for_import_refs(
             source_butler,
             source_refs,
-            skip_missing=skip_missing,
             register_dataset_types=register_dataset_types,
             dry_run=dry_run,
             transfer_dimensions=transfer_dimensions,
@@ -2317,11 +2320,12 @@
 
         # Ask the datastore to transfer. The datastore has to check that
         # the source datastore is compatible with the target datastore.
+        _LOG.verbose("Transferring %d datasets from %s", len(transfer_records), file_transfer_source.name)
         accepted, rejected = self._datastore.transfer_from(
-
+            transfer_records,
             imported_refs,
             transfer=transfer,
-            artifact_existence=
+            artifact_existence=artifact_existence,
             dry_run=dry_run,
         )
         if rejected:
@@ -2567,4 +2571,3 @@ class _ImportDatasetsInfo(NamedTuple):
 
     grouped_refs: defaultdict[_RefGroup, list[DatasetRef]]
     dimension_records: dict[DimensionElement, dict[DataCoordinate, DimensionRecord]]
-    artifact_existence: dict[ResourcePath, bool]

lsst/daf/butler/formatters/parquet.py

@@ -295,7 +295,7 @@ def arrow_to_numpy(arrow_table: pa.Table) -> np.ndarray | np.ma.MaskedArray:
     numpy_dict = arrow_to_numpy_dict(arrow_table)
 
     has_mask = False
-    dtype = []
+    dtype: list[tuple] = []
     for name, col in numpy_dict.items():
         if len(shape := numpy_dict[name].shape) <= 1:
             dtype.append((name, col.dtype))
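
The annotated list accumulates `(name, dtype)` or `(name, dtype, shape)` tuples that NumPy assembles into a structured dtype; a standalone illustration:

```python
import numpy as np

# Mixture of scalar and fixed-shape columns, as built up by the loop above.
dtype = [("a", np.float64), ("b", np.int32, (3,))]
arr = np.zeros(2, dtype=dtype)
print(arr.dtype)  # [('a', '<f8'), ('b', '<i4', (3,))]
```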
@@ -429,7 +429,11 @@ def numpy_to_arrow(np_array: np.ndarray) -> pa.Table:
     md = {}
     md[b"lsst::arrow::rowcount"] = str(len(np_array))
 
-
+    names = np_array.dtype.names
+    if names is None:
+        names = ()
+
+    for name in names:
         _append_numpy_string_metadata(md, name, np_array.dtype[name])
         _append_numpy_multidim_metadata(md, name, np_array.dtype[name])
 
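
The new guard covers plain (non-structured) arrays, whose dtype has no field names; iterating `dtype.names` directly would raise `TypeError` in that case:

```python
import numpy as np

structured = np.zeros(3, dtype=[("a", "f8"), ("b", "i4")])
plain = np.zeros(3)

print(structured.dtype.names)  # ('a', 'b')
print(plain.dtype.names)       # None
```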
@@ -1379,7 +1383,7 @@ def _numpy_dict_to_dtype(numpy_dict: dict[str, np.ndarray]) -> tuple[np.dtype, i
     """
     import numpy as np
 
-    dtype_list = []
+    dtype_list: list[tuple] = []
     rowcount = 0
     for name, col in numpy_dict.items():
         if rowcount == 0:

lsst/daf/butler/registry/interfaces/_database.py

@@ -1696,9 +1696,8 @@ class Database(ABC):
         for k, v in content.items():
             if k == name:
                 continue
-            column = table.columns[k]
             # The set only has one element
-            clauses.append(
+            clauses.append(table.columns[k] == v.pop())
 
         # The IN operator will not work for "infinite" numbers of
         # rows so must batch it up into distinct calls.
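
The clause being appended is a plain SQLAlchemy column-equality expression; a minimal standalone illustration (table and column names are placeholders):

```python
import sqlalchemy

metadata = sqlalchemy.MetaData()
table = sqlalchemy.Table("example", metadata, sqlalchemy.Column("k", sqlalchemy.Integer))

values = {3}  # "The set only has one element"
clause = table.columns["k"] == values.pop()
print(clause)  # example.k = :k_1
```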

lsst/daf/butler/registry/obscore/_config.py

@@ -178,6 +178,11 @@ class ObsCoreConfig(pydantic.BaseModel):
     indexing support, but a standard ``s_region`` column is always included.
     """
 
+    fallback_instrument: str | None = None
+    """Instrument to use if a dataset type does not have an instrument
+    dimension. Will be left unset if `None`. Can be dangerous to set this
+    in a repository containing data from multiple instruments."""
+
 
 class ConfigCollectionType(str, enum.Enum):
     """Enum class defining possible values for configuration attributes."""

lsst/daf/butler/registry/obscore/_records.py

@@ -394,12 +394,14 @@ class DafButlerRecordFactory(RecordFactory):
         dataId = ref.dataId
         record: dict[str, str | int | float | UUID | None] = {}
 
-        instrument_name = cast(str, dataId.get("instrument"))
+        instrument_name = cast(str | None, dataId.get("instrument", self.config.fallback_instrument))
         record["instrument_name"] = instrument_name
         if self.schema.dataset_fk is not None:
             record[self.schema.dataset_fk.name] = ref.id
 
-        record["facility_name"] = self.config.facility_map.get(
+        record["facility_name"] = self.config.facility_map.get(
+            instrument_name or "", self.config.facility_name
+        )
 
         timespan = dataId.timespan
         if timespan is not None:
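
A standalone illustration of the fallback behaviour introduced by the new `fallback_instrument` option and the two hunks above, using plain dicts in place of the `DataCoordinate` and `ObsCoreConfig` objects (all values are hypothetical):

```python
data_id = {"detector": 42}                     # data ID without an "instrument" dimension
fallback_instrument = "SomeCamera"             # hypothetical ObsCoreConfig.fallback_instrument
facility_map = {"SomeCamera": "SomeFacility"}  # hypothetical ObsCoreConfig.facility_map
default_facility = "Unknown facility"          # hypothetical ObsCoreConfig.facility_name

instrument_name = data_id.get("instrument", fallback_instrument)
facility_name = facility_map.get(instrument_name or "", default_facility)
print(instrument_name, facility_name)  # SomeCamera SomeFacility
```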

lsst/daf/butler/remote_butler/_http_connection.py

@@ -69,10 +69,14 @@ class RemoteButlerHttpConnection:
         self.server_url = server_url
         self._access_token = access_token
 
-
+        self._auth_headers = get_authentication_headers(access_token)
         headers = {"user-agent": f"RemoteButler/{__version__}"}
 
-        self._headers =
+        self._headers = self._auth_headers | headers
+
+    @property
+    def authentication_headers(self) -> dict[str, str]:
+        return self._auth_headers
 
     def post(self, path: str, model: BaseModel) -> httpx.Response:
         """Send a POST request to the Butler server.
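
A hypothetical use of the new accessor: code holding a `RemoteButlerHttpConnection` (such as the new `RemoteFileTransferSource`, whose implementation is not shown in this diff) can reuse the connection's auth headers for requests it issues itself:

```python
import httpx


def fetch(connection, url: str) -> bytes:
    # `connection` is assumed to be a RemoteButlerHttpConnection instance.
    response = httpx.get(url, headers=connection.authentication_headers)
    response.raise_for_status()
    return response.content
```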

lsst/daf/butler/remote_butler/_remote_butler.py

@@ -76,6 +76,7 @@ from ._query_results import convert_dataset_ref_results, read_query_results
 from ._ref_utils import apply_storage_class_override, normalize_dataset_type_name, simplify_dataId
 from ._registry import RemoteButlerRegistry
 from ._remote_butler_collections import RemoteButlerCollections
+from ._remote_file_transfer_source import RemoteFileTransferSource
 from .server_models import (
     CollectionList,
     FindDatasetRequestModel,
@@ -713,6 +714,10 @@ class RemoteButler(Butler): # numpydoc ignore=PR02
             connection=self._connection, cache=self._cache, defaults=defaults, metrics=metrics
         )
 
+    @property
+    def _file_transfer_source(self) -> RemoteFileTransferSource:
+        return RemoteFileTransferSource(self._connection)
+
     def __str__(self) -> str:
         return f"RemoteButler({self._connection.server_url})"
 
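
Taken together, these changes route a butler-to-butler `transfer_from()` (including one whose source is a `RemoteButler`) through the new `FileTransferSource` machinery. A hedged end-to-end sketch; repository URIs, collection and dataset type names are placeholders:

```python
from lsst.daf.butler import Butler

source = Butler("https://data.example.org/api/butler/repo/main")  # RemoteButler
target = Butler("/path/to/local/repo", writeable=True)            # DirectButler

refs = source.query_datasets("calexp", collections="some/collection")
target.transfer_from(source, refs, transfer="copy", skip_missing=True)
```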