lsst-daf-butler 30.0.0rc2__py3-none-any.whl → 30.0.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/daf/butler/_butler.py +8 -5
- lsst/daf/butler/_butler_metrics.py +49 -2
- lsst/daf/butler/_formatter.py +2 -7
- lsst/daf/butler/_labeled_butler_factory.py +28 -8
- lsst/daf/butler/_rubin/temporary_for_ingest.py +207 -0
- lsst/daf/butler/configs/datastores/formatters.yaml +1 -0
- lsst/daf/butler/configs/storageClasses.yaml +15 -0
- lsst/daf/butler/datastore/record_data.py +1 -1
- lsst/daf/butler/datastores/fileDatastore.py +15 -12
- lsst/daf/butler/dimensions/_coordinate.py +5 -0
- lsst/daf/butler/direct_butler/_direct_butler.py +45 -28
- lsst/daf/butler/logging.py +9 -3
- lsst/daf/butler/registry/bridge/monolithic.py +17 -13
- lsst/daf/butler/registry/datasets/byDimensions/_manager.py +49 -45
- lsst/daf/butler/registry/expand_data_ids.py +93 -0
- lsst/daf/butler/registry/interfaces/_database.py +6 -1
- lsst/daf/butler/registry/sql_registry.py +2 -24
- lsst/daf/butler/remote_butler/_remote_butler.py +5 -1
- lsst/daf/butler/tests/hybrid_butler.py +4 -1
- lsst/daf/butler/tests/registry_data/lsstcam-subset.yaml +191 -0
- lsst/daf/butler/tests/testFormatters.py +2 -2
- lsst/daf/butler/transfers/_context.py +7 -6
- lsst/daf/butler/version.py +1 -1
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/METADATA +1 -1
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/RECORD +33 -30
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/WHEEL +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/entry_points.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/licenses/LICENSE +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/top_level.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.0rc3.dist-info}/zip-safe +0 -0
lsst/daf/butler/direct_butler/_direct_butler.py
CHANGED

@@ -1822,12 +1822,25 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
                 f" Example: {existing_datasets[0]}"
             )

+        # Calculate some statistics based on the given list of datasets.
+        n_files = len(datasets)
+        n_datasets = 0
+        for d in datasets:
+            n_datasets += len(d.refs)
+        sfiles = "s" if n_files != 1 else ""
+        srefs = "s" if n_datasets != 1 else ""
+
         # We use `datasets` rather `new_datasets` for the Registry
         # portion of this, to let it confirm that everything matches the
         # existing datasets.
         import_info = self._prepare_ingest_file_datasets(datasets, progress)

-        with
+        with (
+            self._metrics.instrument_ingest(
+                n_datasets, _LOG, msg=f"Ingesting {n_files} file{sfiles} with {n_datasets} dataset{srefs}"
+            ),
+            self.transaction(),
+        ):
             self._ingest_file_datasets(datasets, import_info, progress)

         # Bulk-insert everything into Datastore.
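For orientation: `instrument_ingest` is provided by the `ButlerMetrics` helper that is extended in `lsst/daf/butler/_butler_metrics.py` (+49 -2 in the file list above). Its real implementation is not included in this extract; the sketch below only illustrates the assumed shape of such a call, a timing context manager that logs the supplied message when the block finishes, and every name in it is a hypothetical stand-in rather than Butler code.

    import logging
    import time
    from collections.abc import Iterator
    from contextlib import contextmanager


    @contextmanager
    def instrument_ingest_sketch(n_datasets: int, log: logging.Logger, *, msg: str) -> Iterator[None]:
        # Hypothetical stand-in: time the enclosed block and report the message
        # (plus the dataset count and elapsed time) when it completes.
        start = time.perf_counter()
        try:
            yield
        finally:
            log.info("%s (%d datasets in %.2f s)", msg, n_datasets, time.perf_counter() - start)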
@@ -1982,7 +1995,7 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
             doImport(filename)  # type: ignore

     def transfer_dimension_records_from(
-        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
+        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef | DataCoordinate]
     ) -> None:
         # Allowed dimensions in the target butler.
         elements = frozenset(element for element in self.dimensions.elements if element.has_own_table)

@@ -2012,16 +2025,13 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
             source_butler, data_ids, allowed_elements
         )

-        can_query = True if isinstance(source_butler, Butler) else False
-
         additional_records: dict[DimensionElement, dict[DataCoordinate, DimensionRecord]] = defaultdict(dict)
         for original_element, record_mapping in primary_records.items():
             # Get dimensions that depend on this dimension.
             populated_by = self.dimensions.get_elements_populated_by(
                 self.dimensions[original_element.name]  # type: ignore
             )
-
-            for data_id in record_mapping.keys():
+            if populated_by:
                 for element in populated_by:
                     if element not in allowed_elements:
                         continue

@@ -2040,28 +2050,32 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
                         # have to be scanned.
                         continue

-                if
-   … (old content not captured)
+                if record_mapping:
+                    if not isinstance(source_butler, Butler):
+                        raise RuntimeError(
+                            f"Transferring populated_by records like {element.name}"
+                            " requires a full Butler."
+                        )

-   … (old content not captured)
-                        additional_records[record.definition].setdefault(record.dataId, record)
+                    with source_butler.query() as query:
+                        records = query.join_data_coordinates(record_mapping.keys()).dimension_records(
+                            element.name
+                        )
+                        for record in records:
+                            additional_records[record.definition].setdefault(record.dataId, record)

         # The next step is to walk back through the additional records to
         # pick up any missing content (such as visit_definition needing to
         # know the exposure). Want to ensure we do not request records we
         # already have.
         missing_data_ids = set()
-        for
+        for record_mapping in additional_records.values():
             for data_id in record_mapping.keys():
-   … (old content not captured)
+                for dimension in data_id.dimensions.required:
+                    element = source_butler.dimensions[dimension]
+                    dimension_key = data_id.subset(dimension)
+                    if dimension_key not in primary_records[element]:
+                        missing_data_ids.add(dimension_key)

         # Fill out the new records. Assume that these new records do not
         # also need to carry over additional populated_by records.

@@ -2078,19 +2092,19 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
     def _extract_dimension_records_from_data_ids(
         self,
         source_butler: LimitedButler | Butler,
-        data_ids:
+        data_ids: Iterable[DataCoordinate],
         allowed_elements: frozenset[DimensionElement],
     ) -> dict[DimensionElement, dict[DataCoordinate, DimensionRecord]]:
         dimension_records: dict[DimensionElement, dict[DataCoordinate, DimensionRecord]] = defaultdict(dict)

+        data_ids = set(data_ids)
+        if not all(data_id.hasRecords() for data_id in data_ids):
+            if isinstance(source_butler, Butler):
+                data_ids = source_butler._expand_data_ids(data_ids)
+            else:
+                raise TypeError("Input butler needs to be a full butler to expand DataId.")
+
         for data_id in data_ids:
-            # Need an expanded record, if not expanded that we need a full
-            # butler with registry (allow mocks with registry too).
-            if not data_id.hasRecords():
-                if registry := getattr(source_butler, "registry", None):
-                    data_id = registry.expandDataId(data_id)
-                else:
-                    raise TypeError("Input butler needs to be a full butler to expand DataId.")
             # If this butler doesn't know about a dimension in the source
             # butler things will break later.
             for element_name in data_id.dimensions.elements:

@@ -2569,6 +2583,9 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
         """Immediately load caches that are used for common operations."""
         self._registry.preload_cache(load_dimension_record_cache=load_dimension_record_cache)

+    def _expand_data_ids(self, data_ids: Iterable[DataCoordinate]) -> list[DataCoordinate]:
+        return self._registry.expand_data_ids(data_ids)
+
     _config: ButlerConfig
     """Configuration for this Butler instance."""
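Taken together with the matching signature changes in `_remote_butler.py` and `hybrid_butler.py` below, `transfer_dimension_records_from` now accepts plain `DataCoordinate` values as well as `DatasetRef`s. A hedged usage sketch; the repository paths and the queried dimensions are placeholders, and any iterable of data coordinates from the source butler would do.

    from lsst.daf.butler import Butler

    source = Butler("source-repo")                   # placeholder repo
    target = Butler("target-repo", writeable=True)   # placeholder repo

    with source.query() as query:
        # Plain DataCoordinates are now accepted, not just DatasetRefs.
        data_ids = list(query.data_ids(["instrument", "physical_filter"]))

    target.transfer_dimension_records_from(source, data_ids)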
lsst/daf/butler/logging.py
CHANGED

@@ -764,11 +764,17 @@ class ButlerLogRecords(MutableSequence[ButlerLogRecord]):


 class ButlerLogRecordHandler(StreamHandler):
-    """Python log handler that accumulates records.
+    """Python log handler that accumulates records.

-   … (old content not captured)
+    Parameters
+    ----------
+    records : `ButlerLogRecords`, optional
+        Container to store logs in.
+    """
+
+    def __init__(self, records: ButlerLogRecords | None = None) -> None:
         super().__init__()
-        self.records = ButlerLogRecords([])
+        self.records = ButlerLogRecords([]) if records is None else records

     def emit(self, record: LogRecord) -> None:
         self.records.append(record)
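The new optional ``records`` argument lets callers hand the handler an existing `ButlerLogRecords` container instead of having it create its own, so several consumers can share one accumulator. A small usage sketch (the logger name is arbitrary):

    import logging

    from lsst.daf.butler.logging import ButlerLogRecordHandler, ButlerLogRecords

    records = ButlerLogRecords([])             # externally owned container
    handler = ButlerLogRecordHandler(records)  # omit the argument to keep the old behaviour

    log = logging.getLogger("example.ingest")
    log.addHandler(handler)
    log.warning("Something worth capturing")

    assert handler.records is records
    assert len(records) == 1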
lsst/daf/butler/registry/bridge/monolithic.py
CHANGED

@@ -215,20 +215,24 @@ class MonolithicDatastoreRegistryBridge(DatastoreRegistryBridge):
     def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
         # Docstring inherited from DatastoreRegistryBridge
         byId = {ref.id: ref for ref in refs}
-   … (old content not captured)
-        .
-   … (old content not captured)
-        self._tables.dataset_location
-   … (old content not captured)
+        found: list[DatasetIdRef] = []
+        with self._db.session():
+            for batch in chunk_iterable(byId.keys(), 50000):
+                sql = (
+                    sqlalchemy.sql.select(self._tables.dataset_location.columns.dataset_id)
+                    .select_from(self._tables.dataset_location)
+                    .where(
+                        sqlalchemy.sql.and_(
+                            self._tables.dataset_location.columns.datastore_name == self.datastoreName,
+                            self._tables.dataset_location.columns.dataset_id.in_(batch),
+                        )
+                    )
                 )
-   … (old content not captured)
-            yield byId[row.dataset_id]
+                with self._db.query(sql) as sql_result:
+                    sql_ids = sql_result.scalars().all()
+                found.extend(byId[id] for id in sql_ids)
+
+        return found

     @contextmanager
     def emptyTrash(
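This rewrite, like the `byDimensions/_manager.py` hunks below, pushes large ID lists through `lsst.utils.iteration.chunk_iterable` so that each query's `IN (...)` clause stays below a bounded number of parameters (50000 here). A minimal illustration of the batching helper on its own, with made-up IDs:

    from lsst.utils.iteration import chunk_iterable

    dataset_ids = [f"fake-uuid-{i}" for i in range(120_000)]  # made-up IDs
    batches = [list(batch) for batch in chunk_iterable(dataset_ids, 50_000)]

    assert sum(len(batch) for batch in batches) == len(dataset_ids)
    assert all(len(batch) <= 50_000 for batch in batches)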
lsst/daf/butler/registry/datasets/byDimensions/_manager.py
CHANGED

@@ -12,6 +12,8 @@ from typing import TYPE_CHECKING, Any, ClassVar
 import astropy.time
 import sqlalchemy

+from lsst.utils.iteration import chunk_iterable
+
 from .... import ddl
 from ...._collection_type import CollectionType
 from ...._dataset_ref import DatasetId, DatasetIdFactory, DatasetIdGenEnum, DatasetRef

@@ -424,17 +426,18 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
         return result

     def get_dataset_refs(self, ids: list[DatasetId]) -> list[DatasetRef]:
-   … (old content not captured)
-        id_col
-   … (old content not captured)
+        dataset_type_map: dict[DatasetId, DatasetType] = {}
+        for batch in chunk_iterable(set(ids), 50000):
+            # Look up the dataset types corresponding to the given Dataset IDs.
+            id_col = self._static.dataset.columns["id"]
+            sql = sqlalchemy.sql.select(
+                id_col,
+                self._static.dataset.columns["dataset_type_id"],
+            ).where(id_col.in_(batch))
+            with self._db.query(sql) as sql_result:
+                dataset_rows = sql_result.mappings().all()
+            for row in dataset_rows:
+                dataset_type_map[row["id"]] = self._get_dataset_type_by_id(row["dataset_type_id"])

         # Group the given dataset IDs by the DimensionGroup of their dataset
         # types -- there is a separate tags table for each DimensionGroup.

@@ -448,40 +451,41 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
             # data IDs corresponding to the UUIDs found from the dataset table.
             dynamic_tables = self._get_dynamic_tables(dimension_group)
             tags_table = self._get_tags_table(dynamic_tables)
-   … (old content not captured)
+            for batch in chunk_iterable(datasets, 50000):
+                tags_sql = tags_table.select().where(tags_table.columns["dataset_id"].in_(batch))
+                # Join in the collection table to fetch the run name.
+                collection_column = tags_table.columns[self._collections.getCollectionForeignKeyName()]
+                joined_collections = self._collections.join_collections_sql(collection_column, tags_sql)
+                tags_sql = joined_collections.joined_sql
+                run_name_column = joined_collections.name_column
+                tags_sql = tags_sql.add_columns(run_name_column)
+                # Tags table includes run collections and tagged
+                # collections.
+                # In theory the data ID for a given dataset should be the
+                # same in both, but nothing actually guarantees this.
+                # So skip any tagged collections, using the run collection
+                # as the definitive definition.
+                tags_sql = tags_sql.where(joined_collections.type_column == int(CollectionType.RUN))
+
+                with self._db.query(tags_sql) as sql_result:
+                    data_id_rows = sql_result.mappings().all()
+
+                assert run_name_column.key is not None
+                for data_id_row in data_id_rows:
+                    id = data_id_row["dataset_id"]
+                    dataset_type = dataset_type_map[id]
+                    run_name = data_id_row[run_name_column.key]
+                    data_id = DataCoordinate.from_required_values(
+                        dimension_group,
+                        tuple(data_id_row[dimension] for dimension in dimension_group.required),
+                    )
+                    ref = DatasetRef(
+                        datasetType=dataset_type,
+                        dataId=data_id,
+                        id=id,
+                        run=run_name,
+                    )
+                    output_refs.append(ref)

         return output_refs
lsst/daf/butler/registry/expand_data_ids.py
ADDED

@@ -0,0 +1,93 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+from collections import defaultdict
+from collections.abc import Iterable
+
+from ..dimensions import (
+    DataCoordinate,
+    DimensionDataAttacher,
+    DimensionGroup,
+    DimensionUniverse,
+)
+from ..dimensions.record_cache import DimensionRecordCache
+from ..queries import QueryFactoryFunction
+
+
+def expand_data_ids(
+    data_ids: Iterable[DataCoordinate],
+    universe: DimensionUniverse,
+    query_func: QueryFactoryFunction,
+    cache: DimensionRecordCache | None,
+) -> list[DataCoordinate]:
+    """Expand the given data IDs to look up implied dimension values and attach
+    dimension records.
+
+    Parameters
+    ----------
+    data_ids : `~collections.abc.Iterable` [ `DataCoordinate` ]
+        Data coordinates to be expanded.
+    universe : `DimensionUniverse`
+        Dimension universe associated with the given ``data_ids`` values.
+    query_func : QueryFactoryFunction
+        Function used to set up a Butler query context for looking up required
+        information from the database.
+    cache : `DimensionRecordCache` | None
+        Cache containing already-known dimension records. May be `None` if a
+        cache is not available.
+
+    Returns
+    -------
+    expanded : `list` [ `DataCoordinate` ]
+        List of `DataCoordinate` instances in the same order as the input
+        values. It is guaranteed that each `DataCoordinate` has
+        ``hasRecords()=True`` and ``hasFull()=True``.
+    """
+    output = list(data_ids)
+
+    grouped_by_dimensions: defaultdict[DimensionGroup, list[int]] = defaultdict(list)
+    for i, data_id in enumerate(data_ids):
+        if not data_id.hasRecords():
+            grouped_by_dimensions[data_id.dimensions].append(i)
+
+    if not grouped_by_dimensions:
+        # All given DataCoordinate values are already expanded.
+        return output
+
+    attacher = DimensionDataAttacher(
+        cache=cache,
+        dimensions=DimensionGroup.union(*grouped_by_dimensions.keys(), universe=universe),
+    )
+    for dimensions, indexes in grouped_by_dimensions.items():
+        with query_func() as query:
+            expanded = attacher.attach(dimensions, (output[index] for index in indexes), query)
+            for index, data_id in zip(indexes, expanded):
+                output[index] = data_id
+
+    return output
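This helper is what `SqlRegistry.expand_data_ids` and `RemoteButler._expand_data_ids` delegate to in the hunks below. A hedged sketch of calling it directly against a full butler, passing the bound `Butler.query` method as the query factory and no record cache, mirroring the RemoteButler call site; the repository path and data ID values are placeholders.

    from lsst.daf.butler import Butler, DataCoordinate
    from lsst.daf.butler.registry.expand_data_ids import expand_data_ids

    butler = Butler("some-repo")  # placeholder repo

    data_id = DataCoordinate.standardize(
        {"instrument": "LSSTCam", "detector": 10},  # placeholder values
        universe=butler.dimensions,
    )
    expanded = expand_data_ids([data_id], butler.dimensions, butler.query, None)
    assert expanded[0].hasRecords() and expanded[0].hasFull()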
lsst/daf/butler/registry/interfaces/_database.py
CHANGED

@@ -1562,7 +1562,12 @@ class Database(ABC):
             return None
         else:
             sql = table.insert()
-   … (old content not captured)
+            ids = []
+            for row in rows:
+                key = connection.execute(sql, row).inserted_primary_key
+                assert key is not None
+                ids.append(key[0])
+            return ids

     @abstractmethod
     def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
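For context on the loop above: SQLAlchemy generally exposes `inserted_primary_key` only for single-row executions, so collecting the generated keys means executing the insert once per row rather than as one executemany call. A standalone SQLAlchemy illustration of that pattern (plain SQLAlchemy, not Butler code):

    import sqlalchemy

    engine = sqlalchemy.create_engine("sqlite://")
    metadata = sqlalchemy.MetaData()
    table = sqlalchemy.Table(
        "example",
        metadata,
        sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True),
        sqlalchemy.Column("name", sqlalchemy.String(32)),
    )
    metadata.create_all(engine)

    ids = []
    with engine.begin() as connection:
        for row in [{"name": "a"}, {"name": "b"}]:
            key = connection.execute(table.insert(), row).inserted_primary_key
            ids.append(key[0])

    assert ids == [1, 2]  # one autogenerated primary key per inserted row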
lsst/daf/butler/registry/sql_registry.py
CHANGED

@@ -34,7 +34,6 @@ __all__ = ("SqlRegistry",)
 import contextlib
 import logging
 import warnings
-from collections import defaultdict
 from collections.abc import Iterable, Iterator, Mapping, Sequence
 from typing import TYPE_CHECKING, Any

@@ -54,7 +53,6 @@ from ..dimensions import (
     DataCoordinate,
     DataId,
     DimensionConfig,
-    DimensionDataAttacher,
     DimensionElement,
     DimensionGroup,
     DimensionRecord,

@@ -78,6 +76,7 @@ from ..registry.interfaces import ChainedCollectionRecord, ReadOnlyDatabaseError
 from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
 from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard
 from ..utils import transactional
+from .expand_data_ids import expand_data_ids

 if TYPE_CHECKING:
     from .._butler_config import ButlerConfig

@@ -1415,28 +1414,7 @@ class SqlRegistry:
         return DataCoordinate.standardize(keys, dimensions=standardized.dimensions).expanded(records=records)

     def expand_data_ids(self, data_ids: Iterable[DataCoordinate]) -> list[DataCoordinate]:
-   … (old content not captured)
-        grouped_by_dimensions: defaultdict[DimensionGroup, list[int]] = defaultdict(list)
-        for i, data_id in enumerate(data_ids):
-            if not data_id.hasRecords():
-                grouped_by_dimensions[data_id.dimensions].append(i)
-
-        if not grouped_by_dimensions:
-            # All given DataCoordinate values are already expanded.
-            return output
-
-        attacher = DimensionDataAttacher(
-            cache=self.dimension_record_cache,
-            dimensions=DimensionGroup.union(*grouped_by_dimensions.keys(), universe=self.dimensions),
-        )
-        with self._query() as query:
-            for dimensions, indexes in grouped_by_dimensions.items():
-                expanded = attacher.attach(dimensions, (output[index] for index in indexes), query)
-                for index, data_id in zip(indexes, expanded):
-                    output[index] = data_id
-
-        return output
+        return expand_data_ids(data_ids, self.dimensions, self._query, self.dimension_record_cache)

     def expand_refs(self, dataset_refs: list[DatasetRef]) -> list[DatasetRef]:
         expanded_ids = self.expand_data_ids([ref.dataId for ref in dataset_refs])
lsst/daf/butler/remote_butler/_remote_butler.py
CHANGED

@@ -65,6 +65,7 @@ from ..dimensions import DataCoordinate, DataIdValue, DimensionConfig, Dimension
 from ..queries import Query
 from ..queries.tree import make_column_literal
 from ..registry import CollectionArgType, NoDefaultCollectionError, Registry, RegistryDefaults
+from ..registry.expand_data_ids import expand_data_ids
 from ._collection_args import convert_collection_arg_to_glob_string_list
 from ._defaults import DefaultsHolder
 from ._get import convert_http_url_to_resource_path, get_dataset_as_python_object

@@ -633,7 +634,7 @@ class RemoteButler(Butler):  # numpydoc ignore=PR02
         raise NotImplementedError()

     def transfer_dimension_records_from(
-        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
+        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef | DataCoordinate]
     ) -> None:
         # Docstring inherited.
         raise NotImplementedError()

@@ -738,6 +739,9 @@ class RemoteButler(Butler):  # numpydoc ignore=PR02
     def close(self) -> None:
         pass

+    def _expand_data_ids(self, data_ids: Iterable[DataCoordinate]) -> list[DataCoordinate]:
+        return expand_data_ids(data_ids, self.dimensions, self.query, None)
+
     @property
     def _file_transfer_source(self) -> RemoteFileTransferSource:
         return RemoteFileTransferSource(self._connection)
lsst/daf/butler/tests/hybrid_butler.py
CHANGED

@@ -338,7 +338,7 @@ class HybridButler(Butler):
         )

     def transfer_dimension_records_from(
-        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
+        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef | DataCoordinate]
     ) -> None:
         return self._direct_butler.transfer_dimension_records_from(source_butler, source_refs)

@@ -425,6 +425,9 @@ class HybridButler(Butler):
             source_butler, data_ids, allowed_elements
         )

+    def _expand_data_ids(self, data_ids: Iterable[DataCoordinate]) -> list[DataCoordinate]:
+        return self._remote_butler._expand_data_ids(data_ids)
+
     @property
     def collection_chains(self) -> ButlerCollections:
         return HybridButlerCollections(self)