lsst-daf-butler 30.0.0rc2__py3-none-any.whl → 30.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/daf/butler/_butler.py +27 -8
- lsst/daf/butler/_butler_collections.py +4 -4
- lsst/daf/butler/_butler_metrics.py +51 -2
- lsst/daf/butler/_dataset_provenance.py +1 -1
- lsst/daf/butler/_dataset_ref.py +1 -1
- lsst/daf/butler/_exceptions.py +2 -2
- lsst/daf/butler/_file_dataset.py +2 -1
- lsst/daf/butler/_formatter.py +14 -7
- lsst/daf/butler/_labeled_butler_factory.py +28 -8
- lsst/daf/butler/_query_all_datasets.py +2 -0
- lsst/daf/butler/_rubin/temporary_for_ingest.py +207 -0
- lsst/daf/butler/cli/cmd/_remove_runs.py +1 -12
- lsst/daf/butler/column_spec.py +4 -4
- lsst/daf/butler/configs/datastores/formatters.yaml +1 -0
- lsst/daf/butler/configs/storageClasses.yaml +15 -0
- lsst/daf/butler/datastore/_datastore.py +21 -1
- lsst/daf/butler/datastore/record_data.py +1 -1
- lsst/daf/butler/datastore/stored_file_info.py +2 -2
- lsst/daf/butler/datastores/chainedDatastore.py +4 -0
- lsst/daf/butler/datastores/fileDatastore.py +26 -13
- lsst/daf/butler/datastores/file_datastore/get.py +4 -4
- lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py +5 -1
- lsst/daf/butler/datastores/file_datastore/transfer.py +2 -2
- lsst/daf/butler/datastores/inMemoryDatastore.py +8 -0
- lsst/daf/butler/ddl.py +2 -2
- lsst/daf/butler/dimensions/_coordinate.py +11 -8
- lsst/daf/butler/dimensions/_record_set.py +1 -1
- lsst/daf/butler/dimensions/_records.py +9 -3
- lsst/daf/butler/direct_butler/_direct_butler.py +85 -51
- lsst/daf/butler/direct_query_driver/_driver.py +5 -4
- lsst/daf/butler/direct_query_driver/_result_page_converter.py +1 -1
- lsst/daf/butler/formatters/parquet.py +6 -6
- lsst/daf/butler/logging.py +9 -3
- lsst/daf/butler/nonempty_mapping.py +1 -1
- lsst/daf/butler/persistence_context.py +8 -5
- lsst/daf/butler/queries/_general_query_results.py +1 -1
- lsst/daf/butler/queries/driver.py +1 -1
- lsst/daf/butler/queries/expression_factory.py +2 -2
- lsst/daf/butler/queries/expressions/parser/exprTree.py +1 -1
- lsst/daf/butler/queries/expressions/parser/parserYacc.py +1 -1
- lsst/daf/butler/queries/overlaps.py +2 -2
- lsst/daf/butler/queries/tree/_column_set.py +1 -1
- lsst/daf/butler/registry/_collection_record_cache.py +1 -1
- lsst/daf/butler/registry/_collection_summary_cache.py +5 -4
- lsst/daf/butler/registry/_registry.py +4 -0
- lsst/daf/butler/registry/bridge/monolithic.py +17 -13
- lsst/daf/butler/registry/databases/postgresql.py +2 -1
- lsst/daf/butler/registry/datasets/byDimensions/_dataset_type_cache.py +1 -1
- lsst/daf/butler/registry/datasets/byDimensions/_manager.py +53 -47
- lsst/daf/butler/registry/datasets/byDimensions/summaries.py +3 -2
- lsst/daf/butler/registry/expand_data_ids.py +93 -0
- lsst/daf/butler/registry/interfaces/_database.py +6 -1
- lsst/daf/butler/registry/interfaces/_datasets.py +2 -1
- lsst/daf/butler/registry/interfaces/_obscore.py +1 -1
- lsst/daf/butler/registry/obscore/_records.py +1 -1
- lsst/daf/butler/registry/obscore/_spatial.py +2 -2
- lsst/daf/butler/registry/queries/_results.py +2 -2
- lsst/daf/butler/registry/sql_registry.py +3 -25
- lsst/daf/butler/registry/wildcards.py +5 -5
- lsst/daf/butler/remote_butler/_get.py +1 -1
- lsst/daf/butler/remote_butler/_remote_butler.py +6 -1
- lsst/daf/butler/remote_butler/_remote_file_transfer_source.py +4 -0
- lsst/daf/butler/remote_butler/authentication/cadc.py +4 -3
- lsst/daf/butler/script/_pruneDatasets.py +4 -2
- lsst/daf/butler/script/configValidate.py +2 -2
- lsst/daf/butler/script/queryCollections.py +2 -2
- lsst/daf/butler/script/removeCollections.py +2 -0
- lsst/daf/butler/script/removeRuns.py +2 -0
- lsst/daf/butler/tests/cliCmdTestBase.py +2 -0
- lsst/daf/butler/tests/cliLogTestBase.py +2 -0
- lsst/daf/butler/tests/hybrid_butler.py +10 -2
- lsst/daf/butler/tests/registry_data/lsstcam-subset.yaml +191 -0
- lsst/daf/butler/tests/registry_data/spatial.py +4 -2
- lsst/daf/butler/tests/testFormatters.py +2 -2
- lsst/daf/butler/tests/utils.py +1 -1
- lsst/daf/butler/timespan_database_representation.py +3 -3
- lsst/daf/butler/transfers/_context.py +7 -6
- lsst/daf/butler/version.py +1 -1
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/METADATA +3 -2
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/RECORD +88 -85
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/WHEEL +1 -1
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/entry_points.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/LICENSE +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/top_level.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/zip-safe +0 -0
@@ -610,15 +610,15 @@ class DirectQueryDriver(QueryDriver):
         ----------
         tree : `.queries.tree.QueryTree`
             Description of the joins and row filters in the query.
+        allow_duplicate_overlaps : `bool`, optional
+            If set to `True` then query will be allowed to generate
+            non-distinct rows for spatial overlaps.

         Returns
         -------
         tree_analysis : `QueryTreeAnalysis`
             Struct containing additional information need to build the joins
             stage of a query.
-        allow_duplicate_overlaps : `bool`, optional
-            If set to `True` then query will be allowed to generate
-            non-distinct rows for spatial overlaps.

         Notes
         -----
@@ -1313,7 +1313,8 @@ class DirectQueryDriver(QueryDriver):
             Mapping of collection names to collection records, must contain
             records for all collections in ``collection_names`` and all their
             children collections.
-        summaries : `~collections.abc.Mapping` [`Any`,
+        summaries : `~collections.abc.Mapping` [`typing.Any`, \
+            `CollectionSummary`]
             Mapping of collection IDs to collection summaries, must contain
             summaries for all non-chained collections in the collection tree.

@@ -270,18 +270,18 @@ def arrow_to_pandas(arrow_table: pa.Table) -> pd.DataFrame:


 def arrow_to_astropy(arrow_table: pa.Table) -> atable.Table:
-    """Convert a pyarrow table to an `astropy.Table`.
+    """Convert a pyarrow table to an `astropy.table.Table`.

     Parameters
     ----------
     arrow_table : `pyarrow.Table`
         Input arrow table to convert. If the table has astropy unit
         metadata in the schema it will be used in the construction
-        of the ``astropy.Table``.
+        of the ``astropy.table.Table``.

     Returns
     -------
-    table : `astropy.Table`
+    table : `astropy.table.Table`
         Converted astropy table.
     """
     from astropy.table import Table
@@ -520,7 +520,7 @@ def astropy_to_arrow(astropy_table: atable.Table) -> pa.Table:

     Parameters
     ----------
-    astropy_table : `astropy.Table`
+    astropy_table : `astropy.table.Table`
         Input astropy table.

     Returns
@@ -584,7 +584,7 @@ def astropy_to_pandas(astropy_table: atable.Table, index: str | None = None) ->

     Parameters
     ----------
-    astropy_table : `astropy.Table`
+    astropy_table : `astropy.table.Table`
         Input astropy table.
     index : `str`, optional
         Name of column to set as index.
@@ -640,7 +640,7 @@ def _astropy_to_numpy_dict(astropy_table: atable.Table) -> dict[str, np.ndarray]

     Parameters
     ----------
-    astropy_table : `astropy.Table`
+    astropy_table : `astropy.table.Table`
         Input astropy table.

     Returns
lsst/daf/butler/logging.py
CHANGED
@@ -764,11 +764,17 @@ class ButlerLogRecords(MutableSequence[ButlerLogRecord]):


 class ButlerLogRecordHandler(StreamHandler):
-    """Python log handler that accumulates records."""
+    """Python log handler that accumulates records.

-    def __init__(self) -> None:
+    Parameters
+    ----------
+    records : `ButlerLogRecords`, optional
+        Container to store logs in.
+    """
+
+    def __init__(self, records: ButlerLogRecords | None = None) -> None:
         super().__init__()
-        self.records = ButlerLogRecords([])
+        self.records = ButlerLogRecords([]) if records is None else records

     def emit(self, record: LogRecord) -> None:
         self.records.append(record)
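The change above turns the handler's record container into an optional constructor argument, so a caller can pass in a `ButlerLogRecords` instance it already holds and watch records accumulate in it. The sketch below illustrates the same pattern with only the standard `logging` module; `AccumulatingHandler` and the logger name are illustrative stand-ins, not part of `daf_butler`.

```python
import logging


class AccumulatingHandler(logging.Handler):
    """Collect emitted records, optionally into a caller-supplied list."""

    def __init__(self, records: list[logging.LogRecord] | None = None) -> None:
        super().__init__()
        # Reuse the caller's container when given, so the caller and the
        # handler share one view of the accumulated records.
        self.records: list[logging.LogRecord] = [] if records is None else records

    def emit(self, record: logging.LogRecord) -> None:
        self.records.append(record)


shared: list[logging.LogRecord] = []
log = logging.getLogger("demo")
log.addHandler(AccumulatingHandler(shared))
log.warning("hello")
print(len(shared))  # -> 1
```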
@@ -43,7 +43,7 @@ _V = TypeVar("_V", bound=Copyable, covariant=True)


 class NonemptyMapping(Mapping[_K, _V]):
-    """A
+    """A `~collections.abc.Mapping` that implicitly adds values (like
     `~collections.defaultdict`) but treats any that evaluate to `False` as not
     present.

@@ -180,17 +180,20 @@ class PersistenceContextVars:

         Parameters
         ----------
-        function : `Callable`
+        function : `collections.abc.Callable`
             A callable which is to be executed inside a specific context.
         *args : tuple
-            Positional arguments which are to be passed to the
+            Positional arguments which are to be passed to the
+            `~collections.abc.Callable`.
         **kwargs : dict, optional
-            Extra key word arguments which are to be passed to the
+            Extra key word arguments which are to be passed to the
+            `~collections.abc.Callable`.

         Returns
         -------
-        result : `Any`
-            The result returned by executing the supplied
+        result : `typing.Any`
+            The result returned by executing the supplied
+            `~collections.abc.Callable`.
         """
         self._ctx = copy_context()
         # Type checkers seem to have trouble with a second layer nesting of
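The docstring being cleaned up here documents a helper that runs a callable inside a copied `contextvars` context (note the `copy_context()` call in the surrounding lines). A minimal standard-library sketch of that pattern, with illustrative names, looks like this:

```python
from contextvars import ContextVar, copy_context

_state: ContextVar[str] = ContextVar("_state", default="outer")


def run_inside() -> str:
    # Changes made here affect only the copied context.
    _state.set("inner")
    return f"value seen inside: {_state.get()}"


ctx = copy_context()
print(ctx.run(run_inside))  # value seen inside: inner
print(_state.get())         # outer (the caller's context is untouched)
```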
@@ -93,7 +93,7 @@ class GeneralQueryResults(QueryResultsBase):

         Yields
         ------
-        row_dict : `dict` [`str`, `Any`]
+        row_dict : `dict` [`str`, `typing.Any`]
             Result row as dictionary, the keys are the names of the dimensions,
             dimension fields (separated from dimension by dot) or dataset type
             fields (separated from dataset type name by dot).
@@ -245,7 +245,7 @@ class QueryDriver(AbstractContextManager[None]):
         ----------
         dimensions : `DimensionGroup`
             Dimensions of the data coordinates.
-        rows :
+        rows : `~collections.abc.Iterable` [ `tuple` ]
             Tuples of data coordinate values, covering just the "required"
             subset of ``dimensions``.

@@ -557,7 +557,7 @@ class ExpressionFactory:
         -------
         logical_and : `tree.Predicate`
             A boolean expression that evaluates to `True` only if all operands
-            evaluate to `True
+            evaluate to `True`.
         """
         return first.logical_and(*args)

@@ -575,7 +575,7 @@ class ExpressionFactory:
         -------
         logical_or : `tree.Predicate`
             A boolean expression that evaluates to `True` if any operand
-            evaluates to `True
+            evaluates to `True`.
         """
         return first.logical_or(*args)

@@ -429,7 +429,7 @@ class ColumnOrder:

         Parameters
         ----------
-        row :
+        row : `~collections.abc.Sequence` [ `DataIdValue` ]
             A row output by the SQL query associated with these columns.
         """
         return row[: len(self._dimension_keys)]
@@ -54,7 +54,8 @@ class CollectionSummaryCache:

         Parameters
         ----------
-        summaries : `~collections.abc.Mapping` [`Any`,
+        summaries : `~collections.abc.Mapping` [`typing.Any`, \
+            `CollectionSummary`]
             Summary records indexed by collection key, records must include all
             dataset types.
         """
@@ -65,15 +66,15 @@ class CollectionSummaryCache:

         Parameters
         ----------
-        keys : `~collections.abc.Iterable` [`Any`]
+        keys : `~collections.abc.Iterable` [`typing.Any`]
             Sequence of collection keys.

         Returns
         -------
-        summaries : `dict` [`Any`, `CollectionSummary`]
+        summaries : `dict` [`typing.Any`, `CollectionSummary`]
             Dictionary of summaries indexed by collection keys, includes
             records found in the cache.
-        missing_keys : `set` [`Any`]
+        missing_keys : `set` [`typing.Any`]
             Collection keys that are not present in the cache.
         """
         found = {}
@@ -437,6 +437,10 @@ class Registry(ABC):
             Name of the type to be removed or tuple containing a list of type
             names to be removed. Wildcards are allowed.

+        Returns
+        -------
+        None
+
         Raises
         ------
         lsst.daf.butler.registry.OrphanedRecordError
@@ -215,20 +215,24 @@ class MonolithicDatastoreRegistryBridge(DatastoreRegistryBridge):
     def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
         # Docstring inherited from DatastoreRegistryBridge
         byId = {ref.id: ref for ref in refs}
-        ...
-        self._tables.dataset_location
-        ...
+        found: list[DatasetIdRef] = []
+        with self._db.session():
+            for batch in chunk_iterable(byId.keys(), 50000):
+                sql = (
+                    sqlalchemy.sql.select(self._tables.dataset_location.columns.dataset_id)
+                    .select_from(self._tables.dataset_location)
+                    .where(
+                        sqlalchemy.sql.and_(
+                            self._tables.dataset_location.columns.datastore_name == self.datastoreName,
+                            self._tables.dataset_location.columns.dataset_id.in_(batch),
+                        )
+                    )
                 )
-        ...
-            yield byId[row.dataset_id]
+                with self._db.query(sql) as sql_result:
+                    sql_ids = sql_result.scalars().all()
+                found.extend(byId[id] for id in sql_ids)
+
+        return found

     @contextmanager
     def emptyTrash(
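The rewritten `check()` replaces one large `IN` clause with one `SELECT` per batch of at most 50000 IDs, collected via `chunk_iterable`. The self-contained SQLAlchemy sketch below shows the same batching idea against a throwaway SQLite table; the table layout, batch size, and the `chunked()` helper (a stand-in for `lsst.utils.iteration.chunk_iterable`) are illustrative assumptions, not the Butler schema.

```python
import sqlalchemy


def chunked(values, n):
    """Yield lists of at most n items (stand-in for chunk_iterable)."""
    values = list(values)
    for i in range(0, len(values), n):
        yield values[i : i + n]


engine = sqlalchemy.create_engine("sqlite://")
metadata = sqlalchemy.MetaData()
locations = sqlalchemy.Table(
    "dataset_location",
    metadata,
    sqlalchemy.Column("dataset_id", sqlalchemy.Integer, primary_key=True),
    sqlalchemy.Column("datastore_name", sqlalchemy.String),
)
metadata.create_all(engine)

with engine.begin() as conn:
    conn.execute(
        locations.insert(),
        [{"dataset_id": i, "datastore_name": "main"} for i in range(10)],
    )

wanted = set(range(5, 15))
found: list[int] = []
with engine.connect() as conn:
    # One bounded IN clause per chunk, mirroring the batched query in check().
    for batch in chunked(wanted, 4):
        sql = sqlalchemy.select(locations.c.dataset_id).where(
            sqlalchemy.and_(
                locations.c.datastore_name == "main",
                locations.c.dataset_id.in_(batch),
            )
        )
        found.extend(conn.execute(sql).scalars().all())

print(sorted(found))  # [5, 6, 7, 8, 9]
```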
@@ -92,7 +92,8 @@ class PostgresqlDatabase(Database):
         allow_temporary_tables: bool = True,
     ):
         with engine.connect() as connection:
-            # `Any` to make mypy ignore the line below, can't
+            # `typing.Any` to make mypy ignore the line below, can't
+            # use type: ignore
            dbapi: Any = connection.connection
            try:
                dsn = dbapi.get_dsn_parameters()
@@ -155,7 +155,7 @@ class DatasetTypeCache:
         dataset_type : `DatasetType` or `None`
             Cached dataset type, `None` is returned if the name is not in the
             cache.
-        extra : `Any` or `None`
+        extra : `typing.Any` or `None`
             Cached opaque data, `None` is returned if the name is not in the
             cache.
         """
@@ -12,6 +12,8 @@ from typing import TYPE_CHECKING, Any, ClassVar
 import astropy.time
 import sqlalchemy

+from lsst.utils.iteration import chunk_iterable
+
 from .... import ddl
 from ...._collection_type import CollectionType
 from ...._dataset_ref import DatasetId, DatasetIdFactory, DatasetIdGenEnum, DatasetRef
@@ -424,17 +426,18 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
         return result

     def get_dataset_refs(self, ids: list[DatasetId]) -> list[DatasetRef]:
-        ...
-        id_col
-        ...
+        dataset_type_map: dict[DatasetId, DatasetType] = {}
+        for batch in chunk_iterable(set(ids), 50000):
+            # Look up the dataset types corresponding to the given Dataset IDs.
+            id_col = self._static.dataset.columns["id"]
+            sql = sqlalchemy.sql.select(
+                id_col,
+                self._static.dataset.columns["dataset_type_id"],
+            ).where(id_col.in_(batch))
+            with self._db.query(sql) as sql_result:
+                dataset_rows = sql_result.mappings().all()
+            for row in dataset_rows:
+                dataset_type_map[row["id"]] = self._get_dataset_type_by_id(row["dataset_type_id"])

         # Group the given dataset IDs by the DimensionGroup of their dataset
         # types -- there is a separate tags table for each DimensionGroup.
@@ -448,40 +451,41 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
             # data IDs corresponding to the UUIDs found from the dataset table.
             dynamic_tables = self._get_dynamic_tables(dimension_group)
             tags_table = self._get_tags_table(dynamic_tables)
-            ...
+            for batch in chunk_iterable(datasets, 50000):
+                tags_sql = tags_table.select().where(tags_table.columns["dataset_id"].in_(batch))
+                # Join in the collection table to fetch the run name.
+                collection_column = tags_table.columns[self._collections.getCollectionForeignKeyName()]
+                joined_collections = self._collections.join_collections_sql(collection_column, tags_sql)
+                tags_sql = joined_collections.joined_sql
+                run_name_column = joined_collections.name_column
+                tags_sql = tags_sql.add_columns(run_name_column)
+                # Tags table includes run collections and tagged
+                # collections.
+                # In theory the data ID for a given dataset should be the
+                # same in both, but nothing actually guarantees this.
+                # So skip any tagged collections, using the run collection
+                # as the definitive definition.
+                tags_sql = tags_sql.where(joined_collections.type_column == int(CollectionType.RUN))
+
+                with self._db.query(tags_sql) as sql_result:
+                    data_id_rows = sql_result.mappings().all()
+
+                assert run_name_column.key is not None
+                for data_id_row in data_id_rows:
+                    id = data_id_row["dataset_id"]
+                    dataset_type = dataset_type_map[id]
+                    run_name = data_id_row[run_name_column.key]
+                    data_id = DataCoordinate.from_required_values(
+                        dimension_group,
+                        tuple(data_id_row[dimension] for dimension in dimension_group.required),
+                    )
+                    ref = DatasetRef(
+                        datasetType=dataset_type,
+                        dataId=data_id,
+                        id=id,
+                        run=run_name,
+                    )
+                    output_refs.append(ref)

         return output_refs

@@ -818,8 +822,10 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):

         Parameters
         ----------
-        ...
+        dimensions : `DimensionGroup`
+            Dimensions to validate.
+        tags : `sqlalchemy.schema.Table`
+            ???
         tmp_tags : `sqlalchemy.schema.Table`
             Temporary table with new datasets and the same schema as tags
             table.
@@ -304,13 +304,14 @@ class CollectionSummaryManager:
         dataset_type_names : `~collections.abc.Iterable` [`str`]
             Names of dataset types to include into returned summaries. If
             `None` then all dataset types will be included.
-        dataset_type_factory :
+        dataset_type_factory : `~collections.abc.Callable`
             Method that takes a table row and make `DatasetType` instance out
             of it.

         Returns
         -------
-        summaries : `~collections.abc.Mapping` [`Any`,
+        summaries : `~collections.abc.Mapping` [`typing.Any`, \
+            `CollectionSummary`]
             Collection summaries indexed by collection record key. This mapping
             will also contain all nested non-chained collections of the chained
             collections.
lsst/daf/butler/registry/expand_data_ids.py
ADDED
@@ -0,0 +1,93 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively.  If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+from collections import defaultdict
+from collections.abc import Iterable
+
+from ..dimensions import (
+    DataCoordinate,
+    DimensionDataAttacher,
+    DimensionGroup,
+    DimensionUniverse,
+)
+from ..dimensions.record_cache import DimensionRecordCache
+from ..queries import QueryFactoryFunction
+
+
+def expand_data_ids(
+    data_ids: Iterable[DataCoordinate],
+    universe: DimensionUniverse,
+    query_func: QueryFactoryFunction,
+    cache: DimensionRecordCache | None,
+) -> list[DataCoordinate]:
+    """Expand the given data IDs to look up implied dimension values and attach
+    dimension records.
+
+    Parameters
+    ----------
+    data_ids : `~collections.abc.Iterable` [ `DataCoordinate` ]
+        Data coordinates to be expanded.
+    universe : `DimensionUniverse`
+        Dimension universe associated with the given ``data_ids`` values.
+    query_func : QueryFactoryFunction
+        Function used to set up a Butler query context for looking up required
+        information from the database.
+    cache : `DimensionRecordCache` | None
+        Cache containing already-known dimension records.  May be `None` if a
+        cache is not available.
+
+    Returns
+    -------
+    expanded : `list` [ `DataCoordinate` ]
+        List of `DataCoordinate` instances in the same order as the input
+        values.  It is guaranteed that each `DataCoordinate` has
+        ``hasRecords()=True`` and ``hasFull()=True``.
+    """
+    output = list(data_ids)
+
+    grouped_by_dimensions: defaultdict[DimensionGroup, list[int]] = defaultdict(list)
+    for i, data_id in enumerate(data_ids):
+        if not data_id.hasRecords():
+            grouped_by_dimensions[data_id.dimensions].append(i)
+
+    if not grouped_by_dimensions:
+        # All given DataCoordinate values are already expanded.
+        return output
+
+    attacher = DimensionDataAttacher(
+        cache=cache,
+        dimensions=DimensionGroup.union(*grouped_by_dimensions.keys(), universe=universe),
+    )
+    for dimensions, indexes in grouped_by_dimensions.items():
+        with query_func() as query:
+            expanded = attacher.attach(dimensions, (output[index] for index in indexes), query)
+            for index, data_id in zip(indexes, expanded):
+                output[index] = data_id
+
+    return output
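Structurally, `expand_data_ids` groups the not-yet-expanded inputs by their dimensions, expands each group with one query, and writes the results back at the original positions so the output order matches the input. A generic sketch of that group-and-write-back pattern, using only the standard library and made-up helper names, is:

```python
from collections import defaultdict


def process_in_groups(items, needs_work, group_key, process_group):
    """Process only items that need work, grouped by a key, preserving order."""
    output = list(items)
    grouped = defaultdict(list)
    # Remember the original index of every item that still needs processing.
    for i, item in enumerate(output):
        if needs_work(item):
            grouped[group_key(item)].append(i)
    if not grouped:
        return output  # nothing to expand
    for key, indexes in grouped.items():
        processed = process_group(key, [output[i] for i in indexes])
        # Write results back at the positions they came from.
        for i, new_item in zip(indexes, processed):
            output[i] = new_item
    return output


# Toy usage: "expand" odd numbers by doubling them, grouped by remainder mod 3.
print(
    process_in_groups(
        [1, 2, 3, 4, 5],
        needs_work=lambda x: x % 2 == 1,
        group_key=lambda x: x % 3,
        process_group=lambda key, group: [x * 2 for x in group],
    )
)  # [2, 2, 6, 4, 10]
```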
@@ -1562,7 +1562,12 @@ class Database(ABC):
             return None
         else:
             sql = table.insert()
-            ...
+            ids = []
+            for row in rows:
+                key = connection.execute(sql, row).inserted_primary_key
+                assert key is not None
+                ids.append(key[0])
+            return ids

     @abstractmethod
     def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
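The new fallback path in `Database.insert` executes the statement once per row so that SQLAlchemy's `inserted_primary_key` is available for each insert and the generated keys can be returned in order. A minimal, self-contained SQLAlchemy illustration of that pattern (the toy table is an assumption for the example):

```python
import sqlalchemy

engine = sqlalchemy.create_engine("sqlite://")
metadata = sqlalchemy.MetaData()
things = sqlalchemy.Table(
    "thing",
    metadata,
    sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True),
    sqlalchemy.Column("name", sqlalchemy.String),
)
metadata.create_all(engine)

rows = [{"name": "a"}, {"name": "b"}, {"name": "c"}]
with engine.begin() as connection:
    sql = things.insert()
    ids = []
    # Execute row by row: inserted_primary_key is only defined for
    # single-row executions, not for executemany-style calls.
    for row in rows:
        key = connection.execute(sql, row).inserted_primary_key
        ids.append(key[0])

print(ids)  # e.g. [1, 2, 3]
```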
@@ -378,7 +378,8 @@ class DatasetRecordStorageManager(VersionedExtension):

         Returns
         -------
-        summaries : `~collections.abc.Mapping` [`Any`,
+        summaries : `~collections.abc.Mapping` [`typing.Any`, \
+            `CollectionSummary`]
             Collection summaries indexed by collection record key. This mapping
             will also contain all nested non-chained collections of the chained
             collections.
@@ -115,7 +115,7 @@ class ObsCoreTableManager(VersionedExtension):
             implemented with this manager.
         universe : `DimensionUniverse`
             All dimensions known to the registry.
-        config : `dict` [ `str`, `Any` ]
+        config : `dict` [ `str`, `typing.Any` ]
             Configuration of the obscore manager.
         datasets : `type`
             Type of dataset manager.
@@ -256,7 +256,7 @@ class RecordFactory:

         Returns
         -------
-        record : `dict` [ `str`, `Any` ] or `None`
+        record : `dict` [ `str`, `typing.Any` ] or `None`
             ObsCore record represented as a dictionary. `None` is returned if
             dataset does not need to be stored in the obscore table, e.g. when
             dataset type is not in obscore configuration.
@@ -72,7 +72,7 @@ class SpatialObsCorePlugin(ABC):
         name : `str`
             Arbitrary name given to this plugin (usually key in
             configuration).
-        config : `dict` [ `str`, `Any` ]
+        config : `dict` [ `str`, `typing.Any` ]
             Plugin configuration dictionary.
         db : `Database`, optional
             Interface to the underlying database engine and namespace. In some
@@ -120,7 +120,7 @@ class SpatialObsCorePlugin(ABC):

         Returns
         -------
-        record : `dict` [ `str`, `Any` ] or `None`
+        record : `dict` [ `str`, `typing.Any` ] or `None`
             Data to store in the main obscore table with column values
             corresponding to a region or `None` if there is nothing to store.
