lsst-daf-butler 30.2025.5000-py3-none-any.whl → 30.2025.5100-py3-none-any.whl

lsst/daf/butler/datastore/record_data.py
@@ -49,7 +49,7 @@ if TYPE_CHECKING:
 # Pydantic requires the possible value types to be explicitly enumerated in
 # order for `uuid.UUID` in particular to work. `typing.Any` does not work
 # here.
-_Record: TypeAlias = dict[str, int | str | uuid.UUID | None]
+_Record: TypeAlias = dict[str, int | str | None]
 
 
 class SerializedDatastoreRecordData(pydantic.BaseModel):
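
The narrowed alias determines which value types Pydantic will accept in serialized datastore records. A minimal sketch of the pattern, with an invented model name and field values (not from the package):

import pydantic
from typing import TypeAlias

# Mirrors the narrowed alias above; the model below is illustrative only.
_Record: TypeAlias = dict[str, int | str | None]

class RecordPayload(pydantic.BaseModel):
    records: list[_Record]

# Accepted: every value is an int, a str, or None.
RecordPayload(records=[{"path": "a/b.fits", "size": 1024, "checksum": None}])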
lsst/daf/butler/datastores/fileDatastore.py
@@ -3166,6 +3166,20 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
 
     def export_records(self, refs: Iterable[DatasetIdRef]) -> Mapping[str, DatastoreRecordData]:
         # Docstring inherited from the base class.
+
+        # This call to 'bridge.check' filters out "partially deleted" datasets.
+        # Specifically, ones in the unusual edge state where:
+        # 1. They have an entry in the registry dataset tables.
+        # 2. They were "trashed" from the datastore, so they are not
+        #    present in the "dataset_location" table.
+        # 3. But the trash has not been "emptied", so there are still entries
+        #    in the "opaque" datastore records table.
+        #
+        # As far as I can tell, this can only occur in the case of a concurrent
+        # or aborted call to `Butler.pruneDatasets(unstore=True, purge=False)`.
+        # Datasets (with or without files existing on disk) can persist in
+        # this zombie state indefinitely, until someone manually empties
+        # the trash.
         exported_refs = list(self._bridge.check(refs))
         ids = {ref.id for ref in exported_refs}
         records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = {id: {} for id in ids}
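
A hedged sketch of how the edge state described in this new comment can arise; the repository path and dataset type name are placeholders, not taken from the diff:

from lsst.daf.butler import Butler

butler = Butler("some/repo", writeable=True)  # placeholder repo path
refs = list(butler.registry.queryDatasets("raw", collections=...))
# unstore=True trashes the datastore records; purge=False keeps the registry
# entries; disassociate=False leaves tagged collections untouched.
butler.pruneDatasets(refs, unstore=True, purge=False, disassociate=False)
# If the process dies before the datastore trash is emptied, these refs
# linger in the registry and in the opaque records table but not in
# "dataset_location" -- exactly the state that bridge.check() filters out.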
lsst/daf/butler/registry/bridge/monolithic.py
@@ -215,20 +215,24 @@ class MonolithicDatastoreRegistryBridge(DatastoreRegistryBridge):
     def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
         # Docstring inherited from DatastoreRegistryBridge
         byId = {ref.id: ref for ref in refs}
-        sql = (
-            sqlalchemy.sql.select(self._tables.dataset_location.columns.dataset_id)
-            .select_from(self._tables.dataset_location)
-            .where(
-                sqlalchemy.sql.and_(
-                    self._tables.dataset_location.columns.datastore_name == self.datastoreName,
-                    self._tables.dataset_location.columns.dataset_id.in_(byId.keys()),
+        found: list[DatasetIdRef] = []
+        with self._db.session():
+            for batch in chunk_iterable(byId.keys(), 50000):
+                sql = (
+                    sqlalchemy.sql.select(self._tables.dataset_location.columns.dataset_id)
+                    .select_from(self._tables.dataset_location)
+                    .where(
+                        sqlalchemy.sql.and_(
+                            self._tables.dataset_location.columns.datastore_name == self.datastoreName,
+                            self._tables.dataset_location.columns.dataset_id.in_(batch),
+                        )
+                    )
                 )
-            )
-        )
-        with self._db.query(sql) as sql_result:
-            sql_rows = sql_result.fetchall()
-        for row in sql_rows:
-            yield byId[row.dataset_id]
+                with self._db.query(sql) as sql_result:
+                    sql_ids = sql_result.scalars().all()
+                found.extend(byId[id] for id in sql_ids)
+
+        return found
 
     @contextmanager
     def emptyTrash(
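
The batching above guards against `IN` clauses with very large ID lists, which can exceed backend limits on bound parameters. A standalone sketch of the same pattern, with illustrative table and column names:

import sqlalchemy
from lsst.utils.iteration import chunk_iterable

def find_present(
    connection: sqlalchemy.Connection,
    table: sqlalchemy.Table,
    ids: set,
    batch_size: int = 50000,
) -> list:
    """Return the ids that have a row in `table`, one IN query per batch."""
    found = []
    for batch in chunk_iterable(ids, batch_size):
        sql = sqlalchemy.select(table.c.dataset_id).where(table.c.dataset_id.in_(batch))
        found.extend(connection.execute(sql).scalars().all())
    return found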
lsst/daf/butler/registry/datasets/byDimensions/_manager.py
@@ -12,6 +12,8 @@ from typing import TYPE_CHECKING, Any, ClassVar
 import astropy.time
 import sqlalchemy
 
+from lsst.utils.iteration import chunk_iterable
+
 from .... import ddl
 from ...._collection_type import CollectionType
 from ...._dataset_ref import DatasetId, DatasetIdFactory, DatasetIdGenEnum, DatasetRef
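
`chunk_iterable` from lsst.utils lazily splits any iterable into chunks of at most the requested size; both call sites changed below rely on it. A quick illustration:

from lsst.utils.iteration import chunk_iterable

# Each chunk holds at most chunk_size elements; the final one may be short.
for chunk in chunk_iterable(range(7), chunk_size=3):
    print(list(chunk))
# [0, 1, 2]
# [3, 4, 5]
# [6]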
@@ -424,17 +426,18 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
         return result
 
     def get_dataset_refs(self, ids: list[DatasetId]) -> list[DatasetRef]:
-        # Look up the dataset types corresponding to the given Dataset IDs.
-        id_col = self._static.dataset.columns["id"]
-        sql = sqlalchemy.sql.select(
-            id_col,
-            self._static.dataset.columns["dataset_type_id"],
-        ).where(id_col.in_(ids))
-        with self._db.query(sql) as sql_result:
-            dataset_rows = sql_result.mappings().all()
-        dataset_type_map: dict[DatasetId, DatasetType] = {
-            row["id"]: self._get_dataset_type_by_id(row["dataset_type_id"]) for row in dataset_rows
-        }
+        dataset_type_map: dict[DatasetId, DatasetType] = {}
+        for batch in chunk_iterable(set(ids), 50000):
+            # Look up the dataset types corresponding to the given Dataset IDs.
+            id_col = self._static.dataset.columns["id"]
+            sql = sqlalchemy.sql.select(
+                id_col,
+                self._static.dataset.columns["dataset_type_id"],
+            ).where(id_col.in_(batch))
+            with self._db.query(sql) as sql_result:
+                dataset_rows = sql_result.mappings().all()
+            for row in dataset_rows:
+                dataset_type_map[row["id"]] = self._get_dataset_type_by_id(row["dataset_type_id"])
 
         # Group the given dataset IDs by the DimensionGroup of their dataset
         # types -- there is a separate tags table for each DimensionGroup.
@@ -448,40 +451,41 @@ class ByDimensionsDatasetRecordStorageManagerUUID(DatasetRecordStorageManager):
             # data IDs corresponding to the UUIDs found from the dataset table.
             dynamic_tables = self._get_dynamic_tables(dimension_group)
             tags_table = self._get_tags_table(dynamic_tables)
-            tags_sql = tags_table.select().where(tags_table.columns["dataset_id"].in_(datasets))
-            # Join in the collection table to fetch the run name.
-            collection_column = tags_table.columns[self._collections.getCollectionForeignKeyName()]
-            joined_collections = self._collections.join_collections_sql(collection_column, tags_sql)
-            tags_sql = joined_collections.joined_sql
-            run_name_column = joined_collections.name_column
-            tags_sql = tags_sql.add_columns(run_name_column)
-            # Tags table includes run collections and tagged
-            # collections.
-            # In theory the data ID for a given dataset should be the
-            # same in both, but nothing actually guarantees this.
-            # So skip any tagged collections, using the run collection
-            # as the definitive definition.
-            tags_sql = tags_sql.where(joined_collections.type_column == int(CollectionType.RUN))
-
-            with self._db.query(tags_sql) as sql_result:
-                data_id_rows = sql_result.mappings().all()
-
-            assert run_name_column.key is not None
-            for data_id_row in data_id_rows:
-                id = data_id_row["dataset_id"]
-                dataset_type = dataset_type_map[id]
-                run_name = data_id_row[run_name_column.key]
-                data_id = DataCoordinate.from_required_values(
-                    dimension_group,
-                    tuple(data_id_row[dimension] for dimension in dimension_group.required),
-                )
-                ref = DatasetRef(
-                    datasetType=dataset_type,
-                    dataId=data_id,
-                    id=id,
-                    run=run_name,
-                )
-                output_refs.append(ref)
+            for batch in chunk_iterable(datasets, 50000):
+                tags_sql = tags_table.select().where(tags_table.columns["dataset_id"].in_(batch))
+                # Join in the collection table to fetch the run name.
+                collection_column = tags_table.columns[self._collections.getCollectionForeignKeyName()]
+                joined_collections = self._collections.join_collections_sql(collection_column, tags_sql)
+                tags_sql = joined_collections.joined_sql
+                run_name_column = joined_collections.name_column
+                tags_sql = tags_sql.add_columns(run_name_column)
+                # Tags table includes run collections and tagged
+                # collections.
+                # In theory the data ID for a given dataset should be the
+                # same in both, but nothing actually guarantees this.
+                # So skip any tagged collections, using the run collection
+                # as the definitive definition.
+                tags_sql = tags_sql.where(joined_collections.type_column == int(CollectionType.RUN))
+
+                with self._db.query(tags_sql) as sql_result:
+                    data_id_rows = sql_result.mappings().all()
+
+                assert run_name_column.key is not None
+                for data_id_row in data_id_rows:
+                    id = data_id_row["dataset_id"]
+                    dataset_type = dataset_type_map[id]
+                    run_name = data_id_row[run_name_column.key]
+                    data_id = DataCoordinate.from_required_values(
+                        dimension_group,
+                        tuple(data_id_row[dimension] for dimension in dimension_group.required),
+                    )
+                    ref = DatasetRef(
+                        datasetType=dataset_type,
+                        dataId=data_id,
+                        id=id,
+                        run=run_name,
+                    )
+                    output_refs.append(ref)
 
         return output_refs
 
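For reference, a sketch of the `DatasetRef` assembly performed by the inner loop, using the default dimension universe; the dataset type, storage class choice, and data ID values are invented for illustration:

import uuid
from lsst.daf.butler import DataCoordinate, DatasetRef, DatasetType, DimensionUniverse

universe = DimensionUniverse()
dataset_type = DatasetType(
    "example_metadata",
    dimensions=["instrument", "detector"],
    storageClass="StructuredDataDict",
    universe=universe,
)
# In the real loop, the required values come from a tags-table row.
data_id = DataCoordinate.from_required_values(dataset_type.dimensions, ("LSSTCam", 42))
ref = DatasetRef(dataset_type, data_id, run="some/run", id=uuid.uuid4())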
lsst/daf/butler/registry/interfaces/_database.py
@@ -1562,7 +1562,12 @@ class Database(ABC):
             return None
         else:
             sql = table.insert()
-            return [connection.execute(sql, row).inserted_primary_key[0] for row in rows]
+            ids = []
+            for row in rows:
+                key = connection.execute(sql, row).inserted_primary_key
+                assert key is not None
+                ids.append(key[0])
+            return ids
 
     @abstractmethod
     def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
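
The expanded loop above evidently exists to satisfy static type checking: SQLAlchemy types `CursorResult.inserted_primary_key` as possibly `None` (it is unavailable for statements other than single-row inserts), so indexing it directly is flagged. A minimal standalone sketch with an invented table:

import sqlalchemy

engine = sqlalchemy.create_engine("sqlite://")
metadata = sqlalchemy.MetaData()
demo = sqlalchemy.Table(
    "demo",
    metadata,
    sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True),
    sqlalchemy.Column("name", sqlalchemy.String),
)
metadata.create_all(engine)

with engine.begin() as connection:
    key = connection.execute(demo.insert(), {"name": "a"}).inserted_primary_key
    assert key is not None  # narrows the Optional for the type checker
    print(key[0])  # the autogenerated primary key, here 1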
lsst/daf/butler/version.py
@@ -1,2 +1,2 @@
 __all__ = ["__version__"]
-__version__ = "30.2025.5000"
+__version__ = "30.2025.5100"
lsst_daf_butler-30.2025.5100.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lsst-daf-butler
-Version: 30.2025.5000
+Version: 30.2025.5100
 Summary: An abstraction layer for reading and writing astronomical data to datastores.
 Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
 License-Expression: BSD-3-Clause OR GPL-3.0-or-later
lsst_daf_butler-30.2025.5100.dist-info/RECORD
@@ -51,7 +51,7 @@ lsst/daf/butler/repo_relocation.py,sha256=Ivhx2xU4slc53Z6RExhNnquMr2Hx-S8h62emml
 lsst/daf/butler/time_utils.py,sha256=MVTfOFI2xt3IeA46pa-fWY2kJRwSzaQyq1uzeUABcTM,11805
 lsst/daf/butler/timespan_database_representation.py,sha256=rYeQ_vp6gneRjboqV-gvNW0DWhm1QJM-KnVzFTDVZ0I,24550
 lsst/daf/butler/utils.py,sha256=5u50COK5z4u31grOhmQF7mFz55biNLOvSMRdQjEdsjo,5140
-lsst/daf/butler/version.py,sha256=arbNW1FaLmuy0rG2fl72Eu7RoD_LnOedd06IvO-wmLM,55
+lsst/daf/butler/version.py,sha256=T_ii-AmyfLEzX_XiWTNw5GEFMy94NcXrbg2rkzKZg7g,55
 lsst/daf/butler/_rubin/__init__.py,sha256=9z5kmc6LJ3C_iPFV46cvdlQ2qOGJbZh-2Ft5Z-rbE28,1569
 lsst/daf/butler/_rubin/file_datasets.py,sha256=P5_BIhxpVj9qfLuLiI2_dClMHsjO5Qm5oDXVr3WntNU,3607
 lsst/daf/butler/_utilities/__init__.py,sha256=vLzPZYAJ-9r1cnqsP64MVpFgSw2166yOpq0iPMSdAvw,1298
@@ -96,11 +96,11 @@ lsst/daf/butler/datastore/composites.py,sha256=NZ7rBK5yH-hrtpqZxC8d49UwwQqWZlEmm
 lsst/daf/butler/datastore/constraints.py,sha256=OcUXuXZq1UBnuQqq8U7Hp3Ezqu0RBN8pIo93BEq7lyI,5921
 lsst/daf/butler/datastore/file_templates.py,sha256=xnb4ZheW6NqeCE__vkIvLF91d57nhfcX3ynGWI0rX_0,35095
 lsst/daf/butler/datastore/generic_base.py,sha256=C15FN1fDVxs-XjeDc1hw5un3MMMVIaZN4QdFbjqQ168,5176
-lsst/daf/butler/datastore/record_data.py,sha256=dhku1kCG0WCeYeWOq9m0Euq02pa96XDF5W9nD_ZEe8k,10428
+lsst/daf/butler/datastore/record_data.py,sha256=kY-fqXrhdffpIHgczgBDvGYb4VnDoFtIeNfqUpteR1w,10416
 lsst/daf/butler/datastore/stored_file_info.py,sha256=s_9LsLZgIF7TwjiXEXgNPz3OaaRiUyzmf5SOdJ1elhk,15838
 lsst/daf/butler/datastores/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lsst/daf/butler/datastores/chainedDatastore.py,sha256=3DUKoGLVbmqyqm2K4-De3GKswE4WALFp0P9ul6OF-Qk,56220
-lsst/daf/butler/datastores/fileDatastore.py,sha256=jEfKlhKRSgR5jzmGu7VWFQXLOHsLt566GufLFfQ32yE,136081
+lsst/daf/butler/datastores/fileDatastore.py,sha256=AN4IKz5hcz2jcfKQUhEnT4DxiPDs1nf5_WF5P_t-u68,136876
 lsst/daf/butler/datastores/inMemoryDatastore.py,sha256=ZLgJuPnKJPsnaZQ_8rTZn02YHl2MxsQ_kcxCXhcc5dk,30213
 lsst/daf/butler/datastores/file_datastore/__init__.py,sha256=vLzPZYAJ-9r1cnqsP64MVpFgSw2166yOpq0iPMSdAvw,1298
 lsst/daf/butler/datastores/file_datastore/get.py,sha256=w-5tuXxdthsSiOlI7MBFrFO6zWM_f1Uqqoy8VGAdvcQ,16964
@@ -203,7 +203,7 @@ lsst/daf/butler/registry/versions.py,sha256=egvrctt_1wBzZgh8iSfySaQJQ9bkx_9bUJWk
 lsst/daf/butler/registry/wildcards.py,sha256=akMGgqDkVM0mQ9RAFENv0IrnoUyMP3mhODYXDaWIQ8o,20277
 lsst/daf/butler/registry/bridge/__init__.py,sha256=vLzPZYAJ-9r1cnqsP64MVpFgSw2166yOpq0iPMSdAvw,1298
 lsst/daf/butler/registry/bridge/ephemeral.py,sha256=QSKMRwQTAHnMOwdigH5tSNrQaM2dK-IXymb7x87bY-w,5845
-lsst/daf/butler/registry/bridge/monolithic.py,sha256=JvVhMH0K9lfzOTaVe9k34JaTAXAHKLP8e2XvRANWEZg,18148
+lsst/daf/butler/registry/bridge/monolithic.py,sha256=NLMLPh9ORChrtHT0be8PtmIlnnDQlmeAsM2MPmTeZZM,18386
 lsst/daf/butler/registry/collections/__init__.py,sha256=vLzPZYAJ-9r1cnqsP64MVpFgSw2166yOpq0iPMSdAvw,1298
 lsst/daf/butler/registry/collections/_base.py,sha256=MxrvTh81aHUULXtf5hHzK6iAjwkes5NckouHMhTFdPM,37480
 lsst/daf/butler/registry/collections/nameKey.py,sha256=UZCwiY0hOyB1NXA_1ZzjD4tuKnQ_jvrMmPQlUN6tWuk,12890
@@ -214,7 +214,7 @@ lsst/daf/butler/registry/databases/sqlite.py,sha256=xW82mdbOoOOTBeuSYDQ9DiDPhnMG
 lsst/daf/butler/registry/datasets/__init__.py,sha256=vLzPZYAJ-9r1cnqsP64MVpFgSw2166yOpq0iPMSdAvw,1298
 lsst/daf/butler/registry/datasets/byDimensions/__init__.py,sha256=BG4C7mhKFbCzvfQSI31CIV_iTMc1gYL_LT4Plyu6LdE,1323
 lsst/daf/butler/registry/datasets/byDimensions/_dataset_type_cache.py,sha256=VmpKsw7PfHTu54cChAUH8qcOC2jizBLNJX_SE_DQ1yA,8315
-lsst/daf/butler/registry/datasets/byDimensions/_manager.py,sha256=f7DWQ_77tgOti6C_0wq-v1orf4YCW_mhmzhi_FMcbq0,71373
+lsst/daf/butler/registry/datasets/byDimensions/_manager.py,sha256=DeJhLIc7hqw19j_HYh-boQ8jow3NMurbhUegrQ8GO8s,71720
 lsst/daf/butler/registry/datasets/byDimensions/summaries.py,sha256=MuRk2p6fAKhvjId3jWnuFzhqMnKNF31ugExD7a2g48k,18534
 lsst/daf/butler/registry/datasets/byDimensions/tables.py,sha256=9MXcpRGuXQonEMmh-zQOUcYJVA9pvUuNXyZRZtP-KH8,25768
 lsst/daf/butler/registry/dimensions/__init__.py,sha256=vLzPZYAJ-9r1cnqsP64MVpFgSw2166yOpq0iPMSdAvw,1298
@@ -223,7 +223,7 @@ lsst/daf/butler/registry/interfaces/__init__.py,sha256=IBMBBb1gyAx3o9uTufhQHtMrh
 lsst/daf/butler/registry/interfaces/_attributes.py,sha256=z-njEpWLhmKU4S0KOCplrY4QeBGoKUhlPRtSdNS_4uw,7258
 lsst/daf/butler/registry/interfaces/_bridge.py,sha256=pds0AhEXZVO3YguU-tkZ8J_NZrBj55HCGkHpBTXbBFQ,15924
 lsst/daf/butler/registry/interfaces/_collections.py,sha256=q0xRy7gA4KacPvJb1Wtk-uDSHrjmv4xpD2qGmzpvdKM,28485
-lsst/daf/butler/registry/interfaces/_database.py,sha256=ibsaPp40WhutsBx9fyf3eioouvySfcX_awT0UucgL7A,84557
+lsst/daf/butler/registry/interfaces/_database.py,sha256=YmbGySh0qXf8HJe_FQd69DgBMdx0HuytbhnPVIpYCJw,84706
 lsst/daf/butler/registry/interfaces/_database_explain.py,sha256=CkALWwNeyrjRvKizWrxvcGDunIhB77kLtEuXscrXVOY,3052
 lsst/daf/butler/registry/interfaces/_datasets.py,sha256=_vvEStau34AdZ45NdnrrgBh_9uSHmth7amnH538gj7A,24975
 lsst/daf/butler/registry/interfaces/_dimensions.py,sha256=peUPhmQEdwvgZcSsEsR80OZBcKEyjBRvmAbEkvMKlKY,14504
@@ -337,13 +337,13 @@ lsst/daf/butler/transfers/__init__.py,sha256=M1YcFszSkNB5hB2pZwwGXqbJE2dKt4YXDin
 lsst/daf/butler/transfers/_context.py,sha256=Ro_nf9NDw9IAr-Pw_NtcdotQKx34RbBbNubt20zwRXU,16449
 lsst/daf/butler/transfers/_interfaces.py,sha256=Ia1NqcFR5E-Ik4zsXEe2fuMtNCJj5Yfe_gVHLTBtJDw,7490
 lsst/daf/butler/transfers/_yaml.py,sha256=w_0GmrueuHVLfOfAXGHFBbWAl18tX6eSElbTC-2jRoc,32632
-lsst_daf_butler-30.2025.5000.dist-info/licenses/COPYRIGHT,sha256=k1Vq0-Be_K-puaeW4UZnckPjksEL-MJh4XKiWcjMxJE,312
-lsst_daf_butler-30.2025.5000.dist-info/licenses/LICENSE,sha256=pRExkS03v0MQW-neNfIcaSL6aiAnoLxYgtZoFzQ6zkM,232
-lsst_daf_butler-30.2025.5000.dist-info/licenses/bsd_license.txt,sha256=7MIcv8QRX9guUtqPSBDMPz2SnZ5swI-xZMqm_VDSfxY,1606
-lsst_daf_butler-30.2025.5000.dist-info/licenses/gpl-v3.0.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-lsst_daf_butler-30.2025.5000.dist-info/METADATA,sha256=LU-VqW_BssqqK4on8WCqIuI9N6QrcBI-1F3QE4O8pxU,3813
-lsst_daf_butler-30.2025.5000.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lsst_daf_butler-30.2025.5000.dist-info/entry_points.txt,sha256=XsRxyTK3c-jGlKVuVnbpch3gtaO0lAA_fS3i2NGS5rw,59
-lsst_daf_butler-30.2025.5000.dist-info/top_level.txt,sha256=eUWiOuVVm9wwTrnAgiJT6tp6HQHXxIhj2QSZ7NYZH80,5
-lsst_daf_butler-30.2025.5000.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-lsst_daf_butler-30.2025.5000.dist-info/RECORD,,
+lsst_daf_butler-30.2025.5100.dist-info/licenses/COPYRIGHT,sha256=k1Vq0-Be_K-puaeW4UZnckPjksEL-MJh4XKiWcjMxJE,312
+lsst_daf_butler-30.2025.5100.dist-info/licenses/LICENSE,sha256=pRExkS03v0MQW-neNfIcaSL6aiAnoLxYgtZoFzQ6zkM,232
+lsst_daf_butler-30.2025.5100.dist-info/licenses/bsd_license.txt,sha256=7MIcv8QRX9guUtqPSBDMPz2SnZ5swI-xZMqm_VDSfxY,1606
+lsst_daf_butler-30.2025.5100.dist-info/licenses/gpl-v3.0.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+lsst_daf_butler-30.2025.5100.dist-info/METADATA,sha256=9foKLN5p5TlcxoI2l8Pa85qZ-ayz5WpArFk_0V9VHWI,3813
+lsst_daf_butler-30.2025.5100.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lsst_daf_butler-30.2025.5100.dist-info/entry_points.txt,sha256=XsRxyTK3c-jGlKVuVnbpch3gtaO0lAA_fS3i2NGS5rw,59
+lsst_daf_butler-30.2025.5100.dist-info/top_level.txt,sha256=eUWiOuVVm9wwTrnAgiJT6tp6HQHXxIhj2QSZ7NYZH80,5
+lsst_daf_butler-30.2025.5100.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+lsst_daf_butler-30.2025.5100.dist-info/RECORD,,