lsst-daf-butler 30.0.0rc3__py3-none-any.whl → 30.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. lsst/daf/butler/_butler.py +19 -3
  2. lsst/daf/butler/_butler_collections.py +4 -4
  3. lsst/daf/butler/_butler_metrics.py +2 -0
  4. lsst/daf/butler/_dataset_provenance.py +1 -1
  5. lsst/daf/butler/_dataset_ref.py +1 -1
  6. lsst/daf/butler/_exceptions.py +2 -2
  7. lsst/daf/butler/_file_dataset.py +2 -1
  8. lsst/daf/butler/_formatter.py +12 -0
  9. lsst/daf/butler/_query_all_datasets.py +2 -0
  10. lsst/daf/butler/cli/cmd/_remove_runs.py +1 -12
  11. lsst/daf/butler/column_spec.py +4 -4
  12. lsst/daf/butler/datastore/_datastore.py +21 -1
  13. lsst/daf/butler/datastore/stored_file_info.py +2 -2
  14. lsst/daf/butler/datastores/chainedDatastore.py +4 -0
  15. lsst/daf/butler/datastores/fileDatastore.py +11 -1
  16. lsst/daf/butler/datastores/file_datastore/get.py +4 -4
  17. lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py +5 -1
  18. lsst/daf/butler/datastores/file_datastore/transfer.py +2 -2
  19. lsst/daf/butler/datastores/inMemoryDatastore.py +8 -0
  20. lsst/daf/butler/ddl.py +2 -2
  21. lsst/daf/butler/dimensions/_coordinate.py +6 -8
  22. lsst/daf/butler/dimensions/_record_set.py +1 -1
  23. lsst/daf/butler/dimensions/_records.py +9 -3
  24. lsst/daf/butler/direct_butler/_direct_butler.py +40 -23
  25. lsst/daf/butler/direct_query_driver/_driver.py +5 -4
  26. lsst/daf/butler/direct_query_driver/_result_page_converter.py +1 -1
  27. lsst/daf/butler/formatters/parquet.py +6 -6
  28. lsst/daf/butler/nonempty_mapping.py +1 -1
  29. lsst/daf/butler/persistence_context.py +8 -5
  30. lsst/daf/butler/queries/_general_query_results.py +1 -1
  31. lsst/daf/butler/queries/driver.py +1 -1
  32. lsst/daf/butler/queries/expression_factory.py +2 -2
  33. lsst/daf/butler/queries/expressions/parser/exprTree.py +1 -1
  34. lsst/daf/butler/queries/expressions/parser/parserYacc.py +1 -1
  35. lsst/daf/butler/queries/overlaps.py +2 -2
  36. lsst/daf/butler/queries/tree/_column_set.py +1 -1
  37. lsst/daf/butler/registry/_collection_record_cache.py +1 -1
  38. lsst/daf/butler/registry/_collection_summary_cache.py +5 -4
  39. lsst/daf/butler/registry/_registry.py +4 -0
  40. lsst/daf/butler/registry/databases/postgresql.py +2 -1
  41. lsst/daf/butler/registry/datasets/byDimensions/_dataset_type_cache.py +1 -1
  42. lsst/daf/butler/registry/datasets/byDimensions/_manager.py +4 -2
  43. lsst/daf/butler/registry/datasets/byDimensions/summaries.py +3 -2
  44. lsst/daf/butler/registry/interfaces/_datasets.py +2 -1
  45. lsst/daf/butler/registry/interfaces/_obscore.py +1 -1
  46. lsst/daf/butler/registry/obscore/_records.py +1 -1
  47. lsst/daf/butler/registry/obscore/_spatial.py +2 -2
  48. lsst/daf/butler/registry/queries/_results.py +2 -2
  49. lsst/daf/butler/registry/sql_registry.py +1 -1
  50. lsst/daf/butler/registry/wildcards.py +5 -5
  51. lsst/daf/butler/remote_butler/_get.py +1 -1
  52. lsst/daf/butler/remote_butler/_remote_butler.py +1 -0
  53. lsst/daf/butler/remote_butler/_remote_file_transfer_source.py +4 -0
  54. lsst/daf/butler/remote_butler/authentication/cadc.py +4 -3
  55. lsst/daf/butler/script/_pruneDatasets.py +4 -2
  56. lsst/daf/butler/script/configValidate.py +2 -2
  57. lsst/daf/butler/script/queryCollections.py +2 -2
  58. lsst/daf/butler/script/removeCollections.py +2 -0
  59. lsst/daf/butler/script/removeRuns.py +2 -0
  60. lsst/daf/butler/tests/cliCmdTestBase.py +2 -0
  61. lsst/daf/butler/tests/cliLogTestBase.py +2 -0
  62. lsst/daf/butler/tests/hybrid_butler.py +6 -1
  63. lsst/daf/butler/tests/registry_data/spatial.py +4 -2
  64. lsst/daf/butler/tests/utils.py +1 -1
  65. lsst/daf/butler/timespan_database_representation.py +3 -3
  66. lsst/daf/butler/version.py +1 -1
  67. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/METADATA +3 -2
  68. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/RECORD +76 -76
  69. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/WHEEL +1 -1
  70. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/entry_points.txt +0 -0
  71. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/COPYRIGHT +0 -0
  72. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/LICENSE +0 -0
  73. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/bsd_license.txt +0 -0
  74. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/gpl-v3.0.txt +0 -0
  75. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/top_level.txt +0 -0
  76. {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/zip-safe +0 -0
@@ -138,7 +138,10 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  without_datastore : `bool`, optional
  If `True` do not attach a datastore to this butler. Any attempts
  to use a datastore will fail.
- **kwargs : `Any`
+ metrics : `ButlerMetrics` or `None`
+ External metrics object to be used for tracking butler usage. If `None`
+ a new metrics object is created.
+ **kwargs : `typing.Any`
  Additional keyword arguments passed to a constructor of actual butler
  class.
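The `metrics` argument documented in this hunk lets callers supply a shared `ButlerMetrics` object instead of having each butler create its own. A minimal sketch, assuming `ButlerMetrics` is exported from the top-level `lsst.daf.butler` package (consistent with the `__all__` change further down) and that `/repo/example` is a placeholder repository path:

```python
from lsst.daf.butler import Butler, ButlerMetrics

metrics = ButlerMetrics()

# Hypothetical repository path; any existing butler repo works here.
butler = Butler.from_config("/repo/example", writeable=False, metrics=metrics)

# get()/put() calls made through this butler now accumulate their timing
# statistics on the shared ``metrics`` object rather than a private one.
```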
@@ -240,7 +243,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  to use a datastore will fail.
  metrics : `ButlerMetrics` or `None`, optional
  Metrics object to record butler usage statistics.
- **kwargs : `Any`
+ **kwargs : `typing.Any`
  Default data ID key-value pairs. These may only identify
  "governor" dimensions like ``instrument`` and ``skymap``.

@@ -1390,6 +1393,10 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  raised if any datasets with the same dataset ID already exist
  in the datastore.

+ Returns
+ -------
+ None
+
  Raises
  ------
  TypeError
@@ -1429,6 +1436,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  *,
  transfer_dimensions: bool = False,
  dry_run: bool = False,
+ skip_existing: bool = False,
  ) -> None:
  """Ingest a Zip file into this butler.
@@ -1447,6 +1455,14 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  If `True` the ingest will be processed without any modifications
  made to the target butler and as if the target butler did not
  have any of the datasets.
+ skip_existing : `bool`, optional
+ If `True`, a zip will not be ingested if the dataset entries listed
+ in the index with the same dataset ID already exists in the butler.
+ If `False` (the default), a `ConflictingDefinitionError` will be
+ raised if any datasets with the same dataset ID already exist
+ in the repository. If, somehow, some datasets are known to the
+ butler and some are not, this is currently treated as an error
+ rather than attempting to do a partial ingest.

  Notes
  -----
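The new `skip_existing` flag documented above changes how `Butler.ingest_zip` reacts when the zip's datasets are already present. A hedged sketch, where `/repo/example` and `artifacts.zip` are placeholder paths and the zip was produced by `retrieve_artifacts_zip`:

```python
from lsst.daf.butler import Butler

butler = Butler.from_config("/repo/example", writeable=True)

# With skip_existing=True, re-ingesting a zip whose datasets are all
# already known becomes a no-op instead of raising
# ConflictingDefinitionError. A zip mixing known and unknown datasets
# is still rejected, per the docstring above.
butler.ingest_zip("artifacts.zip", transfer="copy", skip_existing=True)
```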
@@ -2024,7 +2040,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02

  Returns
  -------
- records : `list`[`DimensionRecord`]
+ records : `list` [`DimensionRecord`]
  Dimension records matching the given query parameters.

  Raises
@@ -360,10 +360,10 @@ class ButlerCollections(ABC, Sequence):
  name : `str`
  The name of the collection of interest.
  include_parents : `bool`, optional
- If `True` any parents of this collection will be included.
+ If `True` any parents of this collection will be included.
  include_summary : `bool`, optional
- If `True` dataset type names and governor dimensions of datasets
- stored in this collection will be included in the result.
+ If `True` dataset type names and governor dimensions of datasets
+ stored in this collection will be included in the result.

  Returns
  -------
@@ -464,7 +464,7 @@ class ButlerCollections(ABC, Sequence):

  Returns
  -------
- filtered : `~collections.abc.Mapping` [`str`, `list`[`str`]]
+ filtered : `~collections.abc.Mapping` [`str`, `list` [`str`]]
  Mapping of the dataset type name to its corresponding list of
  collection names.
  """
@@ -27,6 +27,8 @@

  from __future__ import annotations

+ __all__ = ["ButlerMetrics"]
+
  from collections.abc import Callable, Iterator
  from contextlib import contextmanager
  from typing import Concatenate, ParamSpec
@@ -267,7 +267,7 @@ class DatasetProvenance(pydantic.BaseModel):
  use_upper : `bool` or `None`
  If `True` use upper case for provenance keys, if `False` use lower
  case, if `None` match the case of the prefix.
- keys : `tuple` of `str` | `int`
+ *keys : `tuple` of `str` | `int`
  Components of key to combine with prefix and separator.

  Returns
@@ -479,7 +479,7 @@ class DatasetRef:

  Parameters
  ----------
- simple : `dict` of [`str`, `Any`]
+ simple : `dict` of [`str`, `typing.Any`]
  The value returned by `to_simple()`.
  universe : `DimensionUniverse`
  The special graph of all known dimensions.
@@ -196,8 +196,8 @@ class ValidationError(RuntimeError):


  class EmptyQueryResultError(Exception):
- """Exception raised when query methods return an empty result and `explain`
- flag is set.
+ """Exception raised when query methods return an empty result and
+ ``explain`` flag is set.

  Parameters
  ----------
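`EmptyQueryResultError` (reworded above) is what the convenience query methods raise when the ``explain`` flag is set and no rows come back. A hedged sketch, assuming `Butler.query_datasets` with its ``explain`` argument and placeholder dataset-type and collection names:

```python
from lsst.daf.butler import EmptyQueryResultError

try:
    refs = butler.query_datasets("raw", collections="HSC/raw/all", explain=True)
except EmptyQueryResultError as exc:
    # The exception message explains why the query matched nothing.
    print(exc)
```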
@@ -129,7 +129,8 @@ class FileDataset:
  ----------
  dataset : `SerializedFileDataset`
  Object to deserialize.
- dataset_type_loader : `Callable` [[ `str` ], `DatasetType` ]
+ dataset_type_loader : `~collections.abc.Callable` \
+ [[ `str` ], `DatasetType` ]
  Function that takes a string dataset type name as its
  only parameter, and returns an instance of `DatasetType`.
  Used to deserialize the `DatasetRef` instances contained
@@ -910,6 +910,10 @@ class FormatterV2:
  provenance : `DatasetProvenance` | `None`, optional
  Provenance to attach to the file being written.

+ Returns
+ -------
+ None
+
  Raises
  ------
  FormatterNotImplementedError
@@ -1137,6 +1141,10 @@ class FormatterV2:
  location : `Location`
  Location from which to extract a file extension.

+ Returns
+ -------
+ None
+
  Raises
  ------
  ValueError
@@ -1583,6 +1591,10 @@ class Formatter(metaclass=ABCMeta):
  location : `Location`
  Location from which to extract a file extension.

+ Returns
+ -------
+ None
+
  Raises
  ------
  NotImplementedError
@@ -151,6 +151,8 @@ def _filter_collections_and_dataset_types(

  Parameters
  ----------
+ butler
+ Butler repository to use.
  collections
  List of collection names or collection search globs.
  dataset_type_query
@@ -114,18 +114,7 @@ def remove_runs(context: click.Context, confirm: bool, force: bool, **kwargs: An

  This command can be used to remove RUN collections and the datasets within
  them.
-
- Parameters
- ----------
- context : `click.Context`
- Context provided by Click.
- confirm : `bool`
- Confirmation for removal of the run.
- force : `bool`
- Force removal.
- **kwargs : `dict` [`str`, `str`]
- The parameters to pass to `~lsst.daf.butler.script.removeRuns`.
- """
+ """ # numpydoc ignore=PR01
  result = script.removeRuns(**kwargs)
  canRemoveRuns = len(result.runs)
  if not canRemoveRuns:
@@ -109,12 +109,12 @@ class ColumnValueSerializer(ABC):

  Parameters
  ----------
- value : `Any`
+ value : `typing.Any`
  Column value to be serialized.

  Returns
  -------
- value : `Any`
+ value : `typing.Any`
  Column value in serializable format.
  """
  raise NotImplementedError
@@ -125,12 +125,12 @@ class ColumnValueSerializer(ABC):

  Parameters
  ----------
- value : `Any`
+ value : `typing.Any`
  Serialized column value.

  Returns
  -------
- value : `Any`
+ value : `typing.Any`
  Deserialized column value.
  """
  raise NotImplementedError
@@ -284,6 +284,14 @@ class DatasetRefURIs(abc.Sequence):
  def __repr__(self) -> str:
  return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"

+ def iter_all(self) -> Iterator[ResourcePath]:
+ """Iterate over all URIs without regard to whether they are primary
+ or component.
+ """
+ if self.primaryURI is not None:
+ yield self.primaryURI
+ yield from self.componentURIs.values()
+

  class Datastore(FileTransferSource, metaclass=ABCMeta):
  """Datastore interface.
@@ -536,7 +544,7 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):

  Returns
  -------
- exists : `dict`[`DatasetRef`, `bool`]
+ exists : `dict` [`DatasetRef`, `bool`]
  Mapping of dataset to boolean indicating whether the dataset
  is known to the datastore.
  """
@@ -825,6 +833,10 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):
  in an external system or if the file is to be compressed in place.
  It is up to the datastore whether this parameter is relevant.

+ Returns
+ -------
+ None
+
  Raises
  ------
  NotImplementedError
@@ -1143,6 +1155,10 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):
  Determine whether errors should be ignored. When multiple
  refs are being trashed there will be no per-ref check.

+ Returns
+ -------
+ None
+
  Raises
  ------
  FileNotFoundError
@@ -1278,6 +1294,10 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):
  Entity to compare with configuration retrieved using the
  specified lookup key.

+ Returns
+ -------
+ None
+
  Raises
  ------
  DatastoreValidationError
@@ -423,8 +423,8 @@ def make_datastore_path_relative(path: str) -> str:
  path : `str`
  The file path from a `StoredFileInfo`.

- Return
- ------
+ Returns
+ -------
  normalized_path : `str`
  The original path, if it was relative. Otherwise, a version of it that
  was converted to a relative path, stripping URI scheme and netloc from
@@ -1077,6 +1077,10 @@ class ChainedDatastore(Datastore):
  If `True`, output a log message for every validation error
  detected.

+ Returns
+ -------
+ None
+
  Raises
  ------
  DatastoreValidationError
@@ -2152,7 +2152,13 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):

  return artifact_map

- def ingest_zip(self, zip_path: ResourcePath, transfer: str | None, *, dry_run: bool = False) -> None:
+ def ingest_zip(
+ self,
+ zip_path: ResourcePath,
+ transfer: str | None,
+ *,
+ dry_run: bool = False,
+ ) -> None:
  """Ingest an indexed Zip file and contents.

  The Zip file must have an index file as created by `retrieveArtifacts`.
@@ -2976,6 +2982,10 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
  If `True`, output a log message for every validation error
  detected.

+ Returns
+ -------
+ None
+
  Raises
  ------
  DatastoreValidationError
@@ -97,12 +97,12 @@ def generate_datastore_get_information(

  Parameters
  ----------
- fileLocations : `list`[`DatasetLocationInformation`]
+ fileLocations : `list` [`DatasetLocationInformation`]
  List of file locations for this artifact and their associated datastore
  records.
  ref : `DatasetRef`
  The registry information associated with this artifact.
- parameters : `Mapping`[`str`, `Any`]
+ parameters : `~collections.abc.Mapping` [`str`, `typing.Any`]
  `StorageClass` and `Formatter` parameters.
  readStorageClass : `StorageClass` | `None`, optional
  The StorageClass to use when ultimately returning the resulting object
@@ -255,12 +255,12 @@ def get_dataset_as_python_object_from_get_info(

  Parameters
  ----------
- allGetInfo : `list`[`DatastoreFileGetInformation`]
+ allGetInfo : `list` [`DatastoreFileGetInformation`]
  Pre-processed information about each file associated with this
  artifact.
  ref : `DatasetRef`
  The registry information associated with this artifact.
- parameters : `Mapping`[`str`, `Any`]
+ parameters : `~collections.abc.Mapping` [`str`, `typing.Any`]
  `StorageClass` and `Formatter` parameters.
  cache_manager : `AbstractDatastoreCacheManager`
  The cache manager to use for caching retrieved files.
@@ -274,7 +274,11 @@ class ZipIndex(BaseModel):
  Path to the Zip file.
  """
  with zip_path.open("rb") as fd, zipfile.ZipFile(fd) as zf:
- json_data = zf.read(cls.index_name)
+ return cls.from_open_zip(zf)
+
+ @classmethod
+ def from_open_zip(cls, zf: zipfile.ZipFile) -> Self:
+ json_data = zf.read(cls.index_name)
  return cls.model_validate_json(json_data)
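The new `ZipIndex.from_open_zip` classmethod above lets callers read the index from a zip handle they already have open, instead of re-opening the file by path. A hedged sketch, assuming `ZipIndex` lives in `lsst.daf.butler.datastores.file_datastore.retrieve_artifacts` and `artifacts.zip` is a placeholder path to a butler-produced zip:

```python
import zipfile

from lsst.daf.butler.datastores.file_datastore.retrieve_artifacts import ZipIndex

# Placeholder path; any zip with an embedded butler index is suitable.
with zipfile.ZipFile("artifacts.zip") as zf:
    index = ZipIndex.from_open_zip(zf)  # parses the embedded index JSON

# ``index`` is the validated pydantic model describing the zip contents.
```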
@@ -55,8 +55,8 @@ def retrieve_file_transfer_records(
  Cache mapping datastore artifact to existence. Updated by
  this method with details of all artifacts tested.

- Return
- ------
+ Returns
+ -------
  files : `FileTransferMap`
  A dictionary from `DatasetId` to a list of `FileTransferRecord`,
  containing information about the files that were found for these
@@ -590,6 +590,10 @@ class InMemoryDatastore(GenericBaseDatastore[StoredMemoryItemInfo]):
  ignore_errors : `bool`, optional
  Indicate that errors should be ignored.

+ Returns
+ -------
+ None
+
  Raises
  ------
  FileNotFoundError
@@ -721,6 +725,10 @@ class InMemoryDatastore(GenericBaseDatastore[StoredMemoryItemInfo]):
  If `True`, output a log message for every validation error
  detected.

+ Returns
+ -------
+ None
+
  Raises
  ------
  DatastoreValidationError
lsst/daf/butler/ddl.py CHANGED
@@ -537,7 +537,7 @@ class IndexSpec:
  ----------
  *columns : `str`
  Names of the columns to index.
- **kwargs : `Any`
+ **kwargs : `typing.Any`
  Additional keyword arguments to pass directly to
  `sqlalchemy.schema.Index` constructor. This could be used to provide
  backend-specific options, e.g. to create a ``GIST`` index in PostgreSQL
@@ -556,7 +556,7 @@ class IndexSpec:

  kwargs: dict[str, Any]
  """Additional keyword arguments passed directly to
- `sqlalchemy.schema.Index` constructor (`dict` [ `str`, `Any` ]).
+ `sqlalchemy.schema.Index` constructor (`dict` [ `str`, `typing.Any` ]).
  """
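The `IndexSpec` docstring above notes that ``**kwargs`` are forwarded unchanged to `sqlalchemy.schema.Index`; a hedged illustration of the PostgreSQL ``GIST`` case it mentions (the column names here are only illustrative):

```python
from lsst.daf.butler.ddl import IndexSpec

# Plain composite index over two columns.
plain = IndexSpec("instrument", "visit")

# Backend-specific option passed through to sqlalchemy.schema.Index:
# request a GIST index when the table is created in PostgreSQL.
spatial = IndexSpec("region", postgresql_using="gist")
```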
@@ -35,8 +35,6 @@ from __future__ import annotations
  __all__ = (
  "DataCoordinate",
  "DataId",
- "DataIdKey",
- "DataIdValue",
  "SerializedDataCoordinate",
  "SerializedDataId",
  )
@@ -55,7 +53,7 @@ from .._timespan import Timespan
  from ..json import from_json_pydantic, to_json_pydantic
  from ..persistence_context import PersistenceContextVars
  from ._group import DimensionGroup
- from ._records import DataIdKey, DataIdValue, DimensionRecord, SerializedDimensionRecord
+ from ._records import DataIdValue, DimensionRecord, SerializedDimensionRecord

  if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
  from ..registry import Registry
@@ -559,11 +557,11 @@ class DataCoordinate:
  Returns
  -------
  state : `bool`
- If `True`, `__getitem__`, `get`, and `__contains__` (but not
- `keys`!) will act as though the mapping includes key-value pairs
- for implied dimensions, and the `full` property may be used. If
+ If `True`, ``__getitem__``, `get`, and ``__contains__`` (but not
+ ``keys``!) will act as though the mapping includes key-value pairs
+ for implied dimensions, and the ``full`` property may be used. If
  `False`, these operations only include key-value pairs for required
- dimensions, and accessing `full` is an error. Always `True` if
+ dimensions, and accessing ``full`` is an error. Always `True` if
  there are no implied dimensions.
  """
  raise NotImplementedError()
@@ -718,7 +716,7 @@ class DataCoordinate:

  Parameters
  ----------
- simple : `dict` of [`str`, `Any`]
+ simple : `dict` of [`str`, `typing.Any`]
  The `dict` returned by `to_simple()`.
  universe : `DimensionUniverse`
  Object that manages all known dimensions.
@@ -97,7 +97,7 @@ def fail_record_lookup(

  Returns
  -------
- record : `DimensionRecord`
+ record : `DimensionRecord`
  Never returned; this function always raises `LookupError`.
  """
  raise LookupError(
@@ -27,7 +27,13 @@

  from __future__ import annotations

- __all__ = ("DimensionRecord", "SerializedDimensionRecord", "SerializedKeyValueDimensionRecord")
+ __all__ = (
+ "DataIdKey",
+ "DataIdValue",
+ "DimensionRecord",
+ "SerializedDimensionRecord",
+ "SerializedKeyValueDimensionRecord",
+ )

  import itertools
  from collections.abc import Callable, Hashable
@@ -451,8 +457,8 @@ class DimensionRecord:
  registry : `lsst.daf.butler.Registry`, optional
  Registry from which a universe can be extracted. Can be `None`
  if universe is provided explicitly.
- cacheKey : `Hashable` or `None`
- If this is not None, it will be used as a key for any cached
+ cacheKey : `collections.abc.Hashable` or `None`
+ If this is not `None`, it will be used as a key for any cached
  reconstruction instead of calculating a value from the serialized
  format.
@@ -884,6 +884,8 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  if isinstance(datasetRefOrType, DatasetRef):
  if collections is not None:
  warnings.warn("Collections should not be specified with DatasetRef", stacklevel=3)
+ if predict and not datasetRefOrType.dataId.hasRecords():
+ return datasetRefOrType.expanded(self.registry.expandDataId(datasetRefOrType.dataId))
  # May need to retrieve datastore records if requested.
  if datastore_records and datasetRefOrType._datastore_records is None:
  datasetRefOrType = self._registry.get_datastore_records(datasetRefOrType)
@@ -936,6 +938,7 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  run = self.run
  if run is None:
  raise TypeError("Cannot predict dataset ID/location with run=None.")
+ dataId = self.registry.expandDataId(dataId)
  return DatasetRef(datasetType, dataId, run=run)
  else:
  if collections is None:
@@ -1655,29 +1658,9 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  *,
  transfer_dimensions: bool = False,
  dry_run: bool = False,
+ skip_existing: bool = False,
  ) -> None:
- """Ingest a Zip file into this butler.
-
- The Zip file must have been created by `retrieve_artifacts_zip`.
-
- Parameters
- ----------
- zip_file : `lsst.resources.ResourcePathExpression`
- Path to the Zip file.
- transfer : `str`, optional
- Method to use to transfer the Zip into the datastore.
- transfer_dimensions : `bool`, optional
- If `True`, dimension record data associated with the new datasets
- will be transferred from the Zip, if present.
- dry_run : `bool`, optional
- If `True` the ingest will be processed without any modifications
- made to the target butler and as if the target butler did not
- have any of the datasets.
-
- Notes
- -----
- Run collections and dataset types are created as needed.
- """
+ # Docstring inherited.
  if not self.isWriteable():
  raise TypeError("Butler is read-only.")
@@ -1703,6 +1686,29 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  datasets.append(dataset)
  processed_ids.update(unprocessed)

+ new_datasets, existing_datasets = self._partition_datasets_by_known(datasets)
+ if existing_datasets:
+ if skip_existing:
+ _LOG.info(
+ "Skipping %d datasets from zip file %s which already exist in the repository.",
+ len(existing_datasets),
+ zip_file,
+ )
+ else:
+ raise ConflictingDefinitionError(
+ f"Datastore already contains {len(existing_datasets)} of the given datasets."
+ f" Example: {existing_datasets[0]}"
+ )
+ if new_datasets:
+ # Can not yet support partial zip ingests where a zip contains
+ # some datasets that are already in another zip.
+ raise ValueError(
+ f"The given zip file from {zip_file} contains {len(new_datasets)} datasets not known "
+ f"to this butler but also contains {len(existing_datasets)} datasets already known to "
+ "this butler. Currently butler can not ingest zip files with overlapping content."
+ )
+ return
+
  # Ingest doesn't create the RUN collections so we have to do that
  # here.
  #
@@ -1721,7 +1727,18 @@ class DirectButler(Butler): # numpydoc ignore=PR02
  datasets, progress, dry_run=dry_run, transfer_dimensions=transfer_dimensions
  )

- with self.transaction():
+ # Calculate some statistics based on the given list of datasets.
+ n_datasets = 0
+ for d in datasets:
+ n_datasets += len(d.refs)
+ srefs = "s" if n_datasets != 1 else ""
+
+ with (
+ self._metrics.instrument_ingest(
+ n_datasets, _LOG, msg=f"Ingesting zip file {zip_file} with {n_datasets} dataset{srefs}"
+ ),
+ self.transaction(),
+ ):
  # Do not need expanded dataset refs so can ignore the return value.
  self._ingest_file_datasets(datasets, import_info, progress, dry_run=dry_run)
@@ -610,15 +610,15 @@ class DirectQueryDriver(QueryDriver):
  ----------
  tree : `.queries.tree.QueryTree`
  Description of the joins and row filters in the query.
+ allow_duplicate_overlaps : `bool`, optional
+ If set to `True` then query will be allowed to generate
+ non-distinct rows for spatial overlaps.

  Returns
  -------
  tree_analysis : `QueryTreeAnalysis`
  Struct containing additional information need to build the joins
  stage of a query.
- allow_duplicate_overlaps : `bool`, optional
- If set to `True` then query will be allowed to generate
- non-distinct rows for spatial overlaps.

  Notes
  -----
@@ -1313,7 +1313,8 @@ class DirectQueryDriver(QueryDriver):
  Mapping of collection names to collection records, must contain
  records for all collections in ``collection_names`` and all their
  children collections.
- summaries : `~collections.abc.Mapping` [`Any`, `CollectionSummary`]
+ summaries : `~collections.abc.Mapping` [`typing.Any`, \
+ `CollectionSummary`]
  Mapping of collection IDs to collection summaries, must contain
  summaries for all non-chained collections in the collection tree.
@@ -386,7 +386,7 @@ class _GeneralColumnConverter:

  Returns
  -------
- value : `Any`
+ value : `typing.Any`
  Result of the conversion.

  raise NotImplementedError()