lsst-daf-butler 30.0.0rc3__py3-none-any.whl → 30.0.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in that registry.
- lsst/daf/butler/_butler.py +19 -3
- lsst/daf/butler/_butler_collections.py +4 -4
- lsst/daf/butler/_butler_metrics.py +2 -0
- lsst/daf/butler/_dataset_provenance.py +1 -1
- lsst/daf/butler/_dataset_ref.py +1 -1
- lsst/daf/butler/_exceptions.py +2 -2
- lsst/daf/butler/_file_dataset.py +2 -1
- lsst/daf/butler/_formatter.py +12 -0
- lsst/daf/butler/_query_all_datasets.py +2 -0
- lsst/daf/butler/cli/cmd/_remove_runs.py +1 -12
- lsst/daf/butler/column_spec.py +4 -4
- lsst/daf/butler/datastore/_datastore.py +21 -1
- lsst/daf/butler/datastore/stored_file_info.py +2 -2
- lsst/daf/butler/datastores/chainedDatastore.py +4 -0
- lsst/daf/butler/datastores/fileDatastore.py +11 -1
- lsst/daf/butler/datastores/file_datastore/get.py +4 -4
- lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py +5 -1
- lsst/daf/butler/datastores/file_datastore/transfer.py +2 -2
- lsst/daf/butler/datastores/inMemoryDatastore.py +8 -0
- lsst/daf/butler/ddl.py +2 -2
- lsst/daf/butler/dimensions/_coordinate.py +6 -8
- lsst/daf/butler/dimensions/_record_set.py +1 -1
- lsst/daf/butler/dimensions/_records.py +9 -3
- lsst/daf/butler/direct_butler/_direct_butler.py +40 -23
- lsst/daf/butler/direct_query_driver/_driver.py +5 -4
- lsst/daf/butler/direct_query_driver/_result_page_converter.py +1 -1
- lsst/daf/butler/formatters/parquet.py +6 -6
- lsst/daf/butler/nonempty_mapping.py +1 -1
- lsst/daf/butler/persistence_context.py +8 -5
- lsst/daf/butler/queries/_general_query_results.py +1 -1
- lsst/daf/butler/queries/driver.py +1 -1
- lsst/daf/butler/queries/expression_factory.py +2 -2
- lsst/daf/butler/queries/expressions/parser/exprTree.py +1 -1
- lsst/daf/butler/queries/expressions/parser/parserYacc.py +1 -1
- lsst/daf/butler/queries/overlaps.py +2 -2
- lsst/daf/butler/queries/tree/_column_set.py +1 -1
- lsst/daf/butler/registry/_collection_record_cache.py +1 -1
- lsst/daf/butler/registry/_collection_summary_cache.py +5 -4
- lsst/daf/butler/registry/_registry.py +4 -0
- lsst/daf/butler/registry/databases/postgresql.py +2 -1
- lsst/daf/butler/registry/datasets/byDimensions/_dataset_type_cache.py +1 -1
- lsst/daf/butler/registry/datasets/byDimensions/_manager.py +4 -2
- lsst/daf/butler/registry/datasets/byDimensions/summaries.py +3 -2
- lsst/daf/butler/registry/interfaces/_datasets.py +2 -1
- lsst/daf/butler/registry/interfaces/_obscore.py +1 -1
- lsst/daf/butler/registry/obscore/_records.py +1 -1
- lsst/daf/butler/registry/obscore/_spatial.py +2 -2
- lsst/daf/butler/registry/queries/_results.py +2 -2
- lsst/daf/butler/registry/sql_registry.py +1 -1
- lsst/daf/butler/registry/wildcards.py +5 -5
- lsst/daf/butler/remote_butler/_get.py +1 -1
- lsst/daf/butler/remote_butler/_remote_butler.py +1 -0
- lsst/daf/butler/remote_butler/_remote_file_transfer_source.py +4 -0
- lsst/daf/butler/remote_butler/authentication/cadc.py +4 -3
- lsst/daf/butler/script/_pruneDatasets.py +4 -2
- lsst/daf/butler/script/configValidate.py +2 -2
- lsst/daf/butler/script/queryCollections.py +2 -2
- lsst/daf/butler/script/removeCollections.py +2 -0
- lsst/daf/butler/script/removeRuns.py +2 -0
- lsst/daf/butler/tests/cliCmdTestBase.py +2 -0
- lsst/daf/butler/tests/cliLogTestBase.py +2 -0
- lsst/daf/butler/tests/hybrid_butler.py +6 -1
- lsst/daf/butler/tests/registry_data/spatial.py +4 -2
- lsst/daf/butler/tests/utils.py +1 -1
- lsst/daf/butler/timespan_database_representation.py +3 -3
- lsst/daf/butler/version.py +1 -1
- {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/METADATA +3 -2
- {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/RECORD +76 -76
- {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/WHEEL +1 -1
- {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/entry_points.txt +0 -0
- {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/LICENSE +0 -0
- {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/top_level.txt +0 -0
- {lsst_daf_butler-30.0.0rc3.dist-info → lsst_daf_butler-30.0.1.dist-info}/zip-safe +0 -0
lsst/daf/butler/_butler.py
CHANGED

@@ -138,7 +138,10 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
     without_datastore : `bool`, optional
         If `True` do not attach a datastore to this butler. Any attempts
         to use a datastore will fail.
-    **kwargs : `Any`
+    metrics : `ButlerMetrics` or `None`
+        External metrics object to be used for tracking butler usage. If `None`
+        a new metrics object is created.
+    **kwargs : `typing.Any`
         Additional keyword arguments passed to a constructor of actual butler
         class.

@@ -240,7 +243,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         to use a datastore will fail.
     metrics : `ButlerMetrics` or `None`, optional
         Metrics object to record butler usage statistics.
-    **kwargs : `Any`
+    **kwargs : `typing.Any`
         Default data ID key-value pairs. These may only identify
         "governor" dimensions like ``instrument`` and ``skymap``.

@@ -1390,6 +1393,10 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
            raised if any datasets with the same dataset ID already exist
            in the datastore.

+        Returns
+        -------
+        None
+
         Raises
         ------
         TypeError

@@ -1429,6 +1436,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         *,
         transfer_dimensions: bool = False,
         dry_run: bool = False,
+        skip_existing: bool = False,
     ) -> None:
         """Ingest a Zip file into this butler.

@@ -1447,6 +1455,14 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
            If `True` the ingest will be processed without any modifications
            made to the target butler and as if the target butler did not
            have any of the datasets.
+        skip_existing : `bool`, optional
+            If `True`, a zip will not be ingested if the dataset entries listed
+            in the index with the same dataset ID already exists in the butler.
+            If `False` (the default), a `ConflictingDefinitionError` will be
+            raised if any datasets with the same dataset ID already exist
+            in the repository. If, somehow, some datasets are known to the
+            butler and some are not, this is currently treated as an error
+            rather than attempting to do a partial ingest.

         Notes
         -----

@@ -2024,7 +2040,7 @@ class Butler(LimitedButler):  # numpydoc ignore=PR02
         Returns
         -------
-        records : `list`[`DimensionRecord`]
+        records : `list` [`DimensionRecord`]
             Dimension records matching the given query parameters.

         Raises
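Beyond the docstring cleanups, this file documents the `metrics` constructor argument and adds a `skip_existing` option to `Butler.ingest_zip`. A minimal usage sketch, assuming `ButlerMetrics` is exported at the package top level; the repository path and Zip file name are placeholders:

    from lsst.daf.butler import Butler, ButlerMetrics

    metrics = ButlerMetrics()  # caller-owned object for tracking butler usage
    butler = Butler.from_config("repo", writeable=True, metrics=metrics)
    # With skip_existing=True, re-ingesting a Zip whose datasets are all
    # already present becomes a no-op instead of raising
    # ConflictingDefinitionError.
    butler.ingest_zip("artifacts.zip", transfer="auto", skip_existing=True)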
lsst/daf/butler/_butler_collections.py
CHANGED

@@ -360,10 +360,10 @@ class ButlerCollections(ABC, Sequence):
         name : `str`
             The name of the collection of interest.
         include_parents : `bool`, optional
-
+            If `True` any parents of this collection will be included.
         include_summary : `bool`, optional
-
-
+            If `True` dataset type names and governor dimensions of datasets
+            stored in this collection will be included in the result.

         Returns
         -------

@@ -464,7 +464,7 @@ class ButlerCollections(ABC, Sequence):

         Returns
         -------
-        filtered : `~collections.abc.Mapping` [`str`, `list`[`str`]]
+        filtered : `~collections.abc.Mapping` [`str`, `list` [`str`]]
             Mapping of the dataset type name to its corresponding list of
             collection names.
         """
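A hedged illustration of the two flags documented above; the enclosing method is inferred from the parameter list to be `ButlerCollections.get_info`, and the collection name is a placeholder:

    info = butler.collections.get_info(
        "HSC/defaults", include_parents=True, include_summary=True
    )
    print(info.parents, info.dataset_types)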
lsst/daf/butler/_dataset_provenance.py
CHANGED

@@ -267,7 +267,7 @@ class DatasetProvenance(pydantic.BaseModel):
         use_upper : `bool` or `None`
             If `True` use upper case for provenance keys, if `False` use lower
             case, if `None` match the case of the prefix.
-        keys : `tuple` of `str` | `int`
+        *keys : `tuple` of `str` | `int`
             Components of key to combine with prefix and separator.

         Returns
lsst/daf/butler/_dataset_ref.py
CHANGED

@@ -479,7 +479,7 @@ class DatasetRef:

         Parameters
         ----------
-        simple : `dict` of [`str`, `Any`]
+        simple : `dict` of [`str`, `typing.Any`]
             The value returned by `to_simple()`.
         universe : `DimensionUniverse`
             The special graph of all known dimensions.
lsst/daf/butler/_exceptions.py
CHANGED

@@ -196,8 +196,8 @@ class ValidationError(RuntimeError):


 class EmptyQueryResultError(Exception):
-    """Exception raised when query methods return an empty result and
-    flag is set.
+    """Exception raised when query methods return an empty result and
+    ``explain`` flag is set.

     Parameters
     ----------
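The corrected summary line refers to the ``explain`` flag accepted by the butler query helpers. A behavior sketch, assuming `EmptyQueryResultError` is exported at the package top level; the dataset type and collection names are placeholders:

    from lsst.daf.butler import EmptyQueryResultError

    try:
        refs = butler.query_datasets("raw", collections="HSC/raw/all", explain=True)
    except EmptyQueryResultError as e:
        for reason in e.reasons:  # human-readable explanations for the empty result
            print(reason)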
lsst/daf/butler/_file_dataset.py
CHANGED

@@ -129,7 +129,8 @@ class FileDataset:
         ----------
         dataset : `SerializedFileDataset`
             Object to deserialize.
-        dataset_type_loader :
+        dataset_type_loader : `~collections.abc.Callable` \
+                [[ `str` ], `DatasetType` ]
             Function that takes a string dataset type name as its
             only parameter, and returns an instance of `DatasetType`.
             Used to deserialize the `DatasetRef` instances contained
lsst/daf/butler/_formatter.py
CHANGED

@@ -910,6 +910,10 @@ class FormatterV2:
         provenance : `DatasetProvenance` | `None`, optional
             Provenance to attach to the file being written.

+        Returns
+        -------
+        None
+
         Raises
         ------
         FormatterNotImplementedError

@@ -1137,6 +1141,10 @@ class FormatterV2:
         location : `Location`
             Location from which to extract a file extension.

+        Returns
+        -------
+        None
+
         Raises
         ------
         ValueError

@@ -1583,6 +1591,10 @@ class Formatter(metaclass=ABCMeta):
         location : `Location`
             Location from which to extract a file extension.

+        Returns
+        -------
+        None
+
         Raises
         ------
         NotImplementedError
lsst/daf/butler/cli/cmd/_remove_runs.py
CHANGED

@@ -114,18 +114,7 @@ def remove_runs(context: click.Context, confirm: bool, force: bool, **kwargs: Any

     This command can be used to remove RUN collections and the datasets within
     them.
-
-    Parameters
-    ----------
-    context : `click.Context`
-        Context provided by Click.
-    confirm : `bool`
-        Confirmation for removal of the run.
-    force : `bool`
-        Force removal.
-    **kwargs : `dict` [`str`, `str`]
-        The parameters to pass to `~lsst.daf.butler.script.removeRuns`.
-    """
+    """  # numpydoc ignore=PR01
     result = script.removeRuns(**kwargs)
     canRemoveRuns = len(result.runs)
     if not canRemoveRuns:
lsst/daf/butler/column_spec.py
CHANGED

@@ -109,12 +109,12 @@ class ColumnValueSerializer(ABC):

         Parameters
         ----------
-        value : `Any`
+        value : `typing.Any`
             Column value to be serialized.

         Returns
         -------
-        value : `Any`
+        value : `typing.Any`
             Column value in serializable format.
         """
         raise NotImplementedError

@@ -125,12 +125,12 @@ class ColumnValueSerializer(ABC):

         Parameters
         ----------
-        value : `Any`
+        value : `typing.Any`
             Serialized column value.

         Returns
         -------
-        value : `Any`
+        value : `typing.Any`
             Deserialized column value.
         """
         raise NotImplementedError
lsst/daf/butler/datastore/_datastore.py
CHANGED

@@ -284,6 +284,14 @@ class DatasetRefURIs(abc.Sequence):
     def __repr__(self) -> str:
         return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"

+    def iter_all(self) -> Iterator[ResourcePath]:
+        """Iterate over all URIs without regard to whether they are primary
+        or component.
+        """
+        if self.primaryURI is not None:
+            yield self.primaryURI
+        yield from self.componentURIs.values()
+

 class Datastore(FileTransferSource, metaclass=ABCMeta):
     """Datastore interface.

@@ -536,7 +544,7 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):

         Returns
         -------
-        exists : `dict`[`DatasetRef`, `bool`]
+        exists : `dict` [`DatasetRef`, `bool`]
             Mapping of dataset to boolean indicating whether the dataset
             is known to the datastore.
         """

@@ -825,6 +833,10 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

+        Returns
+        -------
+        None
+
         Raises
         ------
         NotImplementedError

@@ -1143,6 +1155,10 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

+        Returns
+        -------
+        None
+
         Raises
         ------
         FileNotFoundError

@@ -1278,6 +1294,10 @@ class Datastore(FileTransferSource, metaclass=ABCMeta):
            Entity to compare with configuration retrieved using the
            specified lookup key.

+        Returns
+        -------
+        None
+
         Raises
         ------
         DatastoreValidationError
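The new `DatasetRefURIs.iter_all` shown above lets callers treat the primary and component URIs uniformly. A usage sketch, where `uris` stands for a `DatasetRefURIs` instance such as the one returned by `Butler.getURIs`:

    for uri in uris.iter_all():
        print(uri, uri.exists())  # each is an lsst.resources.ResourcePath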
lsst/daf/butler/datastore/stored_file_info.py
CHANGED

@@ -423,8 +423,8 @@ def make_datastore_path_relative(path: str) -> str:
     path : `str`
         The file path from a `StoredFileInfo`.

-
-
+    Returns
+    -------
     normalized_path : `str`
         The original path, if it was relative. Otherwise, a version of it that
         was converted to a relative path, stripping URI scheme and netloc from
lsst/daf/butler/datastores/fileDatastore.py
CHANGED

@@ -2152,7 +2152,13 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):

         return artifact_map

-    def ingest_zip(
+    def ingest_zip(
+        self,
+        zip_path: ResourcePath,
+        transfer: str | None,
+        *,
+        dry_run: bool = False,
+    ) -> None:
         """Ingest an indexed Zip file and contents.

         The Zip file must have an index file as created by `retrieveArtifacts`.

@@ -2976,6 +2982,10 @@ class FileDatastore(GenericBaseDatastore[StoredFileInfo]):
            If `True`, output a log message for every validation error
            detected.

+        Returns
+        -------
+        None
+
         Raises
         ------
         DatastoreValidationError
lsst/daf/butler/datastores/file_datastore/get.py
CHANGED

@@ -97,12 +97,12 @@ def generate_datastore_get_information(

     Parameters
     ----------
-    fileLocations : `list`[`DatasetLocationInformation`]
+    fileLocations : `list` [`DatasetLocationInformation`]
         List of file locations for this artifact and their associated datastore
         records.
     ref : `DatasetRef`
         The registry information associated with this artifact.
-    parameters :
+    parameters : `~collections.abc.Mapping` [`str`, `typing.Any`]
         `StorageClass` and `Formatter` parameters.
     readStorageClass : `StorageClass` | `None`, optional
         The StorageClass to use when ultimately returning the resulting object

@@ -255,12 +255,12 @@ def get_dataset_as_python_object_from_get_info(

     Parameters
     ----------
-    allGetInfo : `list`[`DatastoreFileGetInformation`]
+    allGetInfo : `list` [`DatastoreFileGetInformation`]
         Pre-processed information about each file associated with this
         artifact.
     ref : `DatasetRef`
         The registry information associated with this artifact.
-    parameters :
+    parameters : `~collections.abc.Mapping` [`str`, `typing.Any`]
         `StorageClass` and `Formatter` parameters.
     cache_manager : `AbstractDatastoreCacheManager`
         The cache manager to use for caching retrieved files.
lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py
CHANGED

@@ -274,7 +274,11 @@ class ZipIndex(BaseModel):
             Path to the Zip file.
         """
         with zip_path.open("rb") as fd, zipfile.ZipFile(fd) as zf:
-            json_data = zf.read(cls.index_name)
+            return cls.from_open_zip(zf)
+
+    @classmethod
+    def from_open_zip(cls, zf: zipfile.ZipFile) -> Self:
+        json_data = zf.read(cls.index_name)
         return cls.model_validate_json(json_data)
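The refactor above splits index loading so that parsing can be reused with an already-open archive. A sketch of calling the new classmethod directly; the file name is a placeholder:

    import zipfile

    with zipfile.ZipFile("retrieved_artifacts.zip") as zf:
        index = ZipIndex.from_open_zip(zf)  # reads and validates the JSON index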
lsst/daf/butler/datastores/file_datastore/transfer.py
CHANGED

@@ -55,8 +55,8 @@ def retrieve_file_transfer_records(
         Cache mapping datastore artifact to existence. Updated by
         this method with details of all artifacts tested.

-
-
+    Returns
+    -------
     files : `FileTransferMap`
         A dictionary from `DatasetId` to a list of `FileTransferRecord`,
         containing information about the files that were found for these
lsst/daf/butler/datastores/inMemoryDatastore.py
CHANGED

@@ -590,6 +590,10 @@ class InMemoryDatastore(GenericBaseDatastore[StoredMemoryItemInfo]):
         ignore_errors : `bool`, optional
             Indicate that errors should be ignored.

+        Returns
+        -------
+        None
+
         Raises
         ------
         FileNotFoundError

@@ -721,6 +725,10 @@ class InMemoryDatastore(GenericBaseDatastore[StoredMemoryItemInfo]):
            If `True`, output a log message for every validation error
            detected.

+        Returns
+        -------
+        None
+
         Raises
         ------
         DatastoreValidationError
lsst/daf/butler/ddl.py
CHANGED

@@ -537,7 +537,7 @@ class IndexSpec:
     ----------
     *columns : `str`
         Names of the columns to index.
-    **kwargs : `Any`
+    **kwargs : `typing.Any`
         Additional keyword arguments to pass directly to
         `sqlalchemy.schema.Index` constructor. This could be used to provide
         backend-specific options, e.g. to create a ``GIST`` index in PostgreSQL

@@ -556,7 +556,7 @@ class IndexSpec:

     kwargs: dict[str, Any]
     """Additional keyword arguments passed directly to
-    `sqlalchemy.schema.Index` constructor (`dict` [ `str`, `Any` ]).
+    `sqlalchemy.schema.Index` constructor (`dict` [ `str`, `typing.Any` ]).
     """
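A hypothetical `IndexSpec` construction matching the documented signature: column names plus backend-specific keyword arguments forwarded to the `sqlalchemy.schema.Index` constructor (the ``GIST`` example comes from the docstring itself; the column name is a placeholder):

    from lsst.daf.butler.ddl import IndexSpec

    spec = IndexSpec("region", postgresql_using="gist")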
lsst/daf/butler/dimensions/_coordinate.py
CHANGED

@@ -35,8 +35,6 @@ from __future__ import annotations
 __all__ = (
     "DataCoordinate",
     "DataId",
-    "DataIdKey",
-    "DataIdValue",
     "SerializedDataCoordinate",
     "SerializedDataId",
 )

@@ -55,7 +53,7 @@ from .._timespan import Timespan
 from ..json import from_json_pydantic, to_json_pydantic
 from ..persistence_context import PersistenceContextVars
 from ._group import DimensionGroup
-from ._records import
+from ._records import DataIdValue, DimensionRecord, SerializedDimensionRecord

 if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
     from ..registry import Registry

@@ -559,11 +557,11 @@ class DataCoordinate:
         Returns
         -------
         state : `bool`
-            If `True`,
-
-            for implied dimensions, and the
+            If `True`, ``__getitem__``, `get`, and ``__contains__`` (but not
+            ``keys``!) will act as though the mapping includes key-value pairs
+            for implied dimensions, and the ``full`` property may be used. If
             `False`, these operations only include key-value pairs for required
-            dimensions, and accessing
+            dimensions, and accessing ``full`` is an error. Always `True` if
             there are no implied dimensions.
         """
         raise NotImplementedError()

@@ -718,7 +716,7 @@ class DataCoordinate:

         Parameters
         ----------
-        simple : `dict` of [`str`, `Any`]
+        simple : `dict` of [`str`, `typing.Any`]
             The `dict` returned by `to_simple()`.
         universe : `DimensionUniverse`
             Object that manages all known dimensions.
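A sketch of the semantics restored in the docstring above; the method name (`hasFull`) and the data ID values are assumptions for illustration. When the returned state is `True`, keys for implied dimensions are readable even though ``keys`` lists only the required dimensions:

    data_id = butler.registry.expandDataId(instrument="HSC", visit=903334)
    assert data_id.hasFull()
    print(data_id["physical_filter"])  # implied by visit, not a required key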
lsst/daf/butler/dimensions/_records.py
CHANGED

@@ -27,7 +27,13 @@

 from __future__ import annotations

-__all__ = (
+__all__ = (
+    "DataIdKey",
+    "DataIdValue",
+    "DimensionRecord",
+    "SerializedDimensionRecord",
+    "SerializedKeyValueDimensionRecord",
+)

 import itertools
 from collections.abc import Callable, Hashable

@@ -451,8 +457,8 @@ class DimensionRecord:
     registry : `lsst.daf.butler.Registry`, optional
         Registry from which a universe can be extracted. Can be `None`
         if universe is provided explicitly.
-    cacheKey : `Hashable` or `None`
-        If this is not None
+    cacheKey : `collections.abc.Hashable` or `None`
+        If this is not `None`, it will be used as a key for any cached
         reconstruction instead of calculating a value from the serialized
         format.
lsst/daf/butler/direct_butler/_direct_butler.py
CHANGED

@@ -884,6 +884,8 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
         if isinstance(datasetRefOrType, DatasetRef):
             if collections is not None:
                 warnings.warn("Collections should not be specified with DatasetRef", stacklevel=3)
+            if predict and not datasetRefOrType.dataId.hasRecords():
+                return datasetRefOrType.expanded(self.registry.expandDataId(datasetRefOrType.dataId))
             # May need to retrieve datastore records if requested.
             if datastore_records and datasetRefOrType._datastore_records is None:
                 datasetRefOrType = self._registry.get_datastore_records(datasetRefOrType)

@@ -936,6 +938,7 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
             run = self.run
             if run is None:
                 raise TypeError("Cannot predict dataset ID/location with run=None.")
+            dataId = self.registry.expandDataId(dataId)
             return DatasetRef(datasetType, dataId, run=run)
         else:
             if collections is None:

@@ -1655,29 +1658,9 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
         *,
         transfer_dimensions: bool = False,
         dry_run: bool = False,
+        skip_existing: bool = False,
     ) -> None:
-
-
-        The Zip file must have been created by `retrieve_artifacts_zip`.
-
-        Parameters
-        ----------
-        zip_file : `lsst.resources.ResourcePathExpression`
-            Path to the Zip file.
-        transfer : `str`, optional
-            Method to use to transfer the Zip into the datastore.
-        transfer_dimensions : `bool`, optional
-            If `True`, dimension record data associated with the new datasets
-            will be transferred from the Zip, if present.
-        dry_run : `bool`, optional
-            If `True` the ingest will be processed without any modifications
-            made to the target butler and as if the target butler did not
-            have any of the datasets.
-
-        Notes
-        -----
-        Run collections and dataset types are created as needed.
-        """
+        # Docstring inherited.
         if not self.isWriteable():
             raise TypeError("Butler is read-only.")

@@ -1703,6 +1686,29 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
             datasets.append(dataset)
             processed_ids.update(unprocessed)

+        new_datasets, existing_datasets = self._partition_datasets_by_known(datasets)
+        if existing_datasets:
+            if skip_existing:
+                _LOG.info(
+                    "Skipping %d datasets from zip file %s which already exist in the repository.",
+                    len(existing_datasets),
+                    zip_file,
+                )
+            else:
+                raise ConflictingDefinitionError(
+                    f"Datastore already contains {len(existing_datasets)} of the given datasets."
+                    f" Example: {existing_datasets[0]}"
+                )
+            if new_datasets:
+                # Can not yet support partial zip ingests where a zip contains
+                # some datasets that are already in another zip.
+                raise ValueError(
+                    f"The given zip file from {zip_file} contains {len(new_datasets)} datasets not known "
+                    f"to this butler but also contains {len(existing_datasets)} datasets already known to "
+                    "this butler. Currently butler can not ingest zip files with overlapping content."
+                )
+            return
+
         # Ingest doesn't create the RUN collections so we have to do that
         # here.
         #

@@ -1721,7 +1727,18 @@ class DirectButler(Butler):  # numpydoc ignore=PR02
             datasets, progress, dry_run=dry_run, transfer_dimensions=transfer_dimensions
         )

-        with self.transaction():
+        # Calculate some statistics based on the given list of datasets.
+        n_datasets = 0
+        for d in datasets:
+            n_datasets += len(d.refs)
+        srefs = "s" if n_datasets != 1 else ""
+
+        with (
+            self._metrics.instrument_ingest(
+                n_datasets, _LOG, msg=f"Ingesting zip file {zip_file} with {n_datasets} dataset{srefs}"
+            ),
+            self.transaction(),
+        ):
             # Do not need expanded dataset refs so can ignore the return value.
             self._ingest_file_datasets(datasets, import_info, progress, dry_run=dry_run)
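The partitioning logic above gives `ingest_zip` three outcomes when a Zip's datasets are already (partly) known to the butler, sketched here with placeholder names and assuming `ConflictingDefinitionError` is importable from `lsst.daf.butler.registry`:

    from lsst.daf.butler.registry import ConflictingDefinitionError

    # All datasets already present, skip_existing=False: raises.
    try:
        butler.ingest_zip("artifacts.zip", skip_existing=False)
    except ConflictingDefinitionError:
        pass
    # All datasets already present, skip_existing=True: logged and skipped.
    butler.ingest_zip("artifacts.zip", skip_existing=True)
    # A mix of known and unknown datasets raises ValueError in either mode.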
lsst/daf/butler/direct_query_driver/_driver.py
CHANGED

@@ -610,15 +610,15 @@ class DirectQueryDriver(QueryDriver):
         ----------
         tree : `.queries.tree.QueryTree`
             Description of the joins and row filters in the query.
+        allow_duplicate_overlaps : `bool`, optional
+            If set to `True` then query will be allowed to generate
+            non-distinct rows for spatial overlaps.

         Returns
         -------
         tree_analysis : `QueryTreeAnalysis`
             Struct containing additional information need to build the joins
             stage of a query.
-        allow_duplicate_overlaps : `bool`, optional
-            If set to `True` then query will be allowed to generate
-            non-distinct rows for spatial overlaps.

         Notes
         -----

@@ -1313,7 +1313,8 @@ class DirectQueryDriver(QueryDriver):
             Mapping of collection names to collection records, must contain
             records for all collections in ``collection_names`` and all their
             children collections.
-        summaries : `~collections.abc.Mapping` [`Any`,
+        summaries : `~collections.abc.Mapping` [`typing.Any`, \
+            `CollectionSummary`]
             Mapping of collection IDs to collection summaries, must contain
             summaries for all non-chained collections in the collection tree.