lsst-daf-butler 30.0.0rc2__py3-none-any.whl → 30.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/daf/butler/_butler.py +27 -8
- lsst/daf/butler/_butler_collections.py +4 -4
- lsst/daf/butler/_butler_metrics.py +51 -2
- lsst/daf/butler/_dataset_provenance.py +1 -1
- lsst/daf/butler/_dataset_ref.py +1 -1
- lsst/daf/butler/_exceptions.py +2 -2
- lsst/daf/butler/_file_dataset.py +2 -1
- lsst/daf/butler/_formatter.py +14 -7
- lsst/daf/butler/_labeled_butler_factory.py +28 -8
- lsst/daf/butler/_query_all_datasets.py +2 -0
- lsst/daf/butler/_rubin/temporary_for_ingest.py +207 -0
- lsst/daf/butler/cli/cmd/_remove_runs.py +1 -12
- lsst/daf/butler/column_spec.py +4 -4
- lsst/daf/butler/configs/datastores/formatters.yaml +1 -0
- lsst/daf/butler/configs/storageClasses.yaml +15 -0
- lsst/daf/butler/datastore/_datastore.py +21 -1
- lsst/daf/butler/datastore/record_data.py +1 -1
- lsst/daf/butler/datastore/stored_file_info.py +2 -2
- lsst/daf/butler/datastores/chainedDatastore.py +4 -0
- lsst/daf/butler/datastores/fileDatastore.py +26 -13
- lsst/daf/butler/datastores/file_datastore/get.py +4 -4
- lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py +5 -1
- lsst/daf/butler/datastores/file_datastore/transfer.py +2 -2
- lsst/daf/butler/datastores/inMemoryDatastore.py +8 -0
- lsst/daf/butler/ddl.py +2 -2
- lsst/daf/butler/dimensions/_coordinate.py +11 -8
- lsst/daf/butler/dimensions/_record_set.py +1 -1
- lsst/daf/butler/dimensions/_records.py +9 -3
- lsst/daf/butler/direct_butler/_direct_butler.py +85 -51
- lsst/daf/butler/direct_query_driver/_driver.py +5 -4
- lsst/daf/butler/direct_query_driver/_result_page_converter.py +1 -1
- lsst/daf/butler/formatters/parquet.py +6 -6
- lsst/daf/butler/logging.py +9 -3
- lsst/daf/butler/nonempty_mapping.py +1 -1
- lsst/daf/butler/persistence_context.py +8 -5
- lsst/daf/butler/queries/_general_query_results.py +1 -1
- lsst/daf/butler/queries/driver.py +1 -1
- lsst/daf/butler/queries/expression_factory.py +2 -2
- lsst/daf/butler/queries/expressions/parser/exprTree.py +1 -1
- lsst/daf/butler/queries/expressions/parser/parserYacc.py +1 -1
- lsst/daf/butler/queries/overlaps.py +2 -2
- lsst/daf/butler/queries/tree/_column_set.py +1 -1
- lsst/daf/butler/registry/_collection_record_cache.py +1 -1
- lsst/daf/butler/registry/_collection_summary_cache.py +5 -4
- lsst/daf/butler/registry/_registry.py +4 -0
- lsst/daf/butler/registry/bridge/monolithic.py +17 -13
- lsst/daf/butler/registry/databases/postgresql.py +2 -1
- lsst/daf/butler/registry/datasets/byDimensions/_dataset_type_cache.py +1 -1
- lsst/daf/butler/registry/datasets/byDimensions/_manager.py +53 -47
- lsst/daf/butler/registry/datasets/byDimensions/summaries.py +3 -2
- lsst/daf/butler/registry/expand_data_ids.py +93 -0
- lsst/daf/butler/registry/interfaces/_database.py +6 -1
- lsst/daf/butler/registry/interfaces/_datasets.py +2 -1
- lsst/daf/butler/registry/interfaces/_obscore.py +1 -1
- lsst/daf/butler/registry/obscore/_records.py +1 -1
- lsst/daf/butler/registry/obscore/_spatial.py +2 -2
- lsst/daf/butler/registry/queries/_results.py +2 -2
- lsst/daf/butler/registry/sql_registry.py +3 -25
- lsst/daf/butler/registry/wildcards.py +5 -5
- lsst/daf/butler/remote_butler/_get.py +1 -1
- lsst/daf/butler/remote_butler/_remote_butler.py +6 -1
- lsst/daf/butler/remote_butler/_remote_file_transfer_source.py +4 -0
- lsst/daf/butler/remote_butler/authentication/cadc.py +4 -3
- lsst/daf/butler/script/_pruneDatasets.py +4 -2
- lsst/daf/butler/script/configValidate.py +2 -2
- lsst/daf/butler/script/queryCollections.py +2 -2
- lsst/daf/butler/script/removeCollections.py +2 -0
- lsst/daf/butler/script/removeRuns.py +2 -0
- lsst/daf/butler/tests/cliCmdTestBase.py +2 -0
- lsst/daf/butler/tests/cliLogTestBase.py +2 -0
- lsst/daf/butler/tests/hybrid_butler.py +10 -2
- lsst/daf/butler/tests/registry_data/lsstcam-subset.yaml +191 -0
- lsst/daf/butler/tests/registry_data/spatial.py +4 -2
- lsst/daf/butler/tests/testFormatters.py +2 -2
- lsst/daf/butler/tests/utils.py +1 -1
- lsst/daf/butler/timespan_database_representation.py +3 -3
- lsst/daf/butler/transfers/_context.py +7 -6
- lsst/daf/butler/version.py +1 -1
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/METADATA +3 -2
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/RECORD +88 -85
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/WHEEL +1 -1
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/entry_points.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/LICENSE +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/top_level.txt +0 -0
- {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1.dist-info}/zip-safe +0 -0
lsst/daf/butler/_butler.py
CHANGED
@@ -138,7 +138,10 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
     without_datastore : `bool`, optional
         If `True` do not attach a datastore to this butler. Any attempts
         to use a datastore will fail.
-    **kwargs : `Any`
+    metrics : `ButlerMetrics` or `None`
+        External metrics object to be used for tracking butler usage. If `None`
+        a new metrics object is created.
+    **kwargs : `typing.Any`
         Additional keyword arguments passed to a constructor of actual butler
         class.
 
@@ -240,7 +243,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
         to use a datastore will fail.
     metrics : `ButlerMetrics` or `None`, optional
         Metrics object to record butler usage statistics.
-    **kwargs : `Any`
+    **kwargs : `typing.Any`
         Default data ID key-value pairs. These may only identify
         "governor" dimensions like ``instrument`` and ``skymap``.
 
@@ -1390,6 +1393,10 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
             raised if any datasets with the same dataset ID already exist
             in the datastore.
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         TypeError
@@ -1429,6 +1436,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
         *,
         transfer_dimensions: bool = False,
         dry_run: bool = False,
+        skip_existing: bool = False,
     ) -> None:
         """Ingest a Zip file into this butler.
 
@@ -1447,6 +1455,14 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
             If `True` the ingest will be processed without any modifications
             made to the target butler and as if the target butler did not
             have any of the datasets.
+        skip_existing : `bool`, optional
+            If `True`, a zip will not be ingested if the dataset entries listed
+            in the index with the same dataset ID already exists in the butler.
+            If `False` (the default), a `ConflictingDefinitionError` will be
+            raised if any datasets with the same dataset ID already exist
+            in the repository. If, somehow, some datasets are known to the
+            butler and some are not, this is currently treated as an error
+            rather than attempting to do a partial ingest.
 
         Notes
         -----
@@ -1566,7 +1582,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
 
     @abstractmethod
     def transfer_dimension_records_from(
-        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
+        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef | DataCoordinate]
     ) -> None:
         """Transfer dimension records to this Butler from another Butler.
 
@@ -1578,10 +1594,9 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
             `Butler` whose registry will be used to expand data IDs. If the
             source refs contain coordinates that are used to populate other
             records then this will also need to be a full `Butler`.
-        source_refs : iterable of `DatasetRef`
-            Datasets defined in the source butler whose dimension
-            should be transferred to this butler.
-            transfer is faster if the dataset refs are expanded.
+        source_refs : iterable of `DatasetRef` or `DataCoordinate`
+            Datasets or data IDs defined in the source butler whose dimension
+            records should be transferred to this butler.
         """
         raise NotImplementedError()
 
@@ -2025,7 +2040,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
 
         Returns
         -------
-        records : `list`[`DimensionRecord`]
+        records : `list` [`DimensionRecord`]
             Dimension records matching the given query parameters.
 
         Raises
@@ -2227,3 +2242,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
     @abstractmethod
     def close(self) -> None:
         raise NotImplementedError()
+
+    @abstractmethod
+    def _expand_data_ids(self, data_ids: Iterable[DataCoordinate]) -> list[DataCoordinate]:
+        raise NotImplementedError()
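The `_butler.py` hunks above change two public behaviours: a new `skip_existing` flag on the Zip-ingest method, and `transfer_dimension_records_from` now accepting data IDs as well as dataset refs. A minimal sketch of how this might be used; the repository paths and Zip file are placeholders, and `Butler.ingest_zip` is assumed to be the Zip-ingest method whose signature appears in the hunks:

# Hypothetical sketch; "repo", "datasets.zip" and "other-repo" are
# placeholders, and Butler.ingest_zip is assumed to be the Zip-ingest
# method documented above.
from lsst.daf.butler import Butler

butler = Butler.from_config("repo", writeable=True)

# With skip_existing=True, re-ingesting a Zip whose dataset IDs are all
# already known becomes a no-op instead of raising
# ConflictingDefinitionError; a partial overlap is still an error.
butler.ingest_zip("datasets.zip", skip_existing=True)

# transfer_dimension_records_from() now accepts DataCoordinate values as
# well as DatasetRef, per the new Iterable[DatasetRef | DataCoordinate]
# annotation; an empty iterable is trivially valid.
source = Butler.from_config("other-repo")
butler.transfer_dimension_records_from(source, [])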
lsst/daf/butler/_butler_collections.py
CHANGED
@@ -360,10 +360,10 @@ class ButlerCollections(ABC, Sequence):
         name : `str`
             The name of the collection of interest.
         include_parents : `bool`, optional
-
+            If `True` any parents of this collection will be included.
         include_summary : `bool`, optional
-
-
+            If `True` dataset type names and governor dimensions of datasets
+            stored in this collection will be included in the result.
 
         Returns
         -------
@@ -464,7 +464,7 @@ class ButlerCollections(ABC, Sequence):
 
         Returns
         -------
-        filtered : `~collections.abc.Mapping` [`str`, `list`[`str`]]
+        filtered : `~collections.abc.Mapping` [`str`, `list` [`str`]]
             Mapping of the dataset type name to its corresponding list of
             collection names.
         """
lsst/daf/butler/_butler_metrics.py
CHANGED
@@ -27,14 +27,19 @@
 
 from __future__ import annotations
 
+__all__ = ["ButlerMetrics"]
+
 from collections.abc import Callable, Iterator
 from contextlib import contextmanager
+from typing import Concatenate, ParamSpec
 
 from pydantic import BaseModel
 
 from lsst.utils.logging import LsstLoggers
 from lsst.utils.timer import time_this
 
+P = ParamSpec("P")
+
 
 class ButlerMetrics(BaseModel):
     """Metrics collected during Butler operations."""
@@ -45,18 +50,26 @@ class ButlerMetrics(BaseModel):
     time_in_get: float = 0.0
     """Wall-clock time, in seconds, spent in get()."""
 
+    time_in_ingest: float = 0.0
+    """Wall-clock time, in seconds, spent in ingest()."""
+
     n_get: int = 0
     """Number of datasets retrieved with get()."""
 
     n_put: int = 0
     """Number of datasets stored with put()."""
 
+    n_ingest: int = 0
+    """Number of datasets ingested."""
+
     def reset(self) -> None:
         """Reset all metrics."""
         self.time_in_put = 0.0
         self.time_in_get = 0.0
+        self.time_in_ingest = 0.0
         self.n_get = 0
         self.n_put = 0
+        self.n_ingest = 0
 
     def increment_get(self, duration: float) -> None:
         """Increment time for get().
@@ -80,13 +93,31 @@ class ButlerMetrics(BaseModel):
         self.time_in_put += duration
         self.n_put += 1
 
+    def increment_ingest(self, duration: float, n_datasets: int) -> None:
+        """Increment time and datasets for ingest().
+
+        Parameters
+        ----------
+        duration : `float`
+            Duration to add to the ingest() statistics.
+        n_datasets : `int`
+            Number of datasets to be ingested for this call.
+        """
+        self.time_in_ingest += duration
+        self.n_ingest += n_datasets
+
     @contextmanager
     def _timer(
-        self, handler: Callable[[float], None], log: LsstLoggers | None = None, msg: str | None = None
+        self,
+        handler: Callable[Concatenate[float, P], None],
+        log: LsstLoggers | None = None,
+        msg: str | None = None,
+        *args: P.args,
+        **kwargs: P.kwargs,
     ) -> Iterator[None]:
         with time_this(log=log, msg=msg) as timer:
             yield
-        handler(timer.duration)
+        handler(timer.duration, *args, **kwargs)
 
     @contextmanager
     def instrument_get(self, log: LsstLoggers | None = None, msg: str | None = None) -> Iterator[None]:
@@ -115,3 +146,21 @@ class ButlerMetrics(BaseModel):
         """
         with self._timer(self.increment_put, log=log, msg=msg):
             yield
+
+    @contextmanager
+    def instrument_ingest(
+        self, n_datasets: int, log: LsstLoggers | None = None, msg: str | None = None
+    ) -> Iterator[None]:
+        """Run code and increment ingest statistics.
+
+        Parameters
+        ----------
+        n_datasets : `int`
+            Number of datasets being ingested.
+        log : `logging.Logger` or `None`
+            Logger to use for any timing information.
+        msg : `str` or `None`
+            Any message to be included in log output.
+        """
+        with self._timer(self.increment_ingest, n_datasets=n_datasets, log=log, msg=msg):
+            yield
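Taken together, these hunks generalize the private `_timer` helper with `ParamSpec`/`Concatenate` so that a handler taking extra arguments (here `increment_ingest(duration, n_datasets)`) can share the timing path already used by `increment_get` and `increment_put`. A self-contained sketch of the new public surface, using only the API added in this diff (the sleep stands in for real ingest work):

# Sketch using only the API added above; the sleep is a stand-in for
# actual ingest work.
import time

from lsst.daf.butler._butler_metrics import ButlerMetrics

metrics = ButlerMetrics()
with metrics.instrument_ingest(3):  # about to ingest three datasets
    time.sleep(0.01)

print(metrics.n_ingest)        # 3
print(metrics.time_in_ingest)  # ~0.01 wall-clock seconds
metrics.reset()                # now also zeroes the ingest counters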
lsst/daf/butler/_dataset_provenance.py
CHANGED
@@ -267,7 +267,7 @@ class DatasetProvenance(pydantic.BaseModel):
         use_upper : `bool` or `None`
             If `True` use upper case for provenance keys, if `False` use lower
             case, if `None` match the case of the prefix.
-        keys : `tuple` of `str` | `int`
+        *keys : `tuple` of `str` | `int`
             Components of key to combine with prefix and separator.
 
         Returns
lsst/daf/butler/_dataset_ref.py
CHANGED
@@ -479,7 +479,7 @@ class DatasetRef:
 
         Parameters
         ----------
-        simple : `dict` of [`str`, `Any`]
+        simple : `dict` of [`str`, `typing.Any`]
             The value returned by `to_simple()`.
         universe : `DimensionUniverse`
             The special graph of all known dimensions.
lsst/daf/butler/_exceptions.py
CHANGED
@@ -196,8 +196,8 @@ class ValidationError(RuntimeError):
 
 
 class EmptyQueryResultError(Exception):
-    """Exception raised when query methods return an empty result and
-    flag is set.
+    """Exception raised when query methods return an empty result and
+    ``explain`` flag is set.
 
     Parameters
     ----------
lsst/daf/butler/_file_dataset.py
CHANGED
@@ -129,7 +129,8 @@ class FileDataset:
         ----------
         dataset : `SerializedFileDataset`
             Object to deserialize.
-        dataset_type_loader :
+        dataset_type_loader : `~collections.abc.Callable` \
+            [[ `str` ], `DatasetType` ]
             Function that takes a string dataset type name as its
             only parameter, and returns an instance of `DatasetType`.
             Used to deserialize the `DatasetRef` instances contained
lsst/daf/butler/_formatter.py
CHANGED
@@ -54,6 +54,7 @@ from ._config import Config
 from ._config_support import LookupKey, processLookupConfigs
 from ._file_descriptor import FileDescriptor
 from ._location import Location
+from ._rubin.temporary_for_ingest import TemporaryForIngest
 from .dimensions import DataCoordinate, DimensionUniverse
 from .mapping_factory import MappingFactory
 
@@ -909,6 +910,10 @@ class FormatterV2:
         provenance : `DatasetProvenance` | `None`, optional
             Provenance to attach to the file being written.
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         FormatterNotImplementedError
@@ -1031,13 +1036,7 @@ class FormatterV2:
         """
         cache_manager = self._ensure_cache(cache_manager)
 
-        # Always write to a temporary even if
-        # using a local file system -- that gives us atomic writes.
-        # If a process is killed as the file is being written we do not
-        # want it to remain in the correct place but in corrupt state.
-        # For local files write to the output directory not temporary dir.
-        prefix = uri.dirname() if uri.isLocal else None
-        with ResourcePath.temporary_uri(suffix=uri.getExtension(), prefix=prefix) as temporary_uri:
+        with TemporaryForIngest.make_path(uri) as temporary_uri:
             # Need to configure the formatter to write to a different
             # location and that needs us to overwrite internals
             log.debug("Writing dataset to temporary location at %s", temporary_uri)
@@ -1142,6 +1141,10 @@ class FormatterV2:
         location : `Location`
             Location from which to extract a file extension.
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         ValueError
@@ -1588,6 +1591,10 @@ class Formatter(metaclass=ABCMeta):
         location : `Location`
             Location from which to extract a file extension.
 
+        Returns
+        -------
+        None
+
         Raises
        ------
        NotImplementedError
lsst/daf/butler/_labeled_butler_factory.py
CHANGED
@@ -30,7 +30,9 @@ from __future__ import annotations
 __all__ = ("LabeledButlerFactory", "LabeledButlerFactoryProtocol")
 
 from collections.abc import Mapping
-from
+from contextlib import AbstractContextManager
+from logging import getLogger
+from typing import Any, Literal, Protocol, Self
 
 from lsst.resources import ResourcePathExpression
 
@@ -40,6 +42,8 @@ from ._butler_repo_index import ButlerRepoIndex
 from ._utilities.named_locks import NamedLocks
 from ._utilities.thread_safe_cache import ThreadSafeCache
 
+_LOG = getLogger(__name__)
+
 
 class LabeledButlerFactoryProtocol(Protocol):
     """Callable to retrieve a butler from a label."""
@@ -47,7 +51,7 @@ class LabeledButlerFactoryProtocol(Protocol):
     def __call__(self, label: str) -> Butler: ...
 
 
-class LabeledButlerFactory:
+class LabeledButlerFactory(AbstractContextManager):
     """Factory for efficiently instantiating Butler instances from the
     repository index file. This is intended for use from long-lived services
     that want to instantiate a separate Butler instance for each end user
@@ -60,6 +64,9 @@ class LabeledButlerFactory:
        files. If not provided, defaults to the global repository index
        configured by the ``DAF_BUTLER_REPOSITORY_INDEX`` environment variable
        -- see `ButlerRepoIndex`.
+    writeable : `bool`, optional
+        If `True`, Butler instances created by this factory will be writeable.
+        If `False` (the default), instances will be read-only.
 
     Notes
     -----
@@ -76,11 +83,12 @@ class LabeledButlerFactory:
     safely be used by separate threads.
     """
 
-    def __init__(self, repositories: Mapping[str, str] | None = None) -> None:
+    def __init__(self, repositories: Mapping[str, str] | None = None, writeable: bool = False) -> None:
         if repositories is None:
             self._repositories = None
         else:
             self._repositories = dict(repositories)
+        self._writeable = writeable
 
         self._factories = ThreadSafeCache[str, _ButlerFactory]()
         self._initialization_locks = NamedLocks()
@@ -88,6 +96,16 @@ class LabeledButlerFactory:
         # This may be overridden by unit tests.
         self._preload_unsafe_direct_butler_caches = True
 
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Literal[False]:
+        try:
+            self.close()
+        except Exception:
+            _LOG.exception("An exception occurred during LabeledButlerFactory.close()")
+        return False
+
     def bind(self, access_token: str | None) -> LabeledButlerFactoryProtocol:
         """Create a callable factory function for generating Butler instances
         with out needing to specify access tokans again.
@@ -109,7 +127,7 @@ class LabeledButlerFactory:
 
         return create
 
-    def create_butler(self,
+    def create_butler(self, label: str, *, access_token: str | None = None) -> Butler:
         """Create a Butler instance.
 
         Parameters
@@ -118,7 +136,7 @@ class LabeledButlerFactory:
             Label of the repository to instantiate, from the ``repositories``
             parameter to the `LabeledButlerFactory` constructor or the global
             repository index file.
-        access_token : `str` | `None`
+        access_token : `str` | `None`, optional
             Gafaelfawr access token used to authenticate to a Butler server.
             This is required for any repositories configured to use
             `RemoteButler`. If you only use `DirectButler`, this may be
@@ -167,7 +185,9 @@ class LabeledButlerFactory:
 
         match butler_type:
             case ButlerType.DIRECT:
-                return _DirectButlerFactory(config, self._preload_unsafe_direct_butler_caches)
+                return _DirectButlerFactory(
+                    config, self._preload_unsafe_direct_butler_caches, self._writeable
+                )
            case ButlerType.REMOTE:
                return _RemoteButlerFactory(config)
            case _:
@@ -189,12 +209,12 @@ class LabeledButlerFactory:
 
 
 class _DirectButlerFactory(_ButlerFactory):
-    def __init__(self, config: ButlerConfig, preload_unsafe_caches: bool) -> None:
+    def __init__(self, config: ButlerConfig, preload_unsafe_caches: bool, writeable: bool) -> None:
        import lsst.daf.butler.direct_butler
 
        # Create a 'template' Butler that will be cloned when callers request
        # an instance.
-        self._butler = Butler.from_config(config)
+        self._butler = Butler.from_config(config, writeable=writeable)
        assert isinstance(self._butler, lsst.daf.butler.direct_butler.DirectButler)
 
        # Load caches so that data is available in cloned instances without
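`LabeledButlerFactory` gains a `writeable` flag and context-manager support: exiting the `with` block calls `close()` and logs (rather than propagates) any exception it raises, while `__exit__` returning `False` lets exceptions from the block body propagate normally. A sketch with a hypothetical repository index:

# Sketch of the new API; the label and config path are placeholders.
from lsst.daf.butler._labeled_butler_factory import LabeledButlerFactory

repositories = {"main": "/repos/main/butler.yaml"}  # hypothetical index

with LabeledButlerFactory(repositories, writeable=True) as factory:
    butler = factory.create_butler("main")
    # ... each call clones the cached, now-writeable template Butler ...
# factory.close() has been called here; a failure inside close() is logged
# via _LOG.exception() instead of masking the block's own exception.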
lsst/daf/butler/_rubin/temporary_for_ingest.py
ADDED
@@ -0,0 +1,207 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("TemporaryForIngest",)
+
+import dataclasses
+import glob
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, Self, cast
+
+from lsst.resources import ResourcePath
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from types import TracebackType
+
+    from .._butler import Butler
+    from .._dataset_ref import DatasetRef
+    from .._file_dataset import FileDataset
+    from .._limited_butler import LimitedButler
+
+
+@dataclasses.dataclass
+class TemporaryForIngest:
+    """A context manager for generating temporary paths that will be ingested
+    as butler datasets.
+
+    Notes
+    -----
+    Neither this class nor its `make_path` method run ingest automatically when
+    their context manager is exited; the `ingest` method must always be called
+    explicitly.
+    """
+
+    butler: Butler
+    """Full butler to obtain a predicted path from and ingest into."""
+
+    ref: DatasetRef
+    """Description of the dataset to ingest."""
+
+    dataset: FileDataset = dataclasses.field(init=False)
+    """The dataset that will be passed to `Butler.ingest`."""
+
+    @property
+    def path(self) -> ResourcePath:
+        """The temporary path.
+
+        Guaranteed to be a local POSIX path.
+        """
+        return cast(ResourcePath, self.dataset.path)
+
+    @property
+    def ospath(self) -> str:
+        """The temporary path as a complete filename."""
+        return self.path.ospath
+
+    @classmethod
+    @contextmanager
+    def make_path(cls, final_path: ResourcePath) -> Iterator[ResourcePath]:
+        """Return a temporary path context manager given the predicted final
+        path.
+
+        Parameters
+        ----------
+        final_path : `lsst.resources.ResourcePath`
+            Predicted final path.
+
+        Returns
+        -------
+        context : `contextlib.AbstractContextManager`
+            A context manager that yields the temporary
+            `~lsst.resources.ResourcePath` when entered and deletes that file
+            when exited.
+        """
+        # Always write to a temporary even if using a local file system -- that
+        # gives us atomic writes. If a process is killed as the file is being
+        # written we do not want it to remain in the correct place but in
+        # corrupt state. For local files write to the output directory not
+        # temporary dir.
+        prefix = final_path.dirname() if final_path.isLocal else None
+        if prefix is not None:
+            prefix.mkdir()
+        with ResourcePath.temporary_uri(
+            suffix=cls._get_temporary_suffix(final_path), prefix=prefix
+        ) as temporary_path:
+            yield temporary_path
+
+    def ingest(self, record_validation_info: bool = True) -> None:
+        """Ingest the file into the butler.
+
+        Parameters
+        ----------
+        record_validation_info : `bool`, optional
+            Whether to record the file size and checksum upon ingest.
+        """
+        self.butler.ingest(self.dataset, transfer="move", record_validation_info=record_validation_info)
+
+    def __enter__(self) -> Self:
+        from .._file_dataset import FileDataset
+
+        final_path = self.butler.getURI(self.ref, predict=True).replace(fragment="")
+        prefix = final_path.dirname() if final_path.isLocal else None
+        if prefix is not None:
+            prefix.mkdir()
+        self._temporary_path_context = self.make_path(final_path)
+        temporary_path = self._temporary_path_context.__enter__()
+        self.dataset = FileDataset(temporary_path, [self.ref], formatter=None)
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> bool | None:
+        return self._temporary_path_context.__exit__(exc_type, exc_value, traceback)
+
+    @classmethod
+    def find_orphaned_temporaries_by_path(cls, final_path: ResourcePath) -> list[ResourcePath]:
+        """Search for temporary files that were not successfully ingested.
+
+        Parameters
+        ----------
+        final_path : `lsst.resources.ResourcePath`
+            Final path a successfully-ingested file would have.
+
+        Returns
+        -------
+        paths : `list` [ `lsst.resources.ResourcePath` ]
+            Files that look like temporaries that might have been created while
+            trying to write the target dataset.
+
+        Notes
+        -----
+        Orphaned files are only possible when a context manager is interrupted
+        by a hard error that prevents any cleanup code from running (e.g.
+        sudden loss of power).
+        """
+        if not final_path.isLocal:
+            # We return true tempfile for non-local predicted paths, so orphans
+            # are not our problem (the OS etc. will take care of them).
+            return []
+        return [
+            ResourcePath(filename)
+            for filename in glob.glob(
+                f"{glob.escape(final_path.dirname().ospath)}*{glob.escape(cls._get_temporary_suffix(final_path))}"
+            )
+            if filename != final_path.ospath
+        ]
+
+    @classmethod
+    def find_orphaned_temporaries_by_ref(cls, ref: DatasetRef, butler: LimitedButler) -> list[ResourcePath]:
+        """Search for temporary files that were not successfully ingested.
+
+        Parameters
+        ----------
+        ref : `..DatasetRef`
+            A dataset reference the temporaries correspond to.
+        butler : `lsst.daf.butler.LimitedButler`
+            Butler that can be used to obtain a predicted URI for a dataset.
+
+        Returns
+        -------
+        paths : `list` [ `lsst.resources.ResourcePath` ]
+            Files that look like temporaries that might have been created while
+            trying to write the target dataset.
+
+        Notes
+        -----
+        Orphaned files are only possible when a context manager is interrupted
+        by a hard error that prevents any cleanup code from running (e.g.
+        sudden loss of power).
+        """
+        final_path = butler.getURI(ref, predict=True).replace(fragment="")
+        return cls.find_orphaned_temporaries_by_path(final_path)
+
+    @staticmethod
+    def _get_temporary_suffix(path: ResourcePath) -> str:
+        ext = path.getExtension()
+        basename = path.basename().removesuffix(ext)
+        return f"{basename}.tmp{ext}"
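A usage sketch for the new helper, based only on the code above; `butler` and `ref` are assumed to exist (a writeable `Butler` and a `DatasetRef` for a file the caller writes directly rather than through `put()`):

# Sketch; "butler" and "ref" are assumed to be a writeable Butler and a
# DatasetRef for the dataset being written.
from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest

with TemporaryForIngest(butler, ref) as tmp:
    # tmp.ospath is a local temporary sibling of the predicted final path
    # (named "<basename>.tmp<ext>"), so ingest's move can be atomic.
    with open(tmp.ospath, "wb") as stream:
        stream.write(b"example payload")
    # Ingest is never implicit; call it while the context is still open.
    # transfer="move" means the exit-time cleanup finds nothing to delete.
    tmp.ingest()

# After a hard interruption (e.g. power loss), stale temporaries can be
# located from the same ref:
orphans = TemporaryForIngest.find_orphaned_temporaries_by_ref(ref, butler)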
lsst/daf/butler/cli/cmd/_remove_runs.py
CHANGED
@@ -114,18 +114,7 @@ def remove_runs(context: click.Context, confirm: bool, force: bool, **kwargs: An
 
     This command can be used to remove RUN collections and the datasets within
     them.
-
-    Parameters
-    ----------
-    context : `click.Context`
-        Context provided by Click.
-    confirm : `bool`
-        Confirmation for removal of the run.
-    force : `bool`
-        Force removal.
-    **kwargs : `dict` [`str`, `str`]
-        The parameters to pass to `~lsst.daf.butler.script.removeRuns`.
-    """
+    """  # numpydoc ignore=PR01
     result = script.removeRuns(**kwargs)
     canRemoveRuns = len(result.runs)
     if not canRemoveRuns: