lsst-daf-butler 30.0.0rc2-py3-none-any.whl → 30.0.1rc1-py3-none-any.whl

Files changed (88)
  1. lsst/daf/butler/_butler.py +27 -8
  2. lsst/daf/butler/_butler_collections.py +4 -4
  3. lsst/daf/butler/_butler_metrics.py +51 -2
  4. lsst/daf/butler/_dataset_provenance.py +1 -1
  5. lsst/daf/butler/_dataset_ref.py +1 -1
  6. lsst/daf/butler/_exceptions.py +2 -2
  7. lsst/daf/butler/_file_dataset.py +2 -1
  8. lsst/daf/butler/_formatter.py +14 -7
  9. lsst/daf/butler/_labeled_butler_factory.py +28 -8
  10. lsst/daf/butler/_query_all_datasets.py +2 -0
  11. lsst/daf/butler/_rubin/temporary_for_ingest.py +207 -0
  12. lsst/daf/butler/cli/cmd/_remove_runs.py +1 -12
  13. lsst/daf/butler/column_spec.py +4 -4
  14. lsst/daf/butler/configs/datastores/formatters.yaml +1 -0
  15. lsst/daf/butler/configs/storageClasses.yaml +15 -0
  16. lsst/daf/butler/datastore/_datastore.py +21 -1
  17. lsst/daf/butler/datastore/record_data.py +1 -1
  18. lsst/daf/butler/datastore/stored_file_info.py +2 -2
  19. lsst/daf/butler/datastores/chainedDatastore.py +4 -0
  20. lsst/daf/butler/datastores/fileDatastore.py +26 -13
  21. lsst/daf/butler/datastores/file_datastore/get.py +4 -4
  22. lsst/daf/butler/datastores/file_datastore/retrieve_artifacts.py +5 -1
  23. lsst/daf/butler/datastores/file_datastore/transfer.py +2 -2
  24. lsst/daf/butler/datastores/inMemoryDatastore.py +8 -0
  25. lsst/daf/butler/ddl.py +2 -2
  26. lsst/daf/butler/dimensions/_coordinate.py +11 -8
  27. lsst/daf/butler/dimensions/_record_set.py +1 -1
  28. lsst/daf/butler/dimensions/_records.py +9 -3
  29. lsst/daf/butler/direct_butler/_direct_butler.py +85 -51
  30. lsst/daf/butler/direct_query_driver/_driver.py +5 -4
  31. lsst/daf/butler/direct_query_driver/_result_page_converter.py +1 -1
  32. lsst/daf/butler/formatters/parquet.py +6 -6
  33. lsst/daf/butler/logging.py +9 -3
  34. lsst/daf/butler/nonempty_mapping.py +1 -1
  35. lsst/daf/butler/persistence_context.py +8 -5
  36. lsst/daf/butler/queries/_general_query_results.py +1 -1
  37. lsst/daf/butler/queries/driver.py +1 -1
  38. lsst/daf/butler/queries/expression_factory.py +2 -2
  39. lsst/daf/butler/queries/expressions/parser/exprTree.py +1 -1
  40. lsst/daf/butler/queries/expressions/parser/parserYacc.py +1 -1
  41. lsst/daf/butler/queries/overlaps.py +2 -2
  42. lsst/daf/butler/queries/tree/_column_set.py +1 -1
  43. lsst/daf/butler/registry/_collection_record_cache.py +1 -1
  44. lsst/daf/butler/registry/_collection_summary_cache.py +5 -4
  45. lsst/daf/butler/registry/_registry.py +4 -0
  46. lsst/daf/butler/registry/bridge/monolithic.py +17 -13
  47. lsst/daf/butler/registry/databases/postgresql.py +2 -1
  48. lsst/daf/butler/registry/datasets/byDimensions/_dataset_type_cache.py +1 -1
  49. lsst/daf/butler/registry/datasets/byDimensions/_manager.py +53 -47
  50. lsst/daf/butler/registry/datasets/byDimensions/summaries.py +3 -2
  51. lsst/daf/butler/registry/expand_data_ids.py +93 -0
  52. lsst/daf/butler/registry/interfaces/_database.py +6 -1
  53. lsst/daf/butler/registry/interfaces/_datasets.py +2 -1
  54. lsst/daf/butler/registry/interfaces/_obscore.py +1 -1
  55. lsst/daf/butler/registry/obscore/_records.py +1 -1
  56. lsst/daf/butler/registry/obscore/_spatial.py +2 -2
  57. lsst/daf/butler/registry/queries/_results.py +2 -2
  58. lsst/daf/butler/registry/sql_registry.py +3 -25
  59. lsst/daf/butler/registry/wildcards.py +5 -5
  60. lsst/daf/butler/remote_butler/_get.py +1 -1
  61. lsst/daf/butler/remote_butler/_remote_butler.py +6 -1
  62. lsst/daf/butler/remote_butler/_remote_file_transfer_source.py +4 -0
  63. lsst/daf/butler/remote_butler/authentication/cadc.py +4 -3
  64. lsst/daf/butler/script/_pruneDatasets.py +4 -2
  65. lsst/daf/butler/script/configValidate.py +2 -2
  66. lsst/daf/butler/script/queryCollections.py +2 -2
  67. lsst/daf/butler/script/removeCollections.py +2 -0
  68. lsst/daf/butler/script/removeRuns.py +2 -0
  69. lsst/daf/butler/tests/cliCmdTestBase.py +2 -0
  70. lsst/daf/butler/tests/cliLogTestBase.py +2 -0
  71. lsst/daf/butler/tests/hybrid_butler.py +10 -2
  72. lsst/daf/butler/tests/registry_data/lsstcam-subset.yaml +191 -0
  73. lsst/daf/butler/tests/registry_data/spatial.py +4 -2
  74. lsst/daf/butler/tests/testFormatters.py +2 -2
  75. lsst/daf/butler/tests/utils.py +1 -1
  76. lsst/daf/butler/timespan_database_representation.py +3 -3
  77. lsst/daf/butler/transfers/_context.py +7 -6
  78. lsst/daf/butler/version.py +1 -1
  79. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1rc1.dist-info}/METADATA +3 -2
  80. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1rc1.dist-info}/RECORD +88 -85
  81. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1rc1.dist-info}/WHEEL +1 -1
  82. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1rc1.dist-info}/entry_points.txt +0 -0
  83. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1rc1.dist-info}/licenses/COPYRIGHT +0 -0
  84. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1rc1.dist-info}/licenses/LICENSE +0 -0
  85. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1rc1.dist-info}/licenses/bsd_license.txt +0 -0
  86. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1rc1.dist-info}/licenses/gpl-v3.0.txt +0 -0
  87. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1rc1.dist-info}/top_level.txt +0 -0
  88. {lsst_daf_butler-30.0.0rc2.dist-info → lsst_daf_butler-30.0.1rc1.dist-info}/zip-safe +0 -0
lsst/daf/butler/_butler.py
@@ -138,7 +138,10 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  without_datastore : `bool`, optional
  If `True` do not attach a datastore to this butler. Any attempts
  to use a datastore will fail.
- **kwargs : `Any`
+ metrics : `ButlerMetrics` or `None`
+ External metrics object to be used for tracking butler usage. If `None`
+ a new metrics object is created.
+ **kwargs : `typing.Any`
  Additional keyword arguments passed to a constructor of actual butler
  class.

@@ -240,7 +243,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  to use a datastore will fail.
  metrics : `ButlerMetrics` or `None`, optional
  Metrics object to record butler usage statistics.
- **kwargs : `Any`
+ **kwargs : `typing.Any`
  Default data ID key-value pairs. These may only identify
  "governor" dimensions like ``instrument`` and ``skymap``.

@@ -1390,6 +1393,10 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  raised if any datasets with the same dataset ID already exist
  in the datastore.

+ Returns
+ -------
+ None
+
  Raises
  ------
  TypeError
@@ -1429,6 +1436,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  *,
  transfer_dimensions: bool = False,
  dry_run: bool = False,
+ skip_existing: bool = False,
  ) -> None:
  """Ingest a Zip file into this butler.

@@ -1447,6 +1455,14 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  If `True` the ingest will be processed without any modifications
  made to the target butler and as if the target butler did not
  have any of the datasets.
+ skip_existing : `bool`, optional
+ If `True`, a zip will not be ingested if the dataset entries listed
+ in the index with the same dataset ID already exists in the butler.
+ If `False` (the default), a `ConflictingDefinitionError` will be
+ raised if any datasets with the same dataset ID already exist
+ in the repository. If, somehow, some datasets are known to the
+ butler and some are not, this is currently treated as an error
+ rather than attempting to do a partial ingest.

  Notes
  -----
@@ -1566,7 +1582,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02

  @abstractmethod
  def transfer_dimension_records_from(
- self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
+ self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef | DataCoordinate]
  ) -> None:
  """Transfer dimension records to this Butler from another Butler.

@@ -1578,10 +1594,9 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  `Butler` whose registry will be used to expand data IDs. If the
  source refs contain coordinates that are used to populate other
  records then this will also need to be a full `Butler`.
- source_refs : iterable of `DatasetRef`
- Datasets defined in the source butler whose dimension records
- should be transferred to this butler. In most circumstances.
- transfer is faster if the dataset refs are expanded.
+ source_refs : iterable of `DatasetRef` or `DataCoordinate`
+ Datasets or data IDs defined in the source butler whose dimension
+ records should be transferred to this butler.
  """
  raise NotImplementedError()

@@ -2025,7 +2040,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02

  Returns
  -------
- records : `list`[`DimensionRecord`]
+ records : `list` [`DimensionRecord`]
  Dimension records matching the given query parameters.

  Raises
@@ -2227,3 +2242,7 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
  @abstractmethod
  def close(self) -> None:
  raise NotImplementedError()
+
+ @abstractmethod
+ def _expand_data_ids(self, data_ids: Iterable[DataCoordinate]) -> list[DataCoordinate]:
+ raise NotImplementedError()
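
The hunks above add an externally supplied metrics object to the Butler constructors and a `skip_existing` option to `ingest_zip`. A minimal sketch of how the two might be used together; the repository path, Zip file name, and `writeable` flag are placeholders, and `ButlerMetrics` is assumed to be exported at the package level:

    from lsst.daf.butler import Butler, ButlerMetrics

    metrics = ButlerMetrics()
    butler = Butler.from_config("/repo/example", writeable=True, metrics=metrics)

    # With skip_existing=True a Zip whose indexed datasets are all already known
    # to the repository is skipped instead of raising ConflictingDefinitionError.
    butler.ingest_zip("datasets.zip", skip_existing=True)

    print(f"{metrics.n_ingest} datasets ingested in {metrics.time_in_ingest:.1f} s")
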
lsst/daf/butler/_butler_collections.py
@@ -360,10 +360,10 @@ class ButlerCollections(ABC, Sequence):
  name : `str`
  The name of the collection of interest.
  include_parents : `bool`, optional
- If `True` any parents of this collection will be included.
+ If `True` any parents of this collection will be included.
  include_summary : `bool`, optional
- If `True` dataset type names and governor dimensions of datasets
- stored in this collection will be included in the result.
+ If `True` dataset type names and governor dimensions of datasets
+ stored in this collection will be included in the result.

  Returns
  -------
@@ -464,7 +464,7 @@ class ButlerCollections(ABC, Sequence):

  Returns
  -------
- filtered : `~collections.abc.Mapping` [`str`, `list`[`str`]]
+ filtered : `~collections.abc.Mapping` [`str`, `list` [`str`]]
  Mapping of the dataset type name to its corresponding list of
  collection names.
  """
lsst/daf/butler/_butler_metrics.py
@@ -27,14 +27,19 @@

  from __future__ import annotations

+ __all__ = ["ButlerMetrics"]
+
  from collections.abc import Callable, Iterator
  from contextlib import contextmanager
+ from typing import Concatenate, ParamSpec

  from pydantic import BaseModel

  from lsst.utils.logging import LsstLoggers
  from lsst.utils.timer import time_this

+ P = ParamSpec("P")
+

  class ButlerMetrics(BaseModel):
  """Metrics collected during Butler operations."""
@@ -45,18 +50,26 @@ class ButlerMetrics(BaseModel):
  time_in_get: float = 0.0
  """Wall-clock time, in seconds, spent in get()."""

+ time_in_ingest: float = 0.0
+ """Wall-clock time, in seconds, spent in ingest()."""
+
  n_get: int = 0
  """Number of datasets retrieved with get()."""

  n_put: int = 0
  """Number of datasets stored with put()."""

+ n_ingest: int = 0
+ """Number of datasets ingested."""
+
  def reset(self) -> None:
  """Reset all metrics."""
  self.time_in_put = 0.0
  self.time_in_get = 0.0
+ self.time_in_ingest = 0.0
  self.n_get = 0
  self.n_put = 0
+ self.n_ingest = 0

  def increment_get(self, duration: float) -> None:
  """Increment time for get().
@@ -80,13 +93,31 @@ class ButlerMetrics(BaseModel):
  self.time_in_put += duration
  self.n_put += 1

+ def increment_ingest(self, duration: float, n_datasets: int) -> None:
+ """Increment time and datasets for ingest().
+
+ Parameters
+ ----------
+ duration : `float`
+ Duration to add to the ingest() statistics.
+ n_datasets : `int`
+ Number of datasets to be ingested for this call.
+ """
+ self.time_in_ingest += duration
+ self.n_ingest += n_datasets
+
  @contextmanager
  def _timer(
- self, handler: Callable[[float], None], log: LsstLoggers | None = None, msg: str | None = None
+ self,
+ handler: Callable[Concatenate[float, P], None],
+ log: LsstLoggers | None = None,
+ msg: str | None = None,
+ *args: P.args,
+ **kwargs: P.kwargs,
  ) -> Iterator[None]:
  with time_this(log=log, msg=msg) as timer:
  yield
- handler(timer.duration)
+ handler(timer.duration, *args, **kwargs)

  @contextmanager
  def instrument_get(self, log: LsstLoggers | None = None, msg: str | None = None) -> Iterator[None]:
@@ -115,3 +146,21 @@ class ButlerMetrics(BaseModel):
  """
  with self._timer(self.increment_put, log=log, msg=msg):
  yield
+
+ @contextmanager
+ def instrument_ingest(
+ self, n_datasets: int, log: LsstLoggers | None = None, msg: str | None = None
+ ) -> Iterator[None]:
+ """Run code and increment ingest statistics.
+
+ Parameters
+ ----------
+ n_datasets : `int`
+ Number of datasets being ingested.
+ log : `logging.Logger` or `None`
+ Logger to use for any timing information.
+ msg : `str` or `None`
+ Any message to be included in log output.
+ """
+ with self._timer(self.increment_ingest, n_datasets=n_datasets, log=log, msg=msg):
+ yield
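
A small sketch of the new `instrument_ingest` context manager added above: it times the enclosed block and adds the elapsed wall-clock time plus the supplied dataset count to the metrics. The dataset list here is a stand-in:

    from lsst.daf.butler import ButlerMetrics

    metrics = ButlerMetrics()
    datasets = ["a.fits", "b.fits"]  # stand-ins for the real FileDataset entries

    with metrics.instrument_ingest(len(datasets)):
        pass  # the actual ingest work would run here

    print(metrics.n_ingest)        # 2
    print(metrics.time_in_ingest)  # wall-clock seconds spent inside the block
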
lsst/daf/butler/_dataset_provenance.py
@@ -267,7 +267,7 @@ class DatasetProvenance(pydantic.BaseModel):
  use_upper : `bool` or `None`
  If `True` use upper case for provenance keys, if `False` use lower
  case, if `None` match the case of the prefix.
- keys : `tuple` of `str` | `int`
+ *keys : `tuple` of `str` | `int`
  Components of key to combine with prefix and separator.

  Returns
lsst/daf/butler/_dataset_ref.py
@@ -479,7 +479,7 @@ class DatasetRef:

  Parameters
  ----------
- simple : `dict` of [`str`, `Any`]
+ simple : `dict` of [`str`, `typing.Any`]
  The value returned by `to_simple()`.
  universe : `DimensionUniverse`
  The special graph of all known dimensions.
lsst/daf/butler/_exceptions.py
@@ -196,8 +196,8 @@ class ValidationError(RuntimeError):


  class EmptyQueryResultError(Exception):
- """Exception raised when query methods return an empty result and `explain`
- flag is set.
+ """Exception raised when query methods return an empty result and
+ ``explain`` flag is set.

  Parameters
  ----------
lsst/daf/butler/_file_dataset.py
@@ -129,7 +129,8 @@ class FileDataset:
  ----------
  dataset : `SerializedFileDataset`
  Object to deserialize.
- dataset_type_loader : `Callable` [[ `str` ], `DatasetType` ]
+ dataset_type_loader : `~collections.abc.Callable` \
+ [[ `str` ], `DatasetType` ]
  Function that takes a string dataset type name as its
  only parameter, and returns an instance of `DatasetType`.
  Used to deserialize the `DatasetRef` instances contained
lsst/daf/butler/_formatter.py
@@ -54,6 +54,7 @@ from ._config import Config
  from ._config_support import LookupKey, processLookupConfigs
  from ._file_descriptor import FileDescriptor
  from ._location import Location
+ from ._rubin.temporary_for_ingest import TemporaryForIngest
  from .dimensions import DataCoordinate, DimensionUniverse
  from .mapping_factory import MappingFactory

@@ -909,6 +910,10 @@ class FormatterV2:
  provenance : `DatasetProvenance` | `None`, optional
  Provenance to attach to the file being written.

+ Returns
+ -------
+ None
+
  Raises
  ------
  FormatterNotImplementedError
@@ -1031,13 +1036,7 @@ class FormatterV2:
  """
  cache_manager = self._ensure_cache(cache_manager)

- # Always write to a temporary even if
- # using a local file system -- that gives us atomic writes.
- # If a process is killed as the file is being written we do not
- # want it to remain in the correct place but in corrupt state.
- # For local files write to the output directory not temporary dir.
- prefix = uri.dirname() if uri.isLocal else None
- with ResourcePath.temporary_uri(suffix=uri.getExtension(), prefix=prefix) as temporary_uri:
+ with TemporaryForIngest.make_path(uri) as temporary_uri:
  # Need to configure the formatter to write to a different
  # location and that needs us to overwrite internals
  log.debug("Writing dataset to temporary location at %s", temporary_uri)
@@ -1142,6 +1141,10 @@ class FormatterV2:
  location : `Location`
  Location from which to extract a file extension.

+ Returns
+ -------
+ None
+
  Raises
  ------
  ValueError
@@ -1588,6 +1591,10 @@ class Formatter(metaclass=ABCMeta):
  location : `Location`
  Location from which to extract a file extension.

+ Returns
+ -------
+ None
+
  Raises
  ------
  NotImplementedError
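
The middle hunk above replaces the formatter's inline temporary-file logic with the new `TemporaryForIngest.make_path` helper (added later in this diff). A hedged sketch of the staging pattern that helper supports, using a throwaway directory; the file name and payload are placeholders:

    import tempfile

    from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest
    from lsst.resources import ResourcePath

    with tempfile.TemporaryDirectory() as tmpdir:
        final = ResourcePath(tmpdir, forceDirectory=True).join("dataset.json")
        with TemporaryForIngest.make_path(final) as staging:
            staging.write(b"{}")  # stage the payload next to the target
            final.transfer_from(staging, transfer="move")  # then move it into place
        assert final.exists()
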
lsst/daf/butler/_labeled_butler_factory.py
@@ -30,7 +30,9 @@ from __future__ import annotations
  __all__ = ("LabeledButlerFactory", "LabeledButlerFactoryProtocol")

  from collections.abc import Mapping
- from typing import Protocol
+ from contextlib import AbstractContextManager
+ from logging import getLogger
+ from typing import Any, Literal, Protocol, Self

  from lsst.resources import ResourcePathExpression

@@ -40,6 +42,8 @@ from ._butler_repo_index import ButlerRepoIndex
  from ._utilities.named_locks import NamedLocks
  from ._utilities.thread_safe_cache import ThreadSafeCache

+ _LOG = getLogger(__name__)
+

  class LabeledButlerFactoryProtocol(Protocol):
  """Callable to retrieve a butler from a label."""
@@ -47,7 +51,7 @@ class LabeledButlerFactoryProtocol(Protocol):
  def __call__(self, label: str) -> Butler: ...


- class LabeledButlerFactory:
+ class LabeledButlerFactory(AbstractContextManager):
  """Factory for efficiently instantiating Butler instances from the
  repository index file. This is intended for use from long-lived services
  that want to instantiate a separate Butler instance for each end user
@@ -60,6 +64,9 @@ class LabeledButlerFactory:
  files. If not provided, defaults to the global repository index
  configured by the ``DAF_BUTLER_REPOSITORY_INDEX`` environment variable
  -- see `ButlerRepoIndex`.
+ writeable : `bool`, optional
+ If `True`, Butler instances created by this factory will be writeable.
+ If `False` (the default), instances will be read-only.

  Notes
  -----
@@ -76,11 +83,12 @@ class LabeledButlerFactory:
  safely be used by separate threads.
  """

- def __init__(self, repositories: Mapping[str, str] | None = None) -> None:
+ def __init__(self, repositories: Mapping[str, str] | None = None, writeable: bool = False) -> None:
  if repositories is None:
  self._repositories = None
  else:
  self._repositories = dict(repositories)
+ self._writeable = writeable

  self._factories = ThreadSafeCache[str, _ButlerFactory]()
  self._initialization_locks = NamedLocks()
@@ -88,6 +96,16 @@ class LabeledButlerFactory:
  # This may be overridden by unit tests.
  self._preload_unsafe_direct_butler_caches = True

+ def __enter__(self) -> Self:
+ return self
+
+ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Literal[False]:
+ try:
+ self.close()
+ except Exception:
+ _LOG.exception("An exception occurred during LabeledButlerFactory.close()")
+ return False
+
  def bind(self, access_token: str | None) -> LabeledButlerFactoryProtocol:
  """Create a callable factory function for generating Butler instances
  with out needing to specify access tokans again.
@@ -109,7 +127,7 @@ class LabeledButlerFactory:

  return create

- def create_butler(self, *, label: str, access_token: str | None) -> Butler:
+ def create_butler(self, label: str, *, access_token: str | None = None) -> Butler:
  """Create a Butler instance.

  Parameters
@@ -118,7 +136,7 @@ class LabeledButlerFactory:
  Label of the repository to instantiate, from the ``repositories``
  parameter to the `LabeledButlerFactory` constructor or the global
  repository index file.
- access_token : `str` | `None`
+ access_token : `str` | `None`, optional
  Gafaelfawr access token used to authenticate to a Butler server.
  This is required for any repositories configured to use
  `RemoteButler`. If you only use `DirectButler`, this may be
@@ -167,7 +185,9 @@

  match butler_type:
  case ButlerType.DIRECT:
- return _DirectButlerFactory(config, self._preload_unsafe_direct_butler_caches)
+ return _DirectButlerFactory(
+ config, self._preload_unsafe_direct_butler_caches, self._writeable
+ )
  case ButlerType.REMOTE:
  return _RemoteButlerFactory(config)
  case _:
@@ -189,12 +209,12 @@ class _ButlerFactory(Protocol):


  class _DirectButlerFactory(_ButlerFactory):
- def __init__(self, config: ButlerConfig, preload_unsafe_caches: bool) -> None:
+ def __init__(self, config: ButlerConfig, preload_unsafe_caches: bool, writeable: bool) -> None:
  import lsst.daf.butler.direct_butler

  # Create a 'template' Butler that will be cloned when callers request
  # an instance.
- self._butler = Butler.from_config(config)
+ self._butler = Butler.from_config(config, writeable=writeable)
  assert isinstance(self._butler, lsst.daf.butler.direct_butler.DirectButler)

  # Load caches so that data is available in cloned instances without
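
A short sketch of the factory changes above: construction with `writeable=True`, the new positional `label` argument to `create_butler`, and context-manager use so `close()` runs on exit. The repository label is a placeholder that would have to exist in the configured repository index, and the top-level import path is assumed:

    from lsst.daf.butler import LabeledButlerFactory

    with LabeledButlerFactory(writeable=True) as factory:
        butler = factory.create_butler("example_repo", access_token=None)
        ...  # use the writeable Butler clone here
    # factory.close() has been called automatically on exit.
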
lsst/daf/butler/_query_all_datasets.py
@@ -151,6 +151,8 @@ def _filter_collections_and_dataset_types(

  Parameters
  ----------
+ butler
+ Butler repository to use.
  collections
  List of collection names or collection search globs.
  dataset_type_query
lsst/daf/butler/_rubin/temporary_for_ingest.py
@@ -0,0 +1,207 @@
+ # This file is part of daf_butler.
+ #
+ # Developed for the LSST Data Management System.
+ # This product includes software developed by the LSST Project
+ # (http://www.lsst.org).
+ # See the COPYRIGHT file at the top-level directory of this distribution
+ # for details of code ownership.
+ #
+ # This software is dual licensed under the GNU General Public License and also
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+ # respectively. If you choose the GPL option then the following text applies
+ # (but note that there is still no warranty even if you opt for BSD instead):
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ from __future__ import annotations
+
+ __all__ = ("TemporaryForIngest",)
+
+ import dataclasses
+ import glob
+ from contextlib import contextmanager
+ from typing import TYPE_CHECKING, Self, cast
+
+ from lsst.resources import ResourcePath
+
+ if TYPE_CHECKING:
+ from collections.abc import Iterator
+ from types import TracebackType
+
+ from .._butler import Butler
+ from .._dataset_ref import DatasetRef
+ from .._file_dataset import FileDataset
+ from .._limited_butler import LimitedButler
+
+
+ @dataclasses.dataclass
+ class TemporaryForIngest:
+ """A context manager for generating temporary paths that will be ingested
+ as butler datasets.
+
+ Notes
+ -----
+ Neither this class nor its `make_path` method run ingest automatically when
+ their context manager is exited; the `ingest` method must always be called
+ explicitly.
+ """
+
+ butler: Butler
+ """Full butler to obtain a predicted path from and ingest into."""
+
+ ref: DatasetRef
+ """Description of the dataset to ingest."""
+
+ dataset: FileDataset = dataclasses.field(init=False)
+ """The dataset that will be passed to `Butler.ingest`."""
+
+ @property
+ def path(self) -> ResourcePath:
+ """The temporary path.
+
+ Guaranteed to be a local POSIX path.
+ """
+ return cast(ResourcePath, self.dataset.path)
+
+ @property
+ def ospath(self) -> str:
+ """The temporary path as a complete filename."""
+ return self.path.ospath
+
+ @classmethod
+ @contextmanager
+ def make_path(cls, final_path: ResourcePath) -> Iterator[ResourcePath]:
+ """Return a temporary path context manager given the predicted final
+ path.
+
+ Parameters
+ ----------
+ final_path : `lsst.resources.ResourcePath`
+ Predicted final path.
+
+ Returns
+ -------
+ context : `contextlib.AbstractContextManager`
+ A context manager that yields the temporary
+ `~lsst.resources.ResourcePath` when entered and deletes that file
+ when exited.
+ """
+ # Always write to a temporary even if using a local file system -- that
+ # gives us atomic writes. If a process is killed as the file is being
+ # written we do not want it to remain in the correct place but in
+ # corrupt state. For local files write to the output directory not
+ # temporary dir.
+ prefix = final_path.dirname() if final_path.isLocal else None
+ if prefix is not None:
+ prefix.mkdir()
+ with ResourcePath.temporary_uri(
+ suffix=cls._get_temporary_suffix(final_path), prefix=prefix
+ ) as temporary_path:
+ yield temporary_path
+
+ def ingest(self, record_validation_info: bool = True) -> None:
+ """Ingest the file into the butler.
+
+ Parameters
+ ----------
+ record_validation_info : `bool`, optional
+ Whether to- record the file size and checksum upon ingest.
+ """
+ self.butler.ingest(self.dataset, transfer="move", record_validation_info=record_validation_info)
+
+ def __enter__(self) -> Self:
+ from .._file_dataset import FileDataset
+
+ final_path = self.butler.getURI(self.ref, predict=True).replace(fragment="")
+ prefix = final_path.dirname() if final_path.isLocal else None
+ if prefix is not None:
+ prefix.mkdir()
+ self._temporary_path_context = self.make_path(final_path)
+ temporary_path = self._temporary_path_context.__enter__()
+ self.dataset = FileDataset(temporary_path, [self.ref], formatter=None)
+ return self
+
+ def __exit__(
+ self,
+ exc_type: type[BaseException] | None,
+ exc_value: BaseException | None,
+ traceback: TracebackType | None,
+ ) -> bool | None:
+ return self._temporary_path_context.__exit__(exc_type, exc_value, traceback)
+
+ @classmethod
+ def find_orphaned_temporaries_by_path(cls, final_path: ResourcePath) -> list[ResourcePath]:
+ """Search for temporary files that were not successfully ingested.
+
+ Parameters
+ ----------
+ final_path : `lsst.resources.ResourcePath`
+ Final path a successfully-ingested file would have.
+
+ Returns
+ -------
+ paths : `list` [ `lsst.resources.ResourcePath` ]
+ Files that look like temporaries that might have been created while
+ trying to write the target dataset.
+
+ Notes
+ -----
+ Orphaned files are only possible when a context manager is interrupted
+ by a hard error that prevents any cleanup code from running (e.g.
+ sudden loss of power).
+ """
+ if not final_path.isLocal:
+ # We return true tempfile for non-local predicted paths, so orphans
+ # are not our problem (the OS etc. will take care of them).
+ return []
+ return [
+ ResourcePath(filename)
+ for filename in glob.glob(
+ f"{glob.escape(final_path.dirname().ospath)}*{glob.escape(cls._get_temporary_suffix(final_path))}"
+ )
+ if filename != final_path.ospath
+ ]
+
+ @classmethod
+ def find_orphaned_temporaries_by_ref(cls, ref: DatasetRef, butler: LimitedButler) -> list[ResourcePath]:
+ """Search for temporary files that were not successfully ingested.
+
+ Parameters
+ ----------
+ ref : `..DatasetRef`
+ A dataset reference the temporaries correspond to.
+ butler : `lsst.daf.butler.LimitedButler`
+ Butler that can be used to obtain a predicted URI for a dataset.
+
+ Returns
+ -------
+ paths : `list` [ `lsst.resources.ResourcePath` ]
+ Files that look like temporaries that might have been created while
+ trying to write the target dataset.
+
+ Notes
+ -----
+ Orphaned files are only possible when a context manager is interrupted
+ by a hard error that prevents any cleanup code from running (e.g.
+ sudden loss of power).
+ """
+ final_path = butler.getURI(ref, predict=True).replace(fragment="")
+ return cls.find_orphaned_temporaries_by_path(final_path)
+
+ @staticmethod
+ def _get_temporary_suffix(path: ResourcePath) -> str:
+ ext = path.getExtension()
+ basename = path.basename().removesuffix(ext)
+ return f"{basename}.tmp{ext}"
lsst/daf/butler/cli/cmd/_remove_runs.py
@@ -114,18 +114,7 @@ def remove_runs(context: click.Context, confirm: bool, force: bool, **kwargs: An

  This command can be used to remove RUN collections and the datasets within
  them.
-
- Parameters
- ----------
- context : `click.Context`
- Context provided by Click.
- confirm : `bool`
- Confirmation for removal of the run.
- force : `bool`
- Force removal.
- **kwargs : `dict` [`str`, `str`]
- The parameters to pass to `~lsst.daf.butler.script.removeRuns`.
- """
+ """ # numpydoc ignore=PR01
  result = script.removeRuns(**kwargs)
  canRemoveRuns = len(result.runs)
  if not canRemoveRuns: