lsst-pipe-base 29.2025.2300__py3-none-any.whl → 29.2025.2400__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -27,11 +27,17 @@
27
27
 
28
28
  __all__ = ["transfer_from_graph"]
29
29
 
30
+ import math
31
+
30
32
  from lsst.daf.butler import Butler, CollectionType, QuantumBackedButler, Registry
31
33
  from lsst.daf.butler.registry import MissingCollectionError
32
34
  from lsst.pipe.base import QuantumGraph
35
+ from lsst.utils.iteration import chunk_iterable
36
+ from lsst.utils.logging import getLogger
37
+
38
+ from .utils import filter_by_dataset_type_glob, filter_by_existence
33
39
 
34
- from .utils import filter_by_dataset_type_glob
40
+ _LOG = getLogger(__name__)
35
41
 
36
42
 
37
43
  def transfer_from_graph(
@@ -92,18 +98,31 @@ def transfer_from_graph(
92
98
 
93
99
  # Filter the refs based on requested dataset types.
94
100
  filtered_refs = filter_by_dataset_type_glob(output_refs, dataset_type)
101
+ _LOG.verbose("After filtering by dataset_type, number of datasets to transfer: %d", len(filtered_refs))
95
102
 
96
103
  dest_butler = Butler.from_config(dest, writeable=True)
97
104
 
98
- transferred = dest_butler.transfer_from(
99
- qbb,
100
- filtered_refs,
101
- transfer="auto",
102
- register_dataset_types=register_dataset_types,
103
- transfer_dimensions=transfer_dimensions,
104
- dry_run=dry_run,
105
- )
106
- count = len(transferred)
105
+ # For faster restarts, filter out the datasets the destination butler already knows.
106
+ filtered_refs = filter_by_existence(dest_butler, filtered_refs)
107
+
108
+ # Transfer in chunks
109
+ chunk_size = 50_000
110
+ n_chunks = math.ceil(len(filtered_refs) / chunk_size)
111
+ chunk_num = 0
112
+ count = 0
113
+ for chunk in chunk_iterable(filtered_refs, chunk_size=chunk_size):
114
+ chunk_num += 1
115
+ if n_chunks > 1:
116
+ _LOG.verbose("Transferring %d datasets in chunk %d/%d", len(chunk), chunk_num, n_chunks)
117
+ transferred = dest_butler.transfer_from(
118
+ qbb,
119
+ chunk,
120
+ transfer="auto",
121
+ register_dataset_types=register_dataset_types,
122
+ transfer_dimensions=transfer_dimensions,
123
+ dry_run=dry_run,
124
+ )
125
+ count += len(transferred)
107
126
 
108
127
  # If anything was transferred then update output chain definition if asked.
109
128
  if count > 0 and update_output_chain and (metadata := qgraph.metadata) is not None:
@@ -27,15 +27,17 @@
27
27
 
28
28
  from __future__ import annotations
29
29
 
30
- __all__ = [
31
- "filter_by_dataset_type_glob",
32
- ]
30
+ __all__ = ["filter_by_dataset_type_glob", "filter_by_existence"]
33
31
 
34
32
  import re
35
33
  from collections.abc import Collection
36
34
 
37
- from lsst.daf.butler import DatasetRef
35
+ from lsst.daf.butler import Butler, DatasetRef
38
36
  from lsst.daf.butler.utils import globToRegex
37
+ from lsst.utils.logging import getLogger
38
+ from lsst.utils.timer import time_this
39
+
40
+ _LOG = getLogger(__name__)
39
41
 
40
42
 
41
43
  def _matches_dataset_type(dataset_type_name: str, regexes: list[str | re.Pattern]) -> bool:
@@ -72,3 +74,30 @@ def filter_by_dataset_type_glob(
72
74
  return refs
73
75
 
74
76
  return {ref for ref in refs if _matches_dataset_type(ref.datasetType.name, regexes)}
77
+
78
+
79
+ def filter_by_existence(butler: Butler, refs: Collection[DatasetRef]) -> Collection[DatasetRef]:
80
+ """Filter out the refs that the butler already knows exist.
81
+
82
+ Parameters
83
+ ----------
84
+ butler : `lsst.daf.butler.Butler`
85
+ Butler in which to check for the existence of the given dataset refs.
86
+ refs : `collections.abc.Collection` [ `lsst.daf.butler.DatasetRef` ]
87
+ Datasets to be filtered.
88
+
89
+ Returns
90
+ -------
91
+ filtered : `collections.abc.Collection` [ `lsst.daf.butler.DatasetRef` ]
92
+ Filtered datasets: the refs that the butler's datastore does not already know.
93
+ """
94
+ _LOG.verbose("Filtering out datasets already known to the target butler...")
95
+ with time_this(log=_LOG, msg="Completed checking existence"):
96
+ existence = butler._datastore.knows_these(refs)
97
+ filtered = [ref for ref in existence if not existence[ref]]
98
+ _LOG.verbose(
99
+ "After filtering out those already in the target butler, number of datasets to transfer: %d",
100
+ len(filtered),
101
+ )
102
+
103
+ return filtered
lsst/pipe/base/version.py CHANGED
@@ -1,2 +1,2 @@
1
1
  __all__ = ["__version__"]
2
- __version__ = "29.2025.2300"
2
+ __version__ = "29.2025.2400"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lsst-pipe-base
3
- Version: 29.2025.2300
3
+ Version: 29.2025.2400
4
4
  Summary: Pipeline infrastructure for the Rubin Science Pipelines.
5
5
  Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
6
6
  License: BSD 3-Clause License
@@ -32,7 +32,7 @@ lsst/pipe/base/task.py,sha256=XHBd-7m1a4-6LgobBYA1DgY4H7EV-_RWKfxbhZbMmD4,15145
32
32
  lsst/pipe/base/taskFactory.py,sha256=4GhN2DozPM8suBYIvoKN4E6VP0I3mYZHBjCUO5JcCGk,2901
33
33
  lsst/pipe/base/testUtils.py,sha256=lSBKMhoKflbi8JkMNYfEqqHNl-rtFI8UYT3QneDYpLo,18477
34
34
  lsst/pipe/base/utils.py,sha256=JmEt3l0xrh9uayKrSXuQEq12aXOhDr2YXmbYduaxCko,1940
35
- lsst/pipe/base/version.py,sha256=GnyaHNlpLZJmdCekPuL3VEJCw758_rgDNpEpGPT5mRw,55
35
+ lsst/pipe/base/version.py,sha256=AyivLkwA4FcYO6eiFsfTA_EuqMhydgvzBupNZlQZL-E,55
36
36
  lsst/pipe/base/cli/__init__.py,sha256=861tXIAW7SqtqNUYkjbeEdfg8lDswXsjJQca0gVCFz4,54
37
37
  lsst/pipe/base/cli/_get_cli_subcommands.py,sha256=g_af64klRybBGKAg7fmBSZBdw2LYBAsFON_yQIMZON0,1289
38
38
  lsst/pipe/base/cli/cmd/__init__.py,sha256=BGicstnryQ48rYcNRh4fa6Vy63ZIlZ_pPAEa17jhkwY,1519
@@ -74,8 +74,8 @@ lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py,sha256=dp7PXl9C
74
74
  lsst/pipe/base/script/__init__.py,sha256=cLEXE7aq5UZ0juL_ScmRw0weFgp4tDgwEX_ts-NEYic,1522
75
75
  lsst/pipe/base/script/register_instrument.py,sha256=TRC2r2tSoYBNWNVQya01ELxAtGH8WVk9Ya-uNgCIL5U,2426
76
76
  lsst/pipe/base/script/retrieve_artifacts_for_quanta.py,sha256=pYI0wNl5PU8ImgzWfGEDrRz3PSKSg2szWLEIVKdm7Og,3939
77
- lsst/pipe/base/script/transfer_from_graph.py,sha256=YkOFszLJuaqD2vl1izhSWemZzGCxDxUSdZNDoDyFEfo,5949
78
- lsst/pipe/base/script/utils.py,sha256=E00wb3usCuL3-3rJHZeqLJ_kwHV9V7Vaj6SdUe1VxU4,2631
77
+ lsst/pipe/base/script/transfer_from_graph.py,sha256=WMpmzaN8hdW4FoZPNL6LRiPW9gvxf9mw20Z1i-KCkWI,6740
78
+ lsst/pipe/base/script/utils.py,sha256=zNqpHG3kXA8OaNXnwYIo0Hu_LCie1qoBAARAME3WEjs,3739
79
79
  lsst/pipe/base/script/zip_from_graph.py,sha256=rbH_5Jk7Yc-YFD3X4mbDE4Vzddtu5y90Z77wha94mdM,3228
80
80
  lsst/pipe/base/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
81
81
  lsst/pipe/base/tests/no_dimensions.py,sha256=58UpyRN8cLAMZtkOmjTm3dJZyRFRekotQ-7-OgEfiAI,4710
@@ -86,13 +86,13 @@ lsst/pipe/base/tests/mocks/__init__.py,sha256=NrIJYDeYgR3HsOJXBEXi8EXDhhV7iw7dgw
86
86
  lsst/pipe/base/tests/mocks/_data_id_match.py,sha256=WU0-5cPsU4565UhlsvQwhZKP5RzPJPyZ8sGio1CKAPI,6813
87
87
  lsst/pipe/base/tests/mocks/_pipeline_task.py,sha256=fqaJ-tB7K3jxlfCvCSnVd_GNrz-JhX7FB914h7nHLXc,29366
88
88
  lsst/pipe/base/tests/mocks/_storage_class.py,sha256=gC0czHURMk7PWj8N6dLxnY5V4HWX5i8ukb5SZbgWKy8,25257
89
- lsst_pipe_base-29.2025.2300.dist-info/licenses/COPYRIGHT,sha256=kB3Z9_f6a6uFLGpEmNJT_n186CE65H6wHu4F6BNt_zA,368
90
- lsst_pipe_base-29.2025.2300.dist-info/licenses/LICENSE,sha256=pRExkS03v0MQW-neNfIcaSL6aiAnoLxYgtZoFzQ6zkM,232
91
- lsst_pipe_base-29.2025.2300.dist-info/licenses/bsd_license.txt,sha256=7MIcv8QRX9guUtqPSBDMPz2SnZ5swI-xZMqm_VDSfxY,1606
92
- lsst_pipe_base-29.2025.2300.dist-info/licenses/gpl-v3.0.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
93
- lsst_pipe_base-29.2025.2300.dist-info/METADATA,sha256=qN3C-TyotJss0dGGTai0UtAR3E-x-T3BKgV7BOAihZI,2195
94
- lsst_pipe_base-29.2025.2300.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
95
- lsst_pipe_base-29.2025.2300.dist-info/entry_points.txt,sha256=bnmUhJBsChxMdqST9VmFBYYKxLQoToOfqW1wjW7khjk,64
96
- lsst_pipe_base-29.2025.2300.dist-info/top_level.txt,sha256=eUWiOuVVm9wwTrnAgiJT6tp6HQHXxIhj2QSZ7NYZH80,5
97
- lsst_pipe_base-29.2025.2300.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
98
- lsst_pipe_base-29.2025.2300.dist-info/RECORD,,
89
+ lsst_pipe_base-29.2025.2400.dist-info/licenses/COPYRIGHT,sha256=kB3Z9_f6a6uFLGpEmNJT_n186CE65H6wHu4F6BNt_zA,368
90
+ lsst_pipe_base-29.2025.2400.dist-info/licenses/LICENSE,sha256=pRExkS03v0MQW-neNfIcaSL6aiAnoLxYgtZoFzQ6zkM,232
91
+ lsst_pipe_base-29.2025.2400.dist-info/licenses/bsd_license.txt,sha256=7MIcv8QRX9guUtqPSBDMPz2SnZ5swI-xZMqm_VDSfxY,1606
92
+ lsst_pipe_base-29.2025.2400.dist-info/licenses/gpl-v3.0.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
93
+ lsst_pipe_base-29.2025.2400.dist-info/METADATA,sha256=J-bk-ErrNnOs-UY8uVxZhvQVUmZTJ2X4CeBBITdsgIc,2195
94
+ lsst_pipe_base-29.2025.2400.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
95
+ lsst_pipe_base-29.2025.2400.dist-info/entry_points.txt,sha256=bnmUhJBsChxMdqST9VmFBYYKxLQoToOfqW1wjW7khjk,64
96
+ lsst_pipe_base-29.2025.2400.dist-info/top_level.txt,sha256=eUWiOuVVm9wwTrnAgiJT6tp6HQHXxIhj2QSZ7NYZH80,5
97
+ lsst_pipe_base-29.2025.2400.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
98
+ lsst_pipe_base-29.2025.2400.dist-info/RECORD,,