lsst-pipe-base 29.2025.2700__py3-none-any.whl → 29.2025.2900__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +836 -257
- lsst/pipe/base/connections.py +86 -0
- lsst/pipe/base/prerequisite_helpers.py +4 -0
- lsst/pipe/base/quantum_graph_skeleton.py +5 -5
- lsst/pipe/base/tests/mocks/_data_id_match.py +16 -0
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.2700.dist-info → lsst_pipe_base-29.2025.2900.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.2700.dist-info → lsst_pipe_base-29.2025.2900.dist-info}/RECORD +16 -16
- {lsst_pipe_base-29.2025.2700.dist-info → lsst_pipe_base-29.2025.2900.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.2700.dist-info → lsst_pipe_base-29.2025.2900.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.2700.dist-info → lsst_pipe_base-29.2025.2900.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.2700.dist-info → lsst_pipe_base-29.2025.2900.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.2700.dist-info → lsst_pipe_base-29.2025.2900.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.2700.dist-info → lsst_pipe_base-29.2025.2900.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.2700.dist-info → lsst_pipe_base-29.2025.2900.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.2700.dist-info → lsst_pipe_base-29.2025.2900.dist-info}/zip-safe +0 -0
@@ -34,8 +34,9 @@ from __future__ import annotations
 __all__ = ("AllDimensionsQuantumGraphBuilder", "DatasetQueryConstraintVariant")
 
 import dataclasses
+import itertools
 from collections import defaultdict
-from collections.abc import Iterable, Mapping
+from collections.abc import Callable, Iterable, Mapping
 from typing import TYPE_CHECKING, Any, final
 
 import astropy.table
@@ -44,10 +45,13 @@ from lsst.daf.butler import (
     Butler,
     DataCoordinate,
     DimensionDataAttacher,
+    DimensionElement,
     DimensionGroup,
     DimensionRecordSet,
     MissingDatasetTypeError,
+    SkyPixDimension,
 )
+from lsst.sphgeom import RangeSet
 from lsst.utils.logging import LsstLogAdapter, PeriodicLogger
 from lsst.utils.timer import timeMethod
 
@@ -132,14 +136,17 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
         # be the same as or a dimension-subset of another. This is an
         # optimization opportunity we're not currently taking advantage of.
         tree = _DimensionGroupTree(subgraph)
+        tree.build(self.dataset_query_constraint, self.data_id_tables, log=self.log)
+        tree.pprint(printer=self.log.debug)
         self._query_for_data_ids(tree)
+        dimension_records = self._fetch_most_dimension_records(tree)
+        tree.generate_data_ids(self.log)
         skeleton = self._make_subgraph_skeleton(tree)
         if not skeleton.has_any_quanta:
             # QG is going to be empty; exit early not just for efficiency, but
             # also so downstream code doesn't have to guard against this case.
             return skeleton
         self._find_followup_datasets(tree, skeleton)
-        dimension_records = self._fetch_most_dimension_records(tree)
         self._attach_dimension_records(skeleton, dimension_records)
         return skeleton
 
@@ -153,42 +160,14 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
             Tree with dimension group branches that holds subgraph-specific
             state for this builder, to be modified in place.
         """
-        self.log.debug("Analyzing subgraph dimensions and overall-inputs.")
-        constraint_datasets: set[str] = set()
-        self.log.debug("Building query for data IDs.")
-        if self.dataset_query_constraint == DatasetQueryConstraintVariant.ALL:
-            self.log.debug("Constraining graph query using all datasets not marked as deferred.")
-            constraint_datasets = {
-                name
-                for name, dataset_type_node in tree.overall_inputs.items()
-                if (dataset_type_node.is_initial_query_constraint and dataset_type_node.dimensions)
-            }
-        elif self.dataset_query_constraint == DatasetQueryConstraintVariant.OFF:
-            self.log.debug("Not using dataset existence to constrain query.")
-        elif self.dataset_query_constraint == DatasetQueryConstraintVariant.LIST:
-            constraint = set(self.dataset_query_constraint)
-            inputs = tree.overall_inputs - tree.empty_dimensions_branch.dataset_types.keys()
-            if remainder := constraint.difference(inputs):
-                self.log.debug(
-                    "Ignoring dataset types %s in dataset query constraint that are not inputs to this "
-                    "subgraph, on the assumption that they are relevant for a different subgraph.",
-                    remainder,
-                )
-            constraint.intersection_update(inputs)
-            self.log.debug(f"Constraining graph query using {constraint}")
-            constraint_datasets = constraint
-        else:
-            raise QuantumGraphBuilderError(
-                f"Unable to handle type {self.dataset_query_constraint} given as datasetQueryConstraint."
-            )
         query_cmd: list[str] = []
         with self.butler.query() as query:
             query_cmd.append("with butler.query() as query:")
-            query_cmd.append(f"    query = query.join_dimensions({list(tree.all_dimensions.names)})")
-            query = query.join_dimensions(tree.all_dimensions)
-            if constraint_datasets:
+            query_cmd.append(f"    query = query.join_dimensions({list(tree.queryable_dimensions.names)})")
+            query = query.join_dimensions(tree.queryable_dimensions)
+            if tree.dataset_constraint:
                 query_cmd.append(f"    collections = {list(self.input_collections)}")
-                for dataset_type_name in constraint_datasets:
+                for dataset_type_name in tree.dataset_constraint:
                     query_cmd.append(f"    query = query.join_dataset_search({dataset_type_name!r}, collections)")
                     try:
                         query = query.join_dataset_search(dataset_type_name, self.input_collections)
@@ -221,7 +200,7 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
         # quickly as possible in case that holds a connection/cursor open.
         n_rows = 0
         progress_logger: PeriodicLogger | None = None
-        for common_data_id in query.data_ids(tree.all_dimensions):
+        for common_data_id in query.data_ids(tree.queryable_dimensions):
             if progress_logger is None:
                 # There can be a long wait between submitting the query and
                 # returning the first row, so we want to make sure we log
@@ -230,7 +209,7 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
                 # first log is seen.
                 self.log.info("Iterating over data ID query results.")
                 progress_logger = PeriodicLogger(self.log)
-            for branch_dimensions, branch in tree.trunk_branches.items():
+            for branch_dimensions, branch in tree.queryable_branches.items():
                 data_id = common_data_id.subset(branch_dimensions)
                 branch.data_ids.add(data_id)
             n_rows += 1
@@ -272,13 +251,20 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
             Preliminary quantum graph.
         """
         skeleton = QuantumGraphSkeleton(tree.subgraph.tasks)
-        for branch_dimensions, branch in tree.trunk_branches.items():
+        for branch_dimensions, branch in tree.branches_by_dimensions.items():
+            self.log.verbose(
+                "Adding nodes for %s %s data ID(s).",
+                len(branch.data_ids),
+                branch_dimensions,
+            )
+            branch.update_skeleton_nodes(skeleton)
+        for branch_dimensions, branch in tree.branches_by_dimensions.items():
             self.log.verbose(
-                "Adding nodes and edges for %s %s data ID(s).",
+                "Adding edges for %s %s data ID(s).",
                 len(branch.data_ids),
                 branch_dimensions,
             )
-            branch.update_skeleton(skeleton, self.log)
+            branch.update_skeleton_edges(skeleton)
         n_quanta = sum(len(skeleton.get_quanta(task_label)) for task_label in tree.subgraph.tasks)
         self.log.info(
             "Initial bipartite graph has %d quanta, %d dataset nodes, and %d edges.",
@@ -302,16 +288,18 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
             In-progress quantum graph to modify in place.
         """
         dataset_key: DatasetKey | PrerequisiteDatasetKey
-        for dataset_type_name in tree.empty_dimensions_branch.dataset_types.keys():
-            dataset_key = DatasetKey(dataset_type_name, self.empty_data_id.required_values)
-            if ref := self.empty_dimensions_datasets.inputs.get(dataset_key):
-                skeleton.set_dataset_ref(ref, dataset_key)
-            if ref := self.empty_dimensions_datasets.outputs_for_skip.get(dataset_key):
-                skeleton.set_output_for_skip(ref)
-            if ref := self.empty_dimensions_datasets.outputs_in_the_way.get(dataset_key):
-                skeleton.set_output_in_the_way(ref)
         for dimensions, branch in tree.branches_by_dimensions.items():
-            if not branch.dataset_types:
+            if not dimensions:
+                for dataset_type_name in branch.dataset_types.keys():
+                    dataset_key = DatasetKey(dataset_type_name, self.empty_data_id.required_values)
+                    if ref := self.empty_dimensions_datasets.inputs.get(dataset_key):
+                        skeleton.set_dataset_ref(ref, dataset_key)
+                    if ref := self.empty_dimensions_datasets.outputs_for_skip.get(dataset_key):
+                        skeleton.set_output_for_skip(ref)
+                    if ref := self.empty_dimensions_datasets.outputs_in_the_way.get(dataset_key):
+                        skeleton.set_output_in_the_way(ref)
+                continue
+            if not branch.dataset_types and not branch.tasks:
                 continue
             if not branch.data_ids:
                 continue
@@ -320,7 +308,7 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
             with self.butler.query() as butler_query:
                 butler_query = butler_query.join_data_coordinates(branch.data_ids)
                 for dataset_type_node in branch.dataset_types.values():
-                    if dataset_type_node.name in tree.overall_inputs:
+                    if tree.subgraph.producer_of(dataset_type_node.name) is None:
                         # Dataset type is an overall input; we always need to
                         # try to find these.
                         count = 0
@@ -457,9 +445,8 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
                         finder.dataset_type_node.name,
                         task_node.label,
                     )
-
-
-        del branch.data_ids
+            # Delete data ID sets we don't need anymore to save memory.
+            del branch.data_ids
 
     @timeMethod
     def _fetch_most_dimension_records(self, tree: _DimensionGroupTree) -> list[DimensionRecordSet]:
@@ -468,8 +455,9 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
 
         Parameters
         ----------
-
-
+        tree : `_DimensionGroupTree`
+            Tree with dimension group branches that holds subgraph-specific
+            state for this builder.
 
         Returns
         -------
@@ -485,18 +473,15 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
         self.log.verbose("Performing follow-up queries for dimension records.")
         result: list[DimensionRecordSet] = []
         for branch in tree.branches_by_dimensions.values():
-            if not branch.record_elements:
+            if not branch.dimension_records:
                 continue
             if not branch.data_ids:
                 continue
             with self.butler.query() as butler_query:
                 butler_query = butler_query.join_data_coordinates(branch.data_ids)
-                for element in branch.record_elements:
-                    result.append(
-                        DimensionRecordSet(
-                            element, butler_query.dimension_records(element), universe=self.universe
-                        )
-                    )
+                for record_set in branch.dimension_records:
+                    record_set.update(butler_query.dimension_records(record_set.element.name))
+                    result.append(record_set)
         return result
 
     @timeMethod
@@ -575,10 +560,8 @@ class _DimensionGroupBranch:
     dataset type name.
     """
 
-    record_elements: list[str] = dataclasses.field(default_factory=list)
-    """The names of dimension elements whose records should be fetched with
-    these dimensions.
-    """
+    dimension_records: list[DimensionRecordSet] = dataclasses.field(default_factory=list)
+    """Sets of dimension records looked up with these dimensions."""
 
     data_ids: set[DataCoordinate] = dataclasses.field(default_factory=set)
     """All data IDs with these dimensions seen in the QuantumGraph."""
@@ -599,7 +582,8 @@ class _DimensionGroupBranch:
 
     branches: dict[DimensionGroup, _DimensionGroupBranch] = dataclasses.field(default_factory=dict)
     """Child branches whose dimensions are strict subsets of this branch's
-    dimensions.
+    dimensions, populated by projecting this branch's set of data IDs (i.e.
+    remove a dimension, then deduplicate).
     """
 
     twigs: defaultdict[DimensionGroup, _DimensionGroupTwig] = dataclasses.field(
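The projection mentioned in that docstring (drop the extra dimensions from each data ID, then deduplicate) is easy to see outside the diff. A minimal sketch in plain Python, with tuples of key-value pairs standing in for `lsst.daf.butler.DataCoordinate` (all names here are illustrative, not part of the package):

def project(data_ids, child_dimensions):
    # Keep only the keys in the child branch's dimensions, then deduplicate
    # by collecting the projected data IDs in a set.
    return {
        tuple((k, v) for k, v in data_id if k in child_dimensions)
        for data_id in data_ids
    }

# Nine (visit, detector) data IDs collapse to a single visit data ID, which
# is why recursing to smaller dimension sets shrinks the work so quickly.
data_ids = {(("visit", 1), ("detector", d)) for d in range(9)}
assert project(data_ids, {"visit"}) == {(("visit", 1),)}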
@@ -609,146 +593,16 @@ class _DimensionGroupBranch:
     edge in `input_edges` or `output_edges`.
     """
 
-
-
-
-
-    ""
-
-
-    @staticmethod
-    def populate_record_elements(
-        all_dimensions: DimensionGroup, branches: dict[DimensionGroup, _DimensionGroupBranch]
-    ) -> None:
-        """Ensure we have branches for all dimension elements we'll need to
-        fetch dimension records for.
-
-        Parameters
-        ----------
-        all_dimensions : `~lsst.daf.butler.DimensionGroup`
-            All dimensions that appear in the quantum graph.
-        branches : `dict` [ `~lsst.daf.butler.DimensionGroup`,\
-                `_DimensionGroupBranch` ]
-            Flat mapping of all branches to update in-place. New branches may
-            be added and existing branches may have their `record_element`
-            attributes updated.
-        """
-        for element_name in all_dimensions.elements:
-            element = all_dimensions.universe[element_name]
-            if element.minimal_group in branches:
-                branches[element.minimal_group].record_elements.append(element_name)
-            else:
-                branches[element.minimal_group] = _DimensionGroupBranch(record_elements=[element_name])
-
-    @staticmethod
-    def populate_edges(
-        pipeline_graph: PipelineGraph, branches: dict[DimensionGroup, _DimensionGroupBranch]
+    def pprint(
+        self,
+        dimensions: DimensionGroup,
+        indent: str = "  ",
+        suffix: str = "",
+        printer: Callable[[str], None] = print,
     ) -> None:
-        """
-
-        Parameters
-        ----------
-        pipeline_graph : `~..pipeline_graph.PipelineGraph``
-            Graph of tasks and dataset types.
-        branches : `dict` [ `~lsst.daf.butler.DimensionGroup`,\
-                `_DimensionGroupBranch` ]
-            Flat mapping of all branches to update in-place. New branches may
-            be added and existing branches may have their `input_edges`,
-            `output_edges`, and `twigs` attributes updated.
-        """
-
-        def update_edge_branch(
-            task_node: TaskNode, dataset_type_node: DatasetTypeNode
-        ) -> _DimensionGroupBranch:
-            union_dimensions = task_node.dimensions.union(dataset_type_node.dimensions)
-            if (branch := branches.get(union_dimensions)) is None:
-                branch = _DimensionGroupBranch()
-                branches[union_dimensions] = branch
-            branch.twigs[dataset_type_node.dimensions].parent_edge_dataset_types.add(dataset_type_node.name)
-            branch.twigs[task_node.dimensions].parent_edge_tasks.add(task_node.label)
-            return branch
-
-        for task_node in pipeline_graph.tasks.values():
-            for dataset_type_node in pipeline_graph.inputs_of(task_node.label).values():
-                assert dataset_type_node is not None, "Pipeline graph is resolved."
-                if dataset_type_node.is_prerequisite:
-                    continue
-                branch = update_edge_branch(task_node, dataset_type_node)
-                branch.input_edges.append((dataset_type_node.name, task_node.label))
-            for dataset_type_node in pipeline_graph.outputs_of(task_node.label).values():
-                assert dataset_type_node is not None, "Pipeline graph is resolved."
-                branch = update_edge_branch(task_node, dataset_type_node)
-                branch.output_edges.append((task_node.label, dataset_type_node.name))
-
-    @staticmethod
-    def find_next_uncontained_dimensions(
-        parent_dimensions: DimensionGroup | None, candidates: Iterable[DimensionGroup]
-    ) -> list[DimensionGroup]:
-        """Find dimension groups that are not a subset of any other dimension
-        groups in a set.
-
-        Parameters
-        ----------
-        parent_dimensions : `~lsst.daf.butler.DimensionGroup` or `None`
-            If not `None`, first filter out any candidates that are not strict
-            subsets of these dimensions.
-        candidates : `~collections.abc.Iterable` [\
-                `~lsst.daf.butler.DimensionGroup` ]
-            Iterable of dimension groups to consider.
-
-        Returns
-        -------
-        uncontained : `list` [ `~lsst.daf.butler.DimensionGroup` ]
-            Dimension groups that are not contained by any other dimension
-            group in the set of filtered candidates.
-        """
-        if parent_dimensions is None:
-            refined_candidates = candidates
-        else:
-            refined_candidates = [dimensions for dimensions in candidates if dimensions < parent_dimensions]
-        return [
-            dimensions
-            for dimensions in refined_candidates
-            if not any(dimensions < other for other in refined_candidates)
-        ]
-
-    @classmethod
-    def populate_branches(
-        cls,
-        parent_dimensions: DimensionGroup | None,
-        branches: dict[DimensionGroup, _DimensionGroupBranch],
-    ) -> dict[DimensionGroup, _DimensionGroupBranch]:
-        """Transform a flat mapping of dimension group branches into a tree.
-
-        Parameters
-        ----------
-        parent_dimensions : `~lsst.daf.butler.DimensionGroup` or `None`
-            If not `None`, ignore any candidates in `branches` that are not
-            strict subsets of these dimensions.
-        branches : `dict` [ `~lsst.daf.butler.DimensionGroup`,\
-                `_DimensionGroupBranch` ]
-            Flat mapping of all branches to update in-place, by populating
-            the `branches` attributes to form a tree and removing entries that
-            have been put into the tree.
-
-        Returns
-        -------
-        uncontained_branches : `dict` [ `~lsst.daf.butler.DimensionGroup`,\
-                `_DimensionGroupBranch` ]
-            Branches whose dimensions were not subsets of any others in the
-            mapping except those that were supersets of ``parent_dimensions``.
-        """
-        result: dict[DimensionGroup, _DimensionGroupBranch] = {}
-        for parent_branch_dimensions in cls.find_next_uncontained_dimensions(
-            parent_dimensions, branches.keys()
-        ):
-            parent_branch = branches.pop(parent_branch_dimensions)
-            result[parent_branch_dimensions] = parent_branch
-            for child_branch_dimensions, child_branch in cls.populate_branches(
-                parent_branch_dimensions, branches
-            ).items():
-                parent_branch.branches[child_branch_dimensions] = child_branch
-        return result
+        printer(f"{indent}{dimensions}{suffix}")
+        for branch_dimensions, branch in self.branches.items():
+            branch.pprint(branch_dimensions, indent + "  ", printer=printer)
 
     def project_data_ids(self, log: LsstLogAdapter, log_indent: str = "  ") -> None:
         """Populate the data ID sets of child branches from the data IDs in
@@ -766,12 +620,10 @@ class _DimensionGroupBranch:
         for branch_dimensions, branch in self.branches.items():
             branch.data_ids.add(data_id.subset(branch_dimensions))
         for branch_dimensions, branch in self.branches.items():
-            log.
+            log.verbose("%sProjecting query data ID(s) to %s.", log_indent, branch_dimensions)
             branch.project_data_ids(log, log_indent + "  ")
 
-    def update_skeleton(
-        self, skeleton: QuantumGraphSkeleton, log: LsstLogAdapter, log_indent: str = "  "
-    ) -> None:
+    def update_skeleton_nodes(self, skeleton: QuantumGraphSkeleton) -> None:
         """Process the data ID sets of this branch and its children recursively
         to add nodes and edges to the under-construction quantum graph.
@@ -779,25 +631,23 @@ class _DimensionGroupBranch:
         ----------
         skeleton : `QuantumGraphSkeleton`
             Under-construction quantum graph to modify in place.
-        log : `lsst.logging.LsstLogAdapter`
-            Logger to use for status reporting.
-        log_indent : `str`, optional
-            Indentation to prefix the log message. This is used when recursing
-            to make the branch structure clear.
         """
-        for branch_dimensions, branch in self.branches.items():
-            log.verbose(
-                "%sAdding nodes and edges for %s %s data ID(s).",
-                log_indent,
-                len(branch.data_ids),
-                branch_dimensions,
-            )
-            branch.update_skeleton(skeleton, log, log_indent + "  ")
         for data_id in self.data_ids:
             for task_label in self.tasks:
                 skeleton.add_quantum_node(task_label, data_id)
             for dataset_type_name in self.dataset_types:
                 skeleton.add_dataset_node(dataset_type_name, data_id)
+
+    def update_skeleton_edges(self, skeleton: QuantumGraphSkeleton) -> None:
+        """Process the data ID sets of this branch and its children recursively
+        to add nodes and edges to the under-construction quantum graph.
+
+        Parameters
+        ----------
+        skeleton : `QuantumGraphSkeleton`
+            Under-construction quantum graph to modify in place.
+        """
+        for data_id in self.data_ids:
             quantum_keys: dict[str, QuantumKey] = {}
             dataset_keys: dict[str, DatasetKey] = {}
             for twig_dimensions, twig in self.twigs.items():
@@ -812,7 +662,7 @@ class _DimensionGroupBranch:
                 skeleton.add_input_edge(quantum_keys[task_label], dataset_keys[dataset_type_name])
             for task_label, dataset_type_name in self.output_edges:
                 skeleton.add_output_edge(quantum_keys[task_label], dataset_keys[dataset_type_name])
-        if not self.
+        if not self.dataset_types and not self.tasks:
             # Delete data IDs we don't need anymore to save memory.
             del self.data_ids
 
@@ -842,15 +692,18 @@ class _DimensionGroupTree:
     dimensions are those dimensions;
     - if there is a dimension element in any task or non-prerequisite dataset
       type dimensions whose `~lsst.daf.butler.DimensionElement.minimal_group`
-      is those dimensions.
+      is those dimensions (allowing us to look up dimension records).
+
+    In addition, for any dimension group that has unqueryable dimensions (e.g.
+    non-common skypix dimensions, like healpix), we create a branch for the
+    subset of the group with only queryable dimensions.
 
     We process the initial data query by recursing through this tree structure
     to populate a data ID set for each branch
-    (`_DimensionGroupBranch.project_data_ids`), and then process those sets
-
-
-
-    to smaller sets of dimensions.
+    (`_DimensionGroupBranch.project_data_ids`), and then process those sets.
+    This can be far faster than the non-recursive processing the QG builder
+    used to use because the set of data IDs is smaller (sometimes dramatically
+    smaller) as we move to smaller sets of dimensions.
 
     In addition to their child branches, a branch that is used to define graph
     edges also has "twigs", which are a flatter set of dimension subsets for
@@ -867,31 +720,35 @@ class _DimensionGroupTree:
     (non-prerequisite) dataset type in this subgraph.
     """
 
-    overall_inputs: dict[str, DatasetTypeNode] = dataclasses.field(init=False)
-    """
-
-
-    Prerequisite dataset types are not included.
-    """
-
-    trunk_branches: dict[DimensionGroup, _DimensionGroupBranch] = dataclasses.field(init=False)
-    """The top-level branches in the tree of dimension groups.
+    queryable_dimensions: DimensionGroup = dataclasses.field(init=False)
+    """All dimensions except those that cannot be queried for directly via the
+    butler (e.g. skypix systems other than the common one).
     """
 
     branches_by_dimensions: dict[DimensionGroup, _DimensionGroupBranch] = dataclasses.field(init=False)
     """The tasks and dataset types of this subset of the pipeline, grouped
     by their dimensions.
+    """
+
+    dataset_constraint: set[str] = dataclasses.field(default_factory=set)
+    """The names of dataset types used as query constraints."""
 
-
-    in
-
+    queryable_branches: dict[DimensionGroup, _DimensionGroupBranch] = dataclasses.field(default_factory=dict)
+    """The top-level branches in the tree of dimension groups populated by the
+    butler query.
 
-
+    Data IDs in these branches are populated from the top down, with each
+    branch a projection ("remove dimension, then deduplicate") of its parent,
+    starting with the query result rows.
     """
 
-
-    """
-
+    generators: list[DataIdGenerator] = dataclasses.field(default_factory=list)
+    """Branches for dimensions groups that are populated by algorithmically
+    generating data IDs from those in one or more other branches.
+
+    These are typically variants on the theme of adding a skypix dimension to
+    another set of dimensions by identifying the sky pixels that overlap the
+    region of the original dimensions.
     """
 
     def __post_init__(self) -> None:
@@ -902,29 +759,751 @@ class _DimensionGroupTree:
             for dimensions, (tasks, dataset_types) in self.subgraph.group_by_dimensions().items()
         }
         self.all_dimensions = DimensionGroup.union(*self.branches_by_dimensions.keys(), universe=universe)
-
-
-        self
-
-
-
-
-
-        self.overall_inputs: dict[str, DatasetTypeNode] = {
+
+    def build(
+        self,
+        requested: DatasetQueryConstraintVariant,
+        data_id_tables: Iterable[astropy.table.Table],
+        *,
+        log: LsstLogAdapter,
+    ) -> None:
+        """Organize the branches into a tree.
+
+        Parameters
+        ----------
+        requested : `DatasetQueryConstraintVariant`
+            Query constraint specified by the user.
+        data_id_tables : `~collections.abc.Iterable` [ `astropy.table.Table` ]
+            Data ID tables being joined into the query.
+        log : `lsst.log.LsstLogAdapter`
+            Logger that supports ``verbose`` output.
+        """
+        universe = self.all_dimensions.universe
+        self._make_dimension_record_branches()
+        self._make_edge_branches()
+        self._set_dataset_constraint(requested, log)
+        # Work out which dimensions we can potentially query the database for.
+        # We start out by dropping all skypix dimensions other than the common
+        # one, and then we add them back in if a constraint dataset type or
+        # data ID table provides them.
+        unqueryable_skypix = universe.conform(self.all_dimensions.skypix - {universe.commonSkyPix.name})
+        self.queryable_dimensions = self.all_dimensions.difference(unqueryable_skypix)
+        for dataset_type_name in sorted(self.dataset_constraint):
+            dataset_type_dimensions = self.subgraph.dataset_types[dataset_type_name].dimensions
+            dataset_type_skypix = dataset_type_dimensions.intersection(unqueryable_skypix)
+            if dataset_type_skypix:
+                log.info(
+                    f"Including {dataset_type_skypix} in the set of dimensions to query via "
+                    f"{dataset_type_name}. If this query fails, exclude those dataset type "
+                    "from the constraint or provide a data ID table for missing spatial joins."
+                )
+                self.queryable_dimensions = self.queryable_dimensions.union(dataset_type_dimensions)
+        for data_id_table in data_id_tables:
+            table_dimensions = universe.conform(data_id_table.colnames)
+            if table_dimensions.skypix:
+                self.queryable_dimensions = self.queryable_dimensions.union(table_dimensions)
+        # Set up the tree to generate most data IDs by querying for them from
+        # the database and then projecting to subset dimensions.
+        branches_not_in_tree = set(self.branches_by_dimensions.keys())
+        self._make_queryable_branch_tree(branches_not_in_tree)
+        # Try to find ways to generate other data IDs directly from the
+        # queryable branches.
+        self._make_queryable_overlap_branch_generators(branches_not_in_tree)
+        # As long as there are still branches that haven't been inserted into
+        # the tree, try to add them as projections of generated branches or
+        # generators on generated branches.
+        while branches_not_in_tree:
+            # Look for projections first, since those are more efficient, and
+            # some may be available after we've added some generators.
+            # We intentionally add the same branch as a projection of multiple
+            # parents since (unlike queryable dimensions) there's no guarantee
+            # that each parent branch's data IDs would project to the same set
+            # (e.g. a visit-healpix overlap may yield different healpixels than
+            # a patch-healpix overlap, even if the visits and patches overlap).
+            for target_dimensions in sorted(branches_not_in_tree):
+                for generator in self.generators:
+                    if self._maybe_insert_projection_branch(
+                        target_dimensions, generator.dimensions, generator.branch.branches
+                    ):
+                        branches_not_in_tree.discard(target_dimensions)
+            if not self._make_general_overlap_branch_generator(branches_not_in_tree):
+                break
+        # After we've exhausted overlap generation, try generation via joins
+        # of dimensions we can already query for or generate.
+        while branches_not_in_tree:
+            if not self._make_join_branch_generator(branches_not_in_tree):
+                raise QuantumGraphBuilderError(f"Could not generate data IDs for {branches_not_in_tree}.")
+
+    def _set_dataset_constraint(self, requested: DatasetQueryConstraintVariant, log: LsstLogAdapter) -> None:
+        """Set the dataset query constraint.
+
+        Parameters
+        ----------
+        requested : `DatasetQueryConstraintVariant`
+            Query constraint specified by the user.
+        log : `lsst.log.LsstLogAdapter`
+            Logger that supports ``verbose`` output.
+        """
+        overall_inputs: dict[str, DatasetTypeNode] = {
             name: node  # type: ignore
             for name, node in self.subgraph.iter_overall_inputs()
             if not node.is_prerequisite  # type: ignore
         }
+        match requested:
+            case DatasetQueryConstraintVariant.ALL:
+                self.dataset_constraint = {
+                    name
+                    for name, dataset_type_node in overall_inputs.items()
+                    if (dataset_type_node.is_initial_query_constraint and dataset_type_node.dimensions)
+                }
+            case DatasetQueryConstraintVariant.OFF:
+                pass
+            case DatasetQueryConstraintVariant.LIST:
+                self.dataset_constraint = set(requested)
+                inputs = {
+                    name for name, dataset_type_node in overall_inputs.items() if dataset_type_node.dimensions
+                }
+                if remainder := self.dataset_constraint.difference(inputs):
+                    log.verbose(
+                        "Ignoring dataset types %s in dataset query constraint that are not inputs to this "
+                        "subgraph, on the assumption that they are relevant for a different subgraph.",
+                        remainder,
+                    )
+                self.dataset_constraint.intersection_update(inputs)
+            case _:
+                raise QuantumGraphBuilderError(
+                    f"Unable to handle type {requested} given as dataset query constraint."
+                )
+
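A note on the `match` statement that replaces the old `if`/`elif` chain: the dotted names in the `case` clauses are value patterns, tested with `==` against `requested`, so anything unrecognized falls through to `case _` and raises. A self-contained sketch of that dispatch shape (the `Variant` class here is an illustrative stand-in, not the real `DatasetQueryConstraintVariant`):

class Variant:
    # Illustrative sentinels only.
    ALL = "ALL"
    OFF = "OFF"

def describe(requested):
    match requested:
        case Variant.ALL:  # value pattern: requested == Variant.ALL
            return "constrain on all overall-input dataset types"
        case Variant.OFF:
            return "no dataset-existence constraint"
        case _:  # unrecognized variants land here
            raise ValueError(f"Unable to handle {requested!r}")

assert describe("OFF") == "no dataset-existence constraint"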
+    def _make_dimension_record_branches(self) -> None:
+        """Ensure we have branches for all dimension elements we'll need to
+        fetch dimension records for.
+        """
+        for element_name in self.all_dimensions.elements:
+            element = self.all_dimensions.universe[element_name]
+            record_set = DimensionRecordSet(element_name, universe=self.all_dimensions.universe)
+            if element.minimal_group in self.branches_by_dimensions:
+                self.branches_by_dimensions[element.minimal_group].dimension_records.append(record_set)
+            else:
+                self.branches_by_dimensions[element.minimal_group] = _DimensionGroupBranch(
+                    dimension_records=[record_set]
+                )
+
+    def _make_edge_branches(self) -> None:
+        """Ensure we have branches for all edges in the graph."""
+
+        def update_edge_branch(
+            task_node: TaskNode, dataset_type_node: DatasetTypeNode
+        ) -> _DimensionGroupBranch:
+            union_dimensions = task_node.dimensions.union(dataset_type_node.dimensions)
+            if (branch := self.branches_by_dimensions.get(union_dimensions)) is None:
+                branch = _DimensionGroupBranch()
+                self.branches_by_dimensions[union_dimensions] = branch
+            branch.twigs[dataset_type_node.dimensions].parent_edge_dataset_types.add(dataset_type_node.name)
+            branch.twigs[task_node.dimensions].parent_edge_tasks.add(task_node.label)
+            return branch
+
+        for task_node in self.subgraph.tasks.values():
+            for dataset_type_node in self.subgraph.inputs_of(task_node.label).values():
+                assert dataset_type_node is not None, "Pipeline graph is resolved."
+                if dataset_type_node.is_prerequisite:
+                    continue
+                branch = update_edge_branch(task_node, dataset_type_node)
+                branch.input_edges.append((dataset_type_node.name, task_node.label))
+            for dataset_type_node in self.subgraph.outputs_of(task_node.label).values():
+                assert dataset_type_node is not None, "Pipeline graph is resolved."
+                branch = update_edge_branch(task_node, dataset_type_node)
+                branch.output_edges.append((task_node.label, dataset_type_node.name))
+
+    def _make_queryable_branch_tree(self, branches_not_in_tree: set[DimensionGroup]) -> None:
+        """Assemble the branches with queryable dimensions into a tree, in
+        which each branch has a subset of the dimensions of its parent.
+
+        Parameters
+        ----------
+        branches_not_in_tree : `set` [ `lsst.daf.butler.DimensionGroup` ]
+            Dimensions that have not yet been inserted into the tree. Updated
+            in place.
+        """
+        for target_dimensions in sorted(branches_not_in_tree):
+            if target_dimensions.issubset(self.queryable_dimensions):
+                if self._maybe_insert_projection_branch(
+                    target_dimensions, self.queryable_dimensions, self.queryable_branches
+                ):
+                    branches_not_in_tree.remove(target_dimensions)
+                else:
+                    raise AssertionError(
+                        "Projection-branch insertion should not fail for queryable dimensions."
+                    )
+
+    def _maybe_insert_projection_branch(
+        self,
+        target_dimensions: DimensionGroup,
+        candidate_dimensions: DimensionGroup,
+        candidate_projection_branches: dict[DimensionGroup, _DimensionGroupBranch],
+    ) -> bool:
+        """Insert a branch at the appropriate location in a [sub]tree.
+
+        Branches are inserted below the first parent branch whose dimensions
+        are a superset of their own.
+
+        Parameters
+        ----------
+        target_dimensions : `lsst.daf.butler.DimensionGroup`
+            Dimensions of the branch to be inserted.
+        candidate_dimensions : `lsst.daf.butler.DimensionGroup`
+            Dimensions of the subtree the branch might be inserted under. If
+            this is not a superset of ``target_dimensions``, this method
+            returns `False` and nothing is done.
+        candidate_projection_branches : `dict` [ \
+                `lsst.daf.butler.DimensionGroup`, `_DimensionGroupBranch` ]
+            Subtree branches to be updated directly or indirectly (i.e. in a
+            nested branch).
+
+        Returns
+        -------
+        inserted : `bool`
+            Whether the branch was actually inserted.
+        """
+        if candidate_dimensions >= target_dimensions:
+            target_branch = self.branches_by_dimensions[target_dimensions]
+            for child_dimensions in list(candidate_projection_branches.keys()):
+                if self._maybe_insert_projection_branch(
+                    child_dimensions, target_dimensions, target_branch.branches
+                ):
+                    del candidate_projection_branches[child_dimensions]
+            for child_dimensions, child_branch in candidate_projection_branches.items():
+                if self._maybe_insert_projection_branch(
+                    target_dimensions, child_dimensions, child_branch.branches
+                ):
+                    return True
+            candidate_projection_branches[target_dimensions] = target_branch
+            return True
+        return False
+
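The invariant `_maybe_insert_projection_branch` maintains (every child's dimensions are a strict subset of its parent's, descending into an existing superset when one exists and adopting existing subsets otherwise) can be sketched with frozensets. This is a simplification under stated assumptions: it ignores the branch objects, always succeeds, and skips the re-parenting-while-descending step of the real method:

def insert(target: frozenset, tree: dict) -> None:
    # Descend into the first existing node whose key strictly contains the
    # target, so the new node lands under a superset parent.
    for key, children in tree.items():
        if target < key:
            insert(target, children)
            return
    # Otherwise insert at this level, adopting existing nodes the target
    # strictly contains (the analogue of re-parenting in the real method).
    tree[target] = {key: tree.pop(key) for key in list(tree) if key < target}

forest: dict = {}
for dims in [frozenset("abc"), frozenset("ab"), frozenset("a"), frozenset("bc")]:
    insert(dims, forest)
# forest == {frozenset("abc"): {frozenset("ab"): {frozenset("a"): {}},
#            frozenset("bc"): {}}}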
+    def _make_queryable_overlap_branch_generators(self, branches_not_in_tree: set[DimensionGroup]) -> None:
+        """Add data ID generators for sets of dimensions that can only
+        partially queried for, with the rest needing to be generated by
+        manipulating the data IDs of the queryable subset.
+
+        Parameters
+        ----------
+        branches_not_in_tree : `set` [ `lsst.daf.butler.DimensionGroup` ]
+            Dimensions that have not yet been inserted into the tree. Updated
+            in place.
+        """
+        for target_dimensions in sorted(branches_not_in_tree):
+            queryable_subset_dimensions = target_dimensions.intersection(self.queryable_dimensions)
+            # Make sure we actually have a branch to capture the queryable
+            # subset data IDs (i.e. in case we didn't already have one for some
+            # dataset type or task, etc).
+            if queryable_subset_dimensions not in self.branches_by_dimensions:
+                # If we have to make a new queryable branch, we also have to
+                # insert it into the tree so its data IDs get populated.
+                self.branches_by_dimensions[queryable_subset_dimensions] = _DimensionGroupBranch()
+                if not self._maybe_insert_projection_branch(
+                    queryable_subset_dimensions,
+                    self.queryable_dimensions,
+                    self.queryable_branches,
+                ):
+                    raise AssertionError(
+                        "Projection-branch insertion should not fail for queryable dimensions."
+                    )
+            if queryable_region_name := queryable_subset_dimensions.region_dimension:
+                # If there is a single well-defined region for the queryable
+                # subset, we can potentially generate skypix IDs from it.
+                # Do the target dimensions just add a single skypix dimension
+                # to the queryable subset?
+                remainder_dimensions = target_dimensions - queryable_subset_dimensions
+                if (remainder_skypix := get_single_skypix(remainder_dimensions)) is not None:
+                    queryable_region_element = target_dimensions.universe[queryable_region_name]
+                    self._append_data_id_generator(
+                        queryable_subset_dimensions,
+                        queryable_region_element,
+                        target_dimensions,
+                        remainder_skypix,
+                        branches_not_in_tree,
+                    )
+
+    def _append_data_id_generator(
+        self,
+        source_dimensions: DimensionGroup,
+        source_region_element: DimensionElement,
+        target_dimensions: DimensionGroup,
+        remainder_skypix: SkyPixDimension,
+        branches_not_in_tree: set[DimensionGroup],
+    ) -> None:
+        """Append an appropriate `DataIdGenerator` instance for generating
+        data IDs with the given characteristics.
+
+        Parameters
+        ----------
+        source_dimensions : `lsst.daf.butler.DimensionGroup`
+            Dimensions whose data IDs can already populated, to use as a
+            starting point.
+        source_region_element : `lsst.daf.butler.DimensionElement`
+            Dimension element associated with the region for the source
+            dimensions. It is guaranteed that there is exactly one such
+            region.
+        target_dimensions : `lsst.daf.butler.DimensionGroup`
+            Dimensions of the data IDs to be generated.
+        remainder_skypix : `lsst.daf.butler.SkyPixDimension`
+            The single skypix dimension that is being added to
+            ``source_dimensions`` to yield ``target_dimensions``.
+        branches_not_in_tree : `set` [ `lsst.daf.butler.DimensionGroup` ]
+            Dimensions that have not yet been inserted into the tree. Updated
+            in place.
+        """
+        target_branch = self.branches_by_dimensions[target_dimensions]
+        # We want to do the overlap calculation without any extra dimensions
+        # beyond the two spatial dimensions, which may or may not be what we
+        # already have.
+        overlap_dimensions = source_region_element.minimal_group | remainder_skypix.minimal_group
+        generator: DataIdGenerator
+        if overlap_dimensions == target_dimensions:
+            if isinstance(source_region_element, SkyPixDimension):
+                if source_region_element.system == remainder_skypix.system:
+                    if source_region_element.level > remainder_skypix.level:
+                        generator = SkyPixGatherDataIdGenerator(
+                            target_branch,
+                            target_dimensions,
+                            source_dimensions,
+                            remainder_skypix,
+                            source_region_element,
+                        )
+                    else:
+                        generator = SkyPixScatterDataIdGenerator(
+                            target_branch,
+                            target_dimensions,
+                            source_dimensions,
+                            remainder_skypix,
+                            source_region_element,
+                        )
+                else:
+                    generator = CrossSystemDataIdGenerator(
+                        target_branch,
+                        target_dimensions,
+                        source_dimensions,
+                        remainder_skypix,
+                        source_region_element,
+                    )
+            else:
+                generator = DatabaseSourceDataIdGenerator(
+                    target_branch,
+                    target_dimensions,
+                    source_dimensions,
+                    remainder_skypix,
+                    source_region_element,
+                )
+            # We know we can populate the data IDs in remainder_skypix_branch
+            # from the target branch by projection. Even if it's already
+            # populated by some other generated branch, we want to populate it
+            # again in case that picks up additional sky pixels.
+            target_branch.branches[remainder_skypix.minimal_group] = self.branches_by_dimensions[
+                remainder_skypix.minimal_group
+            ]
+            branches_not_in_tree.discard(remainder_skypix.minimal_group)
+        else:
+            if overlap_dimensions not in self.branches_by_dimensions:
+                self.branches_by_dimensions[overlap_dimensions] = _DimensionGroupBranch()
+                branches_not_in_tree.add(overlap_dimensions)
+            self._append_data_id_generator(
+                source_region_element.minimal_group,
+                source_region_element,
+                overlap_dimensions,
+                remainder_skypix,
+                branches_not_in_tree,
+            )
+            generator = JoinDataIdGenerator(
+                target_branch,
+                target_dimensions,
+                source_dimensions,
+                overlap_dimensions,
+            )
+        self.generators.append(generator)
+        branches_not_in_tree.remove(target_dimensions)
+
+    def _make_general_overlap_branch_generator(self, branches_not_in_tree: set[DimensionGroup]) -> bool:
+        """Add data ID generators for sets of dimensions that can be generated
+        via skypix envelopes of other generated data IDs.
+
+        This method should be called in a loop until it returns `False`
+        (indicating no progress was made) or ``branches_not_in_tree`` is empty
+        (indicating no more work to be done).
+
+        Parameters
+        ----------
+        branches_not_in_tree : `set` [ `lsst.daf.butler.DimensionGroup` ]
+            Dimensions that have not yet been inserted into the tree. Updated
+            in place.
+
+        Returns
+        -------
+        appended : `bool`
+            Whether a new data ID generator was successfully appended.
+        """
+        dimensions_done = sorted(self.branches_by_dimensions.keys() - branches_not_in_tree)
+        for source_dimensions in dimensions_done:
+            for target_dimensions in sorted(branches_not_in_tree):
+                if not source_dimensions <= target_dimensions:
+                    continue
+                remainder_dimensions = target_dimensions - source_dimensions
+                if (remainder_skypix := get_single_skypix(remainder_dimensions)) is not None:
+                    if source_region_name := source_dimensions.region_dimension:
+                        # If the target dimensions are just adding a single
+                        # skypix to the source dimensions and the source
+                        # dimensions have a single region column, we can
+                        # generate the skypix indices from the envelopes of
+                        # those regions.
+                        source_region_element = source_dimensions.universe[source_region_name]
+                        self._append_data_id_generator(
+                            source_dimensions,
+                            source_region_element,
+                            target_dimensions,
+                            remainder_skypix,
+                            branches_not_in_tree,
+                        )
+                        return True
+        return not branches_not_in_tree
+
+    def _make_join_branch_generator(self, branches_not_in_tree: set[DimensionGroup]) -> bool:
+        """Add data ID generators for sets of dimensions that can be generated
+        via inner joints of other generated data IDs.
+
+        This method should be called in a loop until it returns `False`
+        (indicating no progress was made) or ``branches_not_in_tree`` is empty
+        (indicating no more work to be done).
+
+        Parameters
+        ----------
+        branches_not_in_tree : `set` [ `lsst.daf.butler.DimensionGroup` ]
+            Dimensions that have not yet been inserted into the tree. Updated
+            in place.
+
+        Returns
+        -------
+        appended : `bool`
+            Whether a new data ID generator was successfully appended.
+        """
+        for target_dimensions in sorted(branches_not_in_tree):
+            dimensions_done = sorted(self.branches_by_dimensions.keys() - branches_not_in_tree)
+            candidates_by_common: dict[DimensionGroup, tuple[DimensionGroup, DimensionGroup]] = {}
+            for operand1, operand2 in itertools.combinations(dimensions_done, 2):
+                if operand1.union(operand2) == target_dimensions:
+                    candidates_by_common[operand1.intersection(operand2)] = (operand1, operand2)
+            if candidates_by_common:
+                # Because DimensionGroup defines a set-like inequality
+                # operator, 'max' returns the set of dimensions that contains
+                # as many of the other sets of dimensions as possible, which is
+                # a reasonable guess at the most-constrained join.
+                operand1, operand2 = candidates_by_common[max(candidates_by_common)]
+                generator = JoinDataIdGenerator(
+                    self.branches_by_dimensions[target_dimensions],
+                    target_dimensions,
+                    operand1,
+                    operand2,
+                )
+                self.generators.append(generator)
+                branches_not_in_tree.remove(target_dimensions)
+                return True
+        return not branches_not_in_tree
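The comment above leans on a subtlety worth spelling out: with a partial order like `DimensionGroup`'s set-style comparisons, built-in `max` returns a maximal element (one that no later element strictly exceeds), not a unique maximum. A quick frozenset illustration of the behavior being relied on:

# 'max' keeps a running best and replaces it only when a later element
# compares greater, so under subset ordering it yields a maximal element.
assert max([frozenset("a"), frozenset("ab"), frozenset("abc")]) == frozenset("abc")
# With incomparable elements the winner depends on iteration order:
# frozenset("d") never compares greater than frozenset("ab"), so it never
# displaces it.
assert max([frozenset("a"), frozenset("ab"), frozenset("d")]) == frozenset("ab")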
 
     def project_data_ids(self, log: LsstLogAdapter) -> None:
         """Recursively populate the data ID sets of the dimension group tree
-        from the data ID sets of the
+        from the data ID sets of the queryable branches.
 
         Parameters
         ----------
         log : `lsst.logging.LsstLogAdapter`
             Logger to use for status reporting.
         """
-        for branch_dimensions, branch in self.
-            log.
+        for branch_dimensions, branch in self.queryable_branches.items():
+            log.verbose("Projecting query data ID(s) to %s.", branch_dimensions)
             branch.project_data_ids(log)
+
+    def generate_data_ids(self, log: LsstLogAdapter) -> None:
+        """Run all data ID generators.
+
+        This runs data ID generators and projects data IDs to their subset
+        dimensions. It can only be called after queryable data IDs have been
+        populated and dimension records fetched.
+
+        Parameters
+        ----------
+        log : `lsst.logging.LsstLogAdapter`
+            Logger to use for status reporting.
+        """
+        for generator in self.generators:
+            generator.run(log, self.branches_by_dimensions)
+            generator.branch.project_data_ids(log, log_indent="  ")
+
+    def pprint(self, printer: Callable[[str], None] = print) -> None:
+        """Print a human-readable representation of the dimensions tree.
+
+        Parameters
+        ----------
+        printer : `~collections.abc.Callable`, optional
+            A function that takes a single string argument and prints a single
+            line (including a newline). Default is the built-in `print`
+            function.
+        """
+        printer("Queryable:")
+        for branch_dimensions, branch in self.queryable_branches.items():
+            branch.pprint(branch_dimensions, "  ", printer=printer)
+        printer("Generator:")
+        for generator in self.generators:
+            generator.pprint("  ", printer=printer)
+
+
+def get_single_skypix(dimensions: DimensionGroup) -> SkyPixDimension | None:
+    """Try to coerce a dimension group a single skypix dimenison.
+
+    Parameters
+    ----------
+    dimensions : `lsst.daf.butler.DimensionGroup`
+        Input dimensions.
+
+    Returns
+    -------
+    skypix : `lsst.daf.butler.SkyPixDimension` or `None`
+        A skypix dimension that is the only dimension in the given group, or
+        `None` in all other cases.
+    """
+    if len(dimensions) == 1:
+        (name,) = dimensions.names
+        return dimensions.universe.skypix_dimensions.get(name)
+    return None
+
+
+@dataclasses.dataclass
+class DataIdGenerator:
+    """A base class for generators for quantum and dataset data IDs that cannot
+    be directly queried for.
+    """
+
+    branch: _DimensionGroupBranch
+    """Branch of the dimensions tree that this generator populates."""
+
+    dimensions: DimensionGroup
+    """Dimensions of the data IDs generated."""
+
+    source: DimensionGroup
+    """Dimensions of another set of data IDs that this generator uses as a
+    starting point.
+    """
+
+    def pprint(self, indent: str = "  ", printer: Callable[[str], None] = print) -> None:
+        """Print a human-readable representation of this generator.
+
+        Parameters
+        ----------
+        indent : `str`
+            Blank spaces to prefix the output with (useful when this is nested
+            in hierarchical object being printed).
+        printer : `~collections.abc.Callable`, optional
+            A function that takes a single string argument and prints a single
+            line (including a newline). Default is the built-in `print`
+            function.
+        """
+        self.branch.pprint(
+            self.dimensions,
+            indent,
+            f" <- {self.source} ({self.__class__.__name__})",
+            printer=printer,
+        )
+
+    def run(self, log: LsstLogAdapter, branches: Mapping[DimensionGroup, _DimensionGroupBranch]) -> None:
+        """Run the generator, populating its branch's data IDs.
+
+        Parameters
+        ----------
+        log : `lsst.log.LsstLogAdapter`
+            Logger with a ``verbose`` method as well as the built-in ones.
+        branches : `~collections.abc.Mapping`
+            Mapping of other dimension branches, keyed by their dimensions.
+        """
+        raise NotImplementedError()
+
+
+@dataclasses.dataclass
+class DatabaseSourceDataIdGenerator(DataIdGenerator):
+    """A data ID generator that generates skypix indices from the envelope of
+    regions stored in the database.
+    """
+
+    remainder_skypix: SkyPixDimension
+    """A single additional skypix dimension to be added to the source
+    dimensions.
+    """
+
+    source_element: DimensionElement
+    """Dimension element that the database-stored regions are associated with.
+    """
+
+    def run(self, log: LsstLogAdapter, branches: Mapping[DimensionGroup, _DimensionGroupBranch]) -> None:
+        # Docstring inherited.
+        source_branch = branches[self.source]
+        log.verbose(
+            "Generating %s data IDs via %s envelope of %s %s region(s).",
+            self.dimensions,
+            self.remainder_skypix,
+            len(source_branch.data_ids),
+            self.source_element,
+        )
+        pixelization = self.remainder_skypix.pixelization
+        (source_records,) = [
+            record_set
+            for record_set in source_branch.dimension_records
+            if record_set.element == self.source_element
+        ]
+        for source_data_id in source_branch.data_ids:
+            source_record = source_records.find(source_data_id)
+            for begin, end in pixelization.envelope(source_record.region):
+                for index in range(begin, end):
+                    target_data_id = DataCoordinate.standardize(
+                        source_data_id,
+                        **{self.remainder_skypix.name: index},  # type: ignore[arg-type]
+                    )
+                    self.branch.data_ids.add(target_data_id)
+
+
1370
|
+
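The geometric core of the class above is `Pixelization.envelope`, which returns a `RangeSet` of all pixel indices whose pixels may overlap a region. A standalone sketch using `lsst.sphgeom` only (the circle is an arbitrary example region, not anything from the diff):

    from lsst.sphgeom import Angle, Circle, HtmPixelization, LonLat, UnitVector3d

    pixelization = HtmPixelization(7)  # plays the role of remainder_skypix.pixelization
    region = Circle(UnitVector3d(LonLat.fromDegrees(45.0, 30.0)), Angle.fromDegrees(0.1))
    indices = [
        index
        for begin, end in pixelization.envelope(region)  # RangeSet of (begin, end) pairs
        for index in range(begin, end)
    ]
    print(len(indices), indices[:5])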
+@dataclasses.dataclass
+class CrossSystemDataIdGenerator(DataIdGenerator):
+    """A data ID generator that generates skypix indices from the envelope of
+    skypix regions from some other system (e.g. healpix from HTM).
+    """
+
+    remainder_skypix: SkyPixDimension
+    """A single additional skypix dimension to be added to the source
+    dimensions.
+    """
+
+    source_skypix: SkyPixDimension
+    """Dimension element for the already-known skypix indices."""
+
+    def run(self, log: LsstLogAdapter, branches: Mapping[DimensionGroup, _DimensionGroupBranch]) -> None:
+        # Docstring inherited.
+        source_branch = branches[self.source]
+        log.verbose(
+            "Generating %s data IDs via %s envelope of %s %s region(s).",
+            self.dimensions,
+            self.remainder_skypix,
+            len(source_branch.data_ids),
+            self.source_skypix,
+        )
+        source_pixelization = self.source_skypix.pixelization
+        remainder_pixelization = self.remainder_skypix.pixelization
+        for source_data_id in source_branch.data_ids:
+            source_region = source_pixelization.pixel(source_data_id[self.source_skypix.name])
+            for begin, end in remainder_pixelization.envelope(source_region):
+                for index in range(begin, end):
+                    target_data_id = DataCoordinate.standardize(
+                        source_data_id,
+                        **{self.remainder_skypix.name: index},  # type: ignore[arg-type]
+                    )
+                    self.branch.data_ids.add(target_data_id)
+
+
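Same envelope idea, but across pixelization systems: the source index is first turned into a region with `Pixelization.pixel`, then enveloped in the target system. A standalone sketch (HTM to Mq3c here; the point used to pick a source pixel is arbitrary):

    from lsst.sphgeom import HtmPixelization, LonLat, Mq3cPixelization, UnitVector3d

    source_pixelization = HtmPixelization(7)
    remainder_pixelization = Mq3cPixelization(8)
    # Pick a concrete, valid source index by locating an arbitrary point.
    source_index = source_pixelization.index(UnitVector3d(LonLat.fromDegrees(45.0, 30.0)))
    source_region = source_pixelization.pixel(source_index)
    for begin, end in remainder_pixelization.envelope(source_region):
        print(begin, end)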
+@dataclasses.dataclass
+class SkyPixScatterDataIdGenerator(DataIdGenerator):
+    """A data ID generator that generates skypix indices at a high (fine)
+    level from low-level (coarse) indices in the same system.
+    """
+
+    remainder_skypix: SkyPixDimension
+    """A single additional skypix dimension to be added to the source
+    dimensions.
+    """
+
+    source_skypix: SkyPixDimension
+    """Dimension element for the already-known skypix indices."""
+
+    def run(self, log: LsstLogAdapter, branches: Mapping[DimensionGroup, _DimensionGroupBranch]) -> None:
+        # Docstring inherited.
+        factor = 4 ** (self.remainder_skypix.level - self.source_skypix.level)
+        source_branch = branches[self.source]
+        log.verbose(
+            "Generating %s data IDs by scaling %s %s IDs in %s by %s.",
+            self.dimensions,
+            len(source_branch.data_ids),
+            self.remainder_skypix,
+            self.source,
+            factor,
+        )
+        for source_data_id in source_branch.data_ids:
+            ranges = RangeSet(source_data_id[self.source_skypix.name])
+            ranges.scale(factor)
+            for begin, end in ranges:
+                for index in range(begin, end):
+                    target_data_id = DataCoordinate.standardize(
+                        source_data_id,
+                        **{self.remainder_skypix.name: index},  # type: ignore[arg-type]
+                    )
+                    self.branch.data_ids.add(target_data_id)
+
+
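The scatter shortcut works because these hierarchical pixelizations place the 4^Δ children of index i at Δ levels finer in the contiguous range [i * 4^Δ, (i + 1) * 4^Δ), which is exactly what multiplying both range bounds with `RangeSet.scale` produces, so no geometry is needed. A standalone check:

    from lsst.sphgeom import RangeSet

    source_level, remainder_level = 7, 9
    factor = 4 ** (remainder_level - source_level)  # 16 descendants two levels down
    ranges = RangeSet(131072)  # a single (valid) level-7 HTM index
    ranges.scale(factor)
    print(list(ranges))  # [(2097152, 2097168)]: its 16 level-9 descendants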
+@dataclasses.dataclass
+class SkyPixGatherDataIdGenerator(DataIdGenerator):
+    """A data ID generator that generates skypix indices at a low (coarse)
+    level from high-level (fine) indices in the same system.
+    """
+
+    remainder_skypix: SkyPixDimension
+    """A single additional skypix dimension to be added to the source
+    dimensions.
+    """
+
+    source_skypix: SkyPixDimension
+    """Dimension element for the already-known skypix indices."""
+
+    def run(self, log: LsstLogAdapter, branches: Mapping[DimensionGroup, _DimensionGroupBranch]) -> None:
+        # Docstring inherited.
+        factor = 4 ** (self.source_skypix.level - self.remainder_skypix.level)
+        source_branch = branches[self.source]
+        log.verbose(
+            "Generating %s data IDs by dividing %s %s IDs in %s by %s.",
+            self.dimensions,
+            len(source_branch.data_ids),
+            self.remainder_skypix,
+            self.source,
+            factor,
+        )
+        for source_data_id in source_branch.data_ids:
+            index = source_data_id[self.source_skypix.name] // factor
+            target_data_id = DataCoordinate.standardize(source_data_id, **{self.remainder_skypix.name: index})
+            self.branch.data_ids.add(target_data_id)
+
+
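Gathering is the inverse map: the coarse ancestor of a fine index is floor division by 4^Δ, and since many fine indices share an ancestor, adding the results to the branch's data-ID set deduplicates them. A plain-Python check:

    factor = 4 ** (9 - 7)  # same 16:1 ratio as the scatter example
    fine_indices = range(2097152, 2097152 + 64)  # 64 consecutive level-9 indices
    parents = {index // factor for index in fine_indices}
    print(sorted(parents))  # [131072, 131073, 131074, 131075]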
+@dataclasses.dataclass
+class JoinDataIdGenerator(DataIdGenerator):
+    """A data ID generator that does an inner join between two
+    already-populated sets of data IDs.
+    """
+
+    other: DimensionGroup
+    """Dimensions of the other data ID branches to join to those of ``source``.
+    """
+
+    def run(self, log: LsstLogAdapter, branches: Mapping[DimensionGroup, _DimensionGroupBranch]) -> None:
+        # Docstring inherited.
+        source_branch = branches[self.source]
+        other_branch = branches[self.other]
+        log.verbose(
+            "Generating %s data IDs by joining %s (%s) to %s (%s).",
+            self.dimensions,
+            self.source,
+            len(source_branch.data_ids),
+            self.other,
+            len(other_branch.data_ids),
+        )
+        common = self.source & self.other
+        other_by_common: defaultdict[DataCoordinate, list[DataCoordinate]] = defaultdict(list)
+        for other_data_id in other_branch.data_ids:
+            other_by_common[other_data_id.subset(common)].append(other_data_id)
+        source_by_common: defaultdict[DataCoordinate, list[DataCoordinate]] = defaultdict(list)
+        for source_data_id in source_branch.data_ids:
+            source_by_common[source_data_id.subset(common)].append(source_data_id)
+        for common_data_id in other_by_common.keys() & source_by_common.keys():
+            for other_data_id in other_by_common[common_data_id]:
+                for source_data_id in source_by_common[common_data_id]:
+                    self.branch.data_ids.add(other_data_id.union(source_data_id))
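The method above is a classic hash join: bucket each side by its projection onto the shared dimensions, then pair up only the matching buckets. A plain-Python sketch with dicts standing in for data coordinates (dimension names as keys; all values invented):

    from collections import defaultdict

    source = [{"visit": 1, "detector": 10}, {"visit": 2, "detector": 10}]
    other = [{"visit": 1, "band": "r"}, {"visit": 1, "band": "i"}, {"visit": 3, "band": "r"}]
    common = {"visit"}  # analogue of ``self.source & self.other``

    def bucket(data_ids):
        by_common = defaultdict(list)
        for data_id in data_ids:
            by_common[tuple(sorted((k, data_id[k]) for k in common))].append(data_id)
        return by_common

    source_by_common = bucket(source)
    other_by_common = bucket(other)
    for key in source_by_common.keys() & other_by_common.keys():
        for a in source_by_common[key]:
            for b in other_by_common[key]:
                print(a | b)  # only the visit=1 pairs match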