pytrilogy 0.3.142__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cpython-313-x86_64-linux-gnu.so +0 -0
- pytrilogy-0.3.142.dist-info/METADATA +555 -0
- pytrilogy-0.3.142.dist-info/RECORD +200 -0
- pytrilogy-0.3.142.dist-info/WHEEL +5 -0
- pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +16 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +100 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +148 -0
- trilogy/constants.py +113 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +443 -0
- trilogy/core/env_processor.py +120 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1227 -0
- trilogy/core/graph_models.py +139 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2669 -0
- trilogy/core/models/build.py +2521 -0
- trilogy/core/models/build_environment.py +180 -0
- trilogy/core/models/core.py +501 -0
- trilogy/core/models/datasource.py +322 -0
- trilogy/core/models/environment.py +751 -0
- trilogy/core/models/execute.py +1177 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +548 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +268 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +205 -0
- trilogy/core/processing/node_generators/node_merge_node.py +653 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +748 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +519 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +596 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +256 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1392 -0
- trilogy/dialect/bigquery.py +308 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +144 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +231 -0
- trilogy/dialect/enums.py +147 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +117 -0
- trilogy/dialect/presto.py +110 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +129 -0
- trilogy/dialect/sql_server.py +137 -0
- trilogy/engine.py +48 -0
- trilogy/execution/config.py +75 -0
- trilogy/executor.py +568 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +139 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2813 -0
- trilogy/parsing/render.py +769 -0
- trilogy/parsing/trilogy.lark +540 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +42 -0
- trilogy/scripts/README.md +9 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/agent.py +41 -0
- trilogy/scripts/agent_info.py +303 -0
- trilogy/scripts/common.py +355 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +303 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +512 -0
- trilogy/scripts/environment.py +46 -0
- trilogy/scripts/fmt.py +32 -0
- trilogy/scripts/ingest.py +471 -0
- trilogy/scripts/ingest_helpers/__init__.py +1 -0
- trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
- trilogy/scripts/ingest_helpers/formatting.py +93 -0
- trilogy/scripts/ingest_helpers/typing.py +161 -0
- trilogy/scripts/init.py +105 -0
- trilogy/scripts/parallel_execution.py +713 -0
- trilogy/scripts/plan.py +189 -0
- trilogy/scripts/run.py +63 -0
- trilogy/scripts/serve.py +140 -0
- trilogy/scripts/serve_helpers/__init__.py +41 -0
- trilogy/scripts/serve_helpers/file_discovery.py +142 -0
- trilogy/scripts/serve_helpers/index_generation.py +206 -0
- trilogy/scripts/serve_helpers/models.py +38 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/testing.py +119 -0
- trilogy/scripts/trilogy.py +68 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
trilogy/core/processing/discovery_utility.py
@@ -0,0 +1,548 @@
```python
from typing import List

from trilogy.constants import logger
from trilogy.core.enums import (
    Derivation,
    FunctionType,
    Granularity,
    Purpose,
    SourceType,
)
from trilogy.core.models.build import (
    BuildConcept,
    BuildDatasource,
    BuildFilterItem,
    BuildFunction,
    BuildGrain,
    BuildRowsetItem,
    BuildWhereClause,
)
from trilogy.core.models.build_environment import BuildEnvironment
from trilogy.core.models.execute import QueryDatasource, UnnestJoin
from trilogy.core.processing.constants import ROOT_DERIVATIONS
from trilogy.core.processing.nodes import GroupNode, MergeNode, StrategyNode
from trilogy.core.processing.utility import GroupRequiredResponse
from trilogy.utility import unique


def depth_to_prefix(depth: int) -> str:
    return "\t" * depth


NO_PUSHDOWN_DERIVATIONS: list[Derivation] = ROOT_DERIVATIONS + [
    Derivation.BASIC,
    Derivation.ROWSET,
    Derivation.UNNEST,
]


LOGGER_PREFIX = "[DISCOVERY LOOP]"


def calculate_effective_parent_grain(
    node: QueryDatasource | BuildDatasource,
) -> BuildGrain:
    # calculate the effective grain of the parent node
    # this is the union of all parent grains
    if isinstance(node, QueryDatasource):
        grain = BuildGrain()
        qds = node
        if not qds.joins:
            return qds.datasources[0].grain
        seen = set()
        for join in qds.joins:
            if isinstance(join, UnnestJoin):
                grain += BuildGrain(components=set([x.address for x in join.concepts]))
                continue
            pairs = join.concept_pairs or []
            for key in pairs:
                left = key.existing_datasource
                logger.debug(f"adding left grain {left.grain} for join key {key.left}")
                grain += left.grain
                seen.add(left.name)
            keys = [key.right for key in pairs]
            join_grain = BuildGrain.from_concepts(keys)
            if join_grain == join.right_datasource.grain:
                logger.debug(f"irrelevant right join {join}, does not change grain")
            else:
                logger.debug(
                    f"join changes grain, adding {join.right_datasource.grain} to {grain}"
                )
                grain += join.right_datasource.grain
                seen.add(join.right_datasource.name)
        for x in qds.datasources:
            # if we haven't seen it, it's still contributing to grain
            # unless used ONLY in a subselect
            # so the existence check is a [bad] proxy for that
            if x.name not in seen and not (
                qds.condition
                and qds.condition.existence_arguments
                and any(
                    [
                        c.address in block
                        for c in x.output_concepts
                        for block in qds.condition.existence_arguments
                    ]
                )
            ):
                logger.debug(f"adding unjoined grain {x.grain} for datasource {x.name}")
                grain += x.grain
        return grain
    else:
        return node.grain or BuildGrain()
```
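The grain arithmetic above is essentially set union over key components, where a join made at the right side's existing grain contributes nothing new. A minimal, self-contained sketch of that rule, with plain sets standing in for `BuildGrain` (which additionally tracks an `abstract` flag):

```python
# Illustrative model only: grains as frozensets of component addresses.
def effective_grain(
    base: frozenset[str],
    joins: list[tuple[frozenset[str], frozenset[str]]],  # (join-key grain, right grain)
) -> frozenset[str]:
    grain = set(base)
    for join_keys, right_grain in joins:
        if join_keys == right_grain:
            continue  # joined at the right side's full grain: no fan-out
        grain |= right_grain  # otherwise the right side contributes its grain
    return frozenset(grain)


orders = frozenset({"order.id"})
customers = frozenset({"customer.id"})
# Joining on customer.id, which IS the customers grain: grain unchanged.
assert effective_grain(orders, [(customers, customers)]) == orders
# Joining on a non-key column: the right side's grain is added.
assert effective_grain(orders, [(frozenset({"customer.region"}), customers)]) == {
    "order.id",
    "customer.id",
}
```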
```python
def check_if_group_required(
    downstream_concepts: List[BuildConcept],
    parents: list[QueryDatasource | BuildDatasource],
    environment: BuildEnvironment,
    depth: int = 0,
) -> GroupRequiredResponse:
    padding = "\t" * depth
    target_grain = BuildGrain.from_concepts(
        downstream_concepts,
        environment=environment,
    )

    comp_grain = BuildGrain()
    for source in parents:
        comp_grain += calculate_effective_parent_grain(source)

    # dynamically select if we need to group;
    # we must avoid grouping if we are already at grain
    if comp_grain.abstract and not target_grain.abstract:
        logger.info(
            f"{padding}{LOGGER_PREFIX} Group requirement check: upstream grain is abstract, cannot determine grouping requirement, assuming group required"
        )
        return GroupRequiredResponse(target_grain, comp_grain, True)
    if comp_grain.issubset(target_grain):
        logger.info(
            f"{padding}{LOGGER_PREFIX} Group requirement check: {comp_grain}, target: {target_grain}, grain is subset of target, no group node required"
        )
        return GroupRequiredResponse(target_grain, comp_grain, False)
    # find out what extra is in the comp grain vs the target grain
    difference = [
        environment.concepts[c] for c in (comp_grain - target_grain).components
    ]
    logger.info(
        f"{padding}{LOGGER_PREFIX} Group requirement check: upstream grain: {comp_grain}, desired grain: {target_grain}, difference {[x.address for x in difference]}"
    )
    for x in difference:
        logger.info(
            f"{padding}{LOGGER_PREFIX} Difference concept {x.address} purpose {x.purpose} keys {x.keys}"
        )

    # if the difference is all unique properties whose keys are in the source grain,
    # we can also suppress the group
    if difference and all(
        [
            x.keys
            and all(
                environment.concepts[z].address in comp_grain.components for z in x.keys
            )
            for x in difference
        ]
    ):
        logger.info(
            f"{padding}{LOGGER_PREFIX} Group requirement check: skipped due to unique property validation"
        )
        return GroupRequiredResponse(target_grain, comp_grain, False)
    if difference and all([x.purpose == Purpose.KEY for x in difference]):
        logger.info(
            f"{padding}{LOGGER_PREFIX} checking if downstream is unique properties of key"
        )
        replaced_grain_raw: list[set[str]] = [
            (
                x.keys or set()
                if x.purpose == Purpose.UNIQUE_PROPERTY
                else set([x.address])
            )
            for x in downstream_concepts
            if x.address in target_grain.components
        ]
        # flatten the list of sets
        replaced_grain = [item for sublist in replaced_grain_raw for item in sublist]
        # if the replaced grain is a subset of the comp grain, we can skip the group
        unique_grain_comp = BuildGrain.from_concepts(
            replaced_grain, environment=environment
        )
        if comp_grain.issubset(unique_grain_comp):
            logger.info(
                f"{padding}{LOGGER_PREFIX} Group requirement check: skipped due to unique property validation"
            )
            return GroupRequiredResponse(target_grain, comp_grain, False)
    logger.info(
        f"{padding}{LOGGER_PREFIX} Checking for grain equivalence for filters and rowsets"
    )
    ngrain = []
    for con in target_grain.components:
        full = environment.concepts[con]
        if full.derivation == Derivation.ROWSET:
            ngrain.append(full.address.split(".", 1)[1])
        elif full.derivation == Derivation.FILTER:
            assert isinstance(full.lineage, BuildFilterItem)
            if isinstance(full.lineage.content, BuildConcept):
                ngrain.append(full.lineage.content.address)
        else:
            ngrain.append(full.address)
    target_grain2 = BuildGrain.from_concepts(
        ngrain,
        environment=environment,
    )
    if comp_grain.issubset(target_grain2):
        logger.info(
            f"{padding}{LOGGER_PREFIX} Group requirement check: {comp_grain}, {target_grain2}, pre-rowset grain is subset of target, no group node required"
        )
        return GroupRequiredResponse(target_grain2, comp_grain, False)

    logger.info(f"{padding}{LOGGER_PREFIX} Group requirement check: group required")
    return GroupRequiredResponse(target_grain, comp_grain, True)
```
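At its core the check is a subset test between the computed upstream grain and the grain of the requested outputs; the remaining branches handle cases (unique properties, filters, rowsets) where addresses differ but the grains are equivalent. A condensed sketch of the primary decision, again with sets standing in for `BuildGrain`:

```python
# A group/aggregate node is required only when the upstream grain carries
# components beyond the target grain (simplified model).
def group_required(upstream: set[str], target: set[str]) -> bool:
    return not upstream.issubset(target)


# Upstream is at order-line grain but the target is order grain: must group.
assert group_required({"order.id", "line.id"}, {"order.id"})
# Upstream is already at (or below) the target grain: grouping is a no-op.
assert not group_required({"order.id"}, {"order.id", "order.date"})
```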
```python
def group_if_required_v2(
    root: StrategyNode,
    final: List[BuildConcept],
    environment: BuildEnvironment,
    where_injected: set[str] | None = None,
    depth: int = 0,
):
    where_injected = where_injected or set()
    required = check_if_group_required(
        downstream_concepts=final,
        parents=[root.resolve()],
        environment=environment,
        depth=depth,
    )
    targets = [
        x
        for x in root.output_concepts
        if x.address in final or any(c in final for c in x.pseudonyms)
    ]
    if required.required:
        if isinstance(root, MergeNode):
            root.force_group = True
            root.set_output_concepts(targets, rebuild=False, change_visibility=False)
            root.rebuild_cache()
            return root
        elif isinstance(root, GroupNode) and root.source_type == SourceType.BASIC:
            # we need to group this one more time
            pass
        elif isinstance(root, GroupNode):
            if set(x.address for x in final) != set(
                x.address for x in root.output_concepts
            ):
                allowed_outputs = [
                    x
                    for x in root.output_concepts
                    if not (
                        x.address in where_injected
                        and x.address not in (root.required_outputs or set())
                    )
                ]
                logger.info(
                    f"Adjusting group node outputs to remove injected concepts {where_injected}: remaining {allowed_outputs}"
                )
                root.set_output_concepts(allowed_outputs)
            return root
        return GroupNode(
            output_concepts=targets,
            input_concepts=targets,
            environment=environment,
            parents=[root],
            partial_concepts=root.partial_concepts,
            preexisting_conditions=root.preexisting_conditions,
        )
    elif isinstance(root, GroupNode):
        return root
    else:
        root.set_output_concepts(targets, rebuild=False, change_visibility=False)
        return root
```
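Note that `group_if_required_v2` prefers to reuse the node it was handed: a `MergeNode` is flipped to `force_group`, an existing `GroupNode` merely has where-clause-injected outputs trimmed, and only a node that cannot absorb the aggregation itself (including a `GroupNode` sourced from a basic derivation) gets wrapped in a fresh `GroupNode`.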
```python
def get_upstream_concepts(base: BuildConcept, nested: bool = False) -> set[str]:
    upstream = set()
    if nested:
        upstream.add(base.address)
    if not base.lineage:
        return upstream
    for x in base.lineage.concept_arguments:
        # if it's derived from any value in a rowset, ALL rowset items are upstream
        if x.derivation == Derivation.ROWSET:
            assert isinstance(x.lineage, BuildRowsetItem), type(x.lineage)
            for y in x.lineage.rowset.select.output_components:
                upstream.add(f"{x.lineage.rowset.name}.{y.address}")
                # upstream = upstream.union(get_upstream_concepts(y, nested=True))
        upstream = upstream.union(get_upstream_concepts(x, nested=True))
    return upstream
```
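A self-contained mock of the recursive walk, using hypothetical stand-in classes; the rowset special case above, which pulls in every output of the rowset's select, is omitted for brevity:

```python
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class FakeLineage:
    concept_arguments: list["FakeConcept"] = field(default_factory=list)


@dataclass
class FakeConcept:
    address: str
    lineage: Optional[FakeLineage] = None


def upstream(base: FakeConcept, nested: bool = False) -> set[str]:
    # the base concept itself only counts once we have recursed into it
    out = {base.address} if nested else set()
    if base.lineage:
        for arg in base.lineage.concept_arguments:
            out |= upstream(arg, nested=True)
    return out


# c derives from b, which derives from a: both are upstream of c,
# but c itself is not included at the top level.
a = FakeConcept("ns.a")
b = FakeConcept("ns.b", FakeLineage([a]))
c = FakeConcept("ns.c", FakeLineage([b]))
assert upstream(c) == {"ns.a", "ns.b"}
```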
```python
def evaluate_loop_condition_pushdown(
    mandatory: list[BuildConcept],
    conditions: BuildWhereClause | None,
    depth: int,
    force_no_condition_pushdown: bool,
    forced_pushdown: list[BuildConcept],
) -> BuildWhereClause | None:
    # filter evaluation:
    # always pass the filter up when we aren't looking at all filter inputs
    # or there are any non-filter complex types
    if not conditions:
        return None
    # first, check if we *have* to push up conditions above complex derivations
    if forced_pushdown:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Force including conditions to push filtering above complex concepts {forced_pushdown} that are not condition row inputs {conditions.row_arguments} or parent"
        )
        return conditions
    # otherwise, only prevent pushdown
    # (forcing local condition evaluation)
    # if all condition inputs are here and we only have roots
    should_evaluate_filter_on_this_level_not_push_down = all(
        [x.address in mandatory for x in conditions.row_arguments]
    ) and not any(
        [
            x.derivation not in (ROOT_DERIVATIONS + [Derivation.BASIC])
            for x in mandatory
            if x.address not in conditions.row_arguments
        ]
    )

    if (
        force_no_condition_pushdown
        or should_evaluate_filter_on_this_level_not_push_down
    ):
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Forcing condition evaluation at this level: all basic_no_agg: {should_evaluate_filter_on_this_level_not_push_down}"
        )
        return None

    return conditions
```
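The three possible outcomes condense to a small decision rule. A sketch, where the boolean parameters are assumed simplifications of the concept-level checks above:

```python
def pushdown_decision(
    forced_pushdown: bool,   # conditions must be applied above a complex derivation
    inputs_all_local: bool,  # every condition row input is in the mandatory set
    rest_all_simple: bool,   # all other mandatory concepts are root/basic
    force_local: bool,       # caller demands evaluation at this level
) -> str:
    if forced_pushdown:
        return "return conditions (push up)"
    if force_local or (inputs_all_local and rest_all_simple):
        return "return None (evaluate here)"
    return "return conditions (push up)"
```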
```python
def generate_candidates_restrictive(
    priority_concept: BuildConcept,
    candidates: list[BuildConcept],
    exhausted: set[str],
    # conditions_exist: bool,
) -> list[BuildConcept]:
    unselected_candidates = [
        x for x in candidates if x.address != priority_concept.address
    ]
    local_candidates = [
        x
        for x in unselected_candidates
        if x.address not in exhausted
        and x.granularity != Granularity.SINGLE_ROW
        and x.address not in priority_concept.pseudonyms
        and priority_concept.address not in x.pseudonyms
    ]

    # if it's single row, joins are irrelevant. Fetch without keys.
    if priority_concept.granularity == Granularity.SINGLE_ROW:
        logger.info("Have single row concept, including only other single row optional")
        optional = (
            [
                x
                for x in unselected_candidates
                if x.granularity == Granularity.SINGLE_ROW
                and x.address not in priority_concept.pseudonyms
                and priority_concept.address not in x.pseudonyms
            ]
            if priority_concept.derivation == Derivation.AGGREGATE
            else []
        )
        return optional
    return local_candidates
```
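The single-row shortcut matters because a single-row concept needs no join keys; pairing it with row-level candidates would only force spurious joins, so it keeps other single-row concepts as optional company only when it is itself an aggregate.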
```python
def get_priority_concept(
    all_concepts: List[BuildConcept],
    attempted_addresses: set[str],
    found_concepts: set[str],
    partial_concepts: set[str],
    depth: int,
) -> BuildConcept:
    # optimized search for missing concepts
    all_concepts_local = all_concepts
    pass_one = sorted(
        [
            c
            for c in all_concepts_local
            if c.address not in attempted_addresses
            and (c.address not in found_concepts or c.address in partial_concepts)
        ],
        key=lambda x: x.address,
    )

    priority = (
        # multiselects first, to remove them from scope
        [c for c in pass_one if c.derivation == Derivation.MULTISELECT]
        # then rowsets to remove them from scope, as they cannot get partials
        + [c for c in pass_one if c.derivation == Derivation.ROWSET]
        # then unions, for the same reason
        + [c for c in pass_one if c.derivation == Derivation.UNION]
        # we should be home-free here
        + [c for c in pass_one if c.derivation == Derivation.BASIC]
        # then aggregates to remove them from scope, as they cannot get partials
        + [c for c in pass_one if c.derivation == Derivation.AGGREGATE]
        # then windows to remove them from scope, as they cannot get partials
        + [c for c in pass_one if c.derivation == Derivation.WINDOW]
        # then filters to remove them from scope, also cannot get partials
        + [c for c in pass_one if c.derivation == Derivation.FILTER]
        # unnests are weird?
        + [c for c in pass_one if c.derivation == Derivation.UNNEST]
        + [c for c in pass_one if c.derivation == Derivation.RECURSIVE]
        + [c for c in pass_one if c.derivation == Derivation.GROUP_TO]
        + [c for c in pass_one if c.derivation == Derivation.CONSTANT]
        # finally our plain selects, and any non-single-row constants
        + [c for c in pass_one if c.derivation == Derivation.ROOT]
    )

    priority += [c for c in pass_one if c.address not in [x.address for x in priority]]
    final = []
    # if anything is derived from another concept,
    # get the derived copy first,
    # as this will usually resolve cleaner
    for x in priority:
        if any(
            [
                x.address in get_upstream_concepts(c)
                for c in priority
            ]
        ):
            logger.info(
                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} delaying fetch of {x.address} as parent of another concept"
            )
            continue
        final.append(x)
    # then append anything we didn't get
    for x2 in priority:
        if x2 not in final:
            final.append(x2)
    if final:
        return final[0]
    raise ValueError(
        f"Cannot resolve query. No remaining priority concepts; have attempted {attempted_addresses} out of {all_concepts} with found {found_concepts}"
    )
```
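The concatenation above encodes a fixed precedence over derivations. Restated as data for readability (a paraphrase for this writeup, not a constant exported by the module):

```python
# Order in which concept derivations are attempted by the discovery loop:
PRIORITY_ORDER = [
    "MULTISELECT",  # multiselects first, removing them from scope
    "ROWSET",       # rowsets next; they cannot get partials
    "UNION",
    "BASIC",
    "AGGREGATE",
    "WINDOW",
    "FILTER",
    "UNNEST",
    "RECURSIVE",
    "GROUP_TO",
    "CONSTANT",
    "ROOT",         # plain selects last
]
```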
```python
def is_pushdown_aliased_concept(c: BuildConcept) -> bool:
    return (
        isinstance(c.lineage, BuildFunction)
        and c.lineage.operator == FunctionType.ALIAS
        and isinstance(c.lineage.arguments[0], BuildConcept)
        and c.lineage.arguments[0].derivation not in NO_PUSHDOWN_DERIVATIONS
    )


def get_inputs_that_require_pushdown(
    conditions: BuildWhereClause | None, mandatory: list[BuildConcept]
) -> list[BuildConcept]:
    if not conditions:
        return []
    return [
        x
        for x in mandatory
        if x.address not in conditions.row_arguments
        and (
            x.derivation not in NO_PUSHDOWN_DERIVATIONS
            or is_pushdown_aliased_concept(x)
        )
    ]
```
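`get_loop_iteration_targets` is the per-iteration entry point that ties these helpers together: it decides which conditions travel upward, assembles the candidate list, and picks the single concept to attempt next.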
```python
def get_loop_iteration_targets(
    mandatory: list[BuildConcept],
    conditions: BuildWhereClause | None,
    attempted: set[str],
    force_conditions: bool,
    found: set[str],
    partial: set[str],
    depth: int,
    materialized_canonical: set[str],
) -> tuple[BuildConcept, List[BuildConcept], BuildWhereClause | None]:
    # objectives:
    # 1. if we have complex types, push any conditions further up until we only have roots
    # 2. if we only have roots left, push all condition inputs into the candidate list
    # 3. from the final candidate list, select the highest priority concept to attempt next
    force_pushdown_to_complex_input = False

    pushdown_targets = get_inputs_that_require_pushdown(conditions, mandatory)
    if pushdown_targets:
        force_pushdown_to_complex_input = True
    # a list of all non-materialized concepts, or all concepts
    # if a pushdown is required
    all_concepts_local: list[BuildConcept] = [
        x
        for x in mandatory
        if force_pushdown_to_complex_input
        or (x.canonical_address not in materialized_canonical)
        # keep Root/Constant
        or x.derivation in (Derivation.ROOT, Derivation.CONSTANT)
    ]
    remaining_concrete = [x for x in mandatory if x.address not in all_concepts_local]

    for x in remaining_concrete:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Adding materialized concept {x.address} as root instead of derived."
        )
        all_concepts_local.append(x.with_materialized_source())

    remaining = [x for x in all_concepts_local if x.address not in attempted]
    conditions = evaluate_loop_condition_pushdown(
        mandatory=all_concepts_local,
        conditions=conditions,
        depth=depth,
        force_no_condition_pushdown=force_conditions,
        forced_pushdown=pushdown_targets,
    )
    local_all = [*all_concepts_local]

    if all([x.derivation in (Derivation.ROOT,) for x in remaining]) and conditions:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} All remaining mandatory concepts are roots or constants, injecting condition inputs into candidate list"
        )
        local_all = unique(
            list(conditions.row_arguments) + remaining,
            "address",
        )
        conditions = None
    if conditions and force_conditions:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} condition evaluation at this level forced"
        )
        local_all = unique(
            list(conditions.row_arguments) + remaining,
            "address",
        )
        # if we have a forced pushdown, also push conditions down
        # while keeping them at this level too
        conditions = conditions if force_pushdown_to_complex_input else None

    priority_concept = get_priority_concept(
        all_concepts=local_all,
        attempted_addresses=attempted,
        found_concepts=found,
        partial_concepts=partial,
        depth=depth,
    )

    optional = generate_candidates_restrictive(
        priority_concept=priority_concept,
        candidates=local_all,
        exhausted=attempted,
        # conditions_exist=conditions is not None,
    )
    return priority_concept, optional, conditions
```
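The returned triple drives the next discovery iteration: the priority concept is attempted first, the optional list bounds what may be co-resolved alongside it, and the (possibly cleared) conditions tell the caller whether filtering still has to be applied further up the tree.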