pytrilogy 0.3.138__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.138.dist-info/METADATA +525 -0
  5. pytrilogy-0.3.138.dist-info/RECORD +182 -0
  6. pytrilogy-0.3.138.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +9 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +87 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +143 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2672 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +494 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +748 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +517 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +106 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1359 -0
  112. trilogy/dialect/bigquery.py +256 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +177 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +91 -0
  121. trilogy/dialect/presto.py +104 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +90 -0
  124. trilogy/dialect/sql_server.py +92 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +750 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +7 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/dependency/Cargo.lock +617 -0
  148. trilogy/scripts/dependency/Cargo.toml +39 -0
  149. trilogy/scripts/dependency/README.md +131 -0
  150. trilogy/scripts/dependency/build.sh +25 -0
  151. trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
  152. trilogy/scripts/dependency/src/lib.rs +16 -0
  153. trilogy/scripts/dependency/src/main.rs +770 -0
  154. trilogy/scripts/dependency/src/parser.rs +435 -0
  155. trilogy/scripts/dependency/src/preql.pest +208 -0
  156. trilogy/scripts/dependency/src/python_bindings.rs +289 -0
  157. trilogy/scripts/dependency/src/resolver.rs +716 -0
  158. trilogy/scripts/dependency/tests/base.preql +3 -0
  159. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  160. trilogy/scripts/dependency/tests/customer.preql +6 -0
  161. trilogy/scripts/dependency/tests/main.preql +9 -0
  162. trilogy/scripts/dependency/tests/orders.preql +7 -0
  163. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  164. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  165. trilogy/scripts/dependency.py +323 -0
  166. trilogy/scripts/display.py +460 -0
  167. trilogy/scripts/environment.py +46 -0
  168. trilogy/scripts/parallel_execution.py +483 -0
  169. trilogy/scripts/single_execution.py +131 -0
  170. trilogy/scripts/trilogy.py +772 -0
  171. trilogy/std/__init__.py +0 -0
  172. trilogy/std/color.preql +3 -0
  173. trilogy/std/date.preql +13 -0
  174. trilogy/std/display.preql +18 -0
  175. trilogy/std/geography.preql +22 -0
  176. trilogy/std/metric.preql +15 -0
  177. trilogy/std/money.preql +67 -0
  178. trilogy/std/net.preql +14 -0
  179. trilogy/std/ranking.preql +7 -0
  180. trilogy/std/report.preql +5 -0
  181. trilogy/std/semantic.preql +6 -0
  182. trilogy/utility.py +34 -0
@@ -0,0 +1,517 @@
1
+ from typing import List
2
+
3
+ from trilogy.constants import logger
4
+ from trilogy.core.enums import Derivation, Granularity, Purpose, SourceType
5
+ from trilogy.core.models.build import (
6
+ BuildConcept,
7
+ BuildDatasource,
8
+ BuildFilterItem,
9
+ BuildGrain,
10
+ BuildRowsetItem,
11
+ BuildWhereClause,
12
+ )
13
+ from trilogy.core.models.build_environment import BuildEnvironment
14
+ from trilogy.core.models.execute import QueryDatasource, UnnestJoin
15
+ from trilogy.core.processing.constants import ROOT_DERIVATIONS
16
+ from trilogy.core.processing.nodes import GroupNode, MergeNode, StrategyNode
17
+ from trilogy.core.processing.utility import GroupRequiredResponse
18
+ from trilogy.utility import unique
19
+
20
+
21
def depth_to_prefix(depth: int) -> str:
    """Return a tab prefix used to indent log messages at the given depth."""
    return depth * "\t"
23
+
24
+
25
# tag prepended to every discovery-loop log message for easy grepping
LOGGER_PREFIX = "[DISCOVERY LOOP]"
26
+
27
+
28
def calculate_effective_parent_grain(
    node: QueryDatasource | BuildDatasource,
) -> BuildGrain:
    """Compute the effective output grain of a parent node.

    For a ``QueryDatasource`` the grain is accumulated from its joins:
    unnest joins contribute the unnested concepts; for pair-joins the left
    side's grain is always included, while the right side's grain is added
    only when the join keys do not already match it (a matching grain means
    the join cannot fan out rows). Datasources never referenced by a join
    still contribute their grain unless they appear only inside an existence
    (subselect) condition. For a plain ``BuildDatasource`` the declared
    grain is returned directly (empty grain if unset).
    """
    # calculate the effective grain of the parent node
    # this is the union of all parent grains
    if isinstance(node, QueryDatasource):
        grain = BuildGrain()
        qds = node
        if not qds.joins:
            # no joins: single underlying datasource defines the grain
            return qds.datasources[0].grain
        # names of datasources whose grain was accounted for via a join
        seen = set()
        for join in qds.joins:
            if isinstance(join, UnnestJoin):
                # unnesting introduces the unnested concepts into the grain
                grain += BuildGrain(components=set([x.address for x in join.concepts]))
                continue
            pairs = join.concept_pairs or []
            for key in pairs:
                left = key.existing_datasource
                logger.debug(f"adding left grain {left.grain} for join key {key.left}")
                grain += left.grain
                seen.add(left.name)
            keys = [key.right for key in pairs]
            join_grain = BuildGrain.from_concepts(keys)
            if join_grain == join.right_datasource.grain:
                # joining exactly on the right side's grain cannot fan out
                logger.debug(f"irrelevant right join {join}, does not change grain")
            else:
                logger.debug(
                    f"join changes grain, adding {join.right_datasource.grain} to {grain}"
                )
                grain += join.right_datasource.grain
                seen.add(join.right_datasource.name)
        for x in qds.datasources:
            # if we haven't seen it, it's still contributing to grain
            # unless used ONLY in a subselect
            # so the existence check is a [bad] proxy for that
            if x.name not in seen and not (
                qds.condition
                and qds.condition.existence_arguments
                and any(
                    [
                        c.address in block
                        for c in x.output_concepts
                        for block in qds.condition.existence_arguments
                    ]
                )
            ):
                logger.debug(f"adding unjoined grain {x.grain} for datasource {x.name}")
                grain += x.grain
        return grain
    else:
        return node.grain or BuildGrain()
79
+
80
+
81
def check_if_group_required(
    downstream_concepts: List[BuildConcept],
    parents: list[QueryDatasource | BuildDatasource],
    environment: BuildEnvironment,
    depth: int = 0,
) -> GroupRequiredResponse:
    """Decide whether a group-by is needed to reach the downstream grain.

    Compares the combined effective grain of *parents* against the grain
    implied by *downstream_concepts*. Grouping is suppressed when:
    - the combined parent grain is already a subset of the target grain;
    - the extra grain components are all unique properties whose keys are
      contained in the parent grain;
    - the extra components are all keys and the target grain, re-expressed
      via unique-property keys, covers the parent grain;
    - after mapping rowset/filter concepts back to their base concepts, the
      parent grain is a subset of the remapped target grain.

    Returns a GroupRequiredResponse (target grain, upstream grain, required flag).
    """
    padding = "\t" * depth
    target_grain = BuildGrain.from_concepts(
        downstream_concepts,
        environment=environment,
    )

    comp_grain = BuildGrain()
    for source in parents:
        # comp_grain += source.grain
        comp_grain += calculate_effective_parent_grain(source)

    # dynamically select if we need to group
    # we must avoid grouping if we are already at grain
    if comp_grain.abstract and not target_grain.abstract:
        # cannot reason about an abstract upstream grain; group defensively
        logger.info(
            f"{padding}{LOGGER_PREFIX} Group requirement check: upstream grain is abstract, cannot determine grouping requirement, assuming group required"
        )
        return GroupRequiredResponse(target_grain, comp_grain, True)
    if comp_grain.issubset(target_grain):

        logger.info(
            f"{padding}{LOGGER_PREFIX} Group requirement check: {comp_grain}, target: {target_grain}, grain is subset of target, no group node required"
        )
        return GroupRequiredResponse(target_grain, comp_grain, False)
    # find out what extra is in the comp grain vs target grain
    difference = [
        environment.concepts[c] for c in (comp_grain - target_grain).components
    ]
    logger.info(
        f"{padding}{LOGGER_PREFIX} Group requirement check: upstream grain: {comp_grain}, desired grain: {target_grain} from, difference {[x.address for x in difference]}"
    )
    for x in difference:
        logger.info(
            f"{padding}{LOGGER_PREFIX} Difference concept {x.address} purpose {x.purpose} keys {x.keys}"
        )

    # if the difference is all unique properties whose keys are in the source grain
    # we can also suppress the group
    if difference and all(
        [
            x.keys
            and all(
                environment.concepts[z].address in comp_grain.components for z in x.keys
            )
            for x in difference
        ]
    ):
        logger.info(
            f"{padding}{LOGGER_PREFIX} Group requirement check: skipped due to unique property validation"
        )
        return GroupRequiredResponse(target_grain, comp_grain, False)
    if difference and all([x.purpose == Purpose.KEY for x in difference]):
        logger.info(
            f"{padding}{LOGGER_PREFIX} checking if downstream is unique properties of key"
        )
        # re-express the target grain: unique properties are replaced by the
        # key set they are unique over; everything else stays as-is
        replaced_grain_raw: list[set[str]] = [
            (
                x.keys or set()
                if x.purpose == Purpose.UNIQUE_PROPERTY
                else set([x.address])
            )
            for x in downstream_concepts
            if x.address in target_grain.components
        ]
        # flatten the list of lists
        replaced_grain = [item for sublist in replaced_grain_raw for item in sublist]
        # if the replaced grain is a subset of the comp grain, we can skip the group
        unique_grain_comp = BuildGrain.from_concepts(
            replaced_grain, environment=environment
        )
        if comp_grain.issubset(unique_grain_comp):
            logger.info(
                f"{padding}{LOGGER_PREFIX} Group requirement check: skipped due to unique property validation"
            )
            return GroupRequiredResponse(target_grain, comp_grain, False)
    logger.info(
        f"{padding}{LOGGER_PREFIX} Checking for grain equivalence for filters and rowsets"
    )
    # remap rowset/filter concepts in the target grain back to their
    # underlying base concepts before one final subset comparison
    ngrain: list[str] = []
    for con in target_grain.components:
        full = environment.concepts[con]
        if full.derivation == Derivation.ROWSET:
            # strip the rowset namespace prefix to recover the base address
            ngrain.append(full.address.split(".", 1)[1])
        elif full.derivation == Derivation.FILTER:
            assert isinstance(full.lineage, BuildFilterItem)
            if isinstance(full.lineage.content, BuildConcept):
                ngrain.append(full.lineage.content.address)
            else:
                ngrain.append(full.address)
    target_grain2 = BuildGrain.from_concepts(
        ngrain,
        environment=environment,
    )
    if comp_grain.issubset(target_grain2):
        logger.info(
            f"{padding}{LOGGER_PREFIX} Group requirement check: {comp_grain}, {target_grain2}, pre rowset grain is subset of target, no group node required"
        )
        return GroupRequiredResponse(target_grain2, comp_grain, False)

    logger.info(f"{padding}{LOGGER_PREFIX} Group requirement check: group required")
    return GroupRequiredResponse(target_grain, comp_grain, True)
188
+
189
+
190
def group_if_required_v2(
    root: StrategyNode,
    final: List[BuildConcept],
    environment: BuildEnvironment,
    where_injected: set[str] | None = None,
    depth: int = 0,
) -> StrategyNode:
    """Wrap *root* in a GroupNode (or adapt it in place) when the grain requires it.

    Checks whether resolving *root* to the grain of *final* requires a group;
    if so, a MergeNode is forced to group in place, an existing GroupNode is
    reused (BASIC-sourced GroupNodes get a fresh group on top), and anything
    else gets a new GroupNode parent. When no group is required, outputs are
    narrowed to the requested concepts.

    Returns either *root* (possibly mutated) or a new GroupNode over it.
    """
    where_injected = where_injected or set()
    required = check_if_group_required(
        downstream_concepts=final,
        parents=[root.resolve()],
        environment=environment,
        depth=depth,
    )
    # outputs of root that match a requested concept directly or via pseudonym
    # NOTE(review): `x.address in final` tests a str against BuildConcepts —
    # presumably BuildConcept equality accepts address strings; confirm.
    targets = [
        x
        for x in root.output_concepts
        if x.address in final or any(c in final for c in x.pseudonyms)
    ]
    if required.required:
        if isinstance(root, MergeNode):
            # merge nodes can group in place; no extra node needed
            root.force_group = True
            root.set_output_concepts(targets, rebuild=False, change_visibility=False)
            root.rebuild_cache()
            return root
        elif isinstance(root, GroupNode) and root.source_type == SourceType.BASIC:
            # we need to group this one more time
            pass
        elif isinstance(root, GroupNode):
            # already grouping; just trim outputs if they differ from the target
            if set(x.address for x in final) != set(
                x.address for x in root.output_concepts
            ):
                # drop concepts injected only for where-clause evaluation,
                # unless the node itself requires them as outputs
                allowed_outputs = [
                    x
                    for x in root.output_concepts
                    if not (
                        x.address in where_injected
                        and x.address not in (root.required_outputs or set())
                    )
                ]

                logger.info(
                    f"Adjusting group node outputs to remove injected concepts {where_injected}: remaining {allowed_outputs}"
                )
                root.set_output_concepts(allowed_outputs)
            return root
        # default: add a grouping node on top of root
        return GroupNode(
            output_concepts=targets,
            input_concepts=targets,
            environment=environment,
            parents=[root],
            partial_concepts=root.partial_concepts,
            preexisting_conditions=root.preexisting_conditions,
        )
    elif isinstance(root, GroupNode):

        return root
    else:
        # no group needed: narrow outputs to the requested concepts
        root.set_output_concepts(targets, rebuild=False, change_visibility=False)
        return root
250
+
251
+
252
+ def get_upstream_concepts(base: BuildConcept, nested: bool = False) -> set[str]:
253
+ upstream = set()
254
+ if nested:
255
+ upstream.add(base.address)
256
+ if not base.lineage:
257
+ return upstream
258
+ for x in base.lineage.concept_arguments:
259
+ # if it's derived from any value in a rowset, ALL rowset items are upstream
260
+ if x.derivation == Derivation.ROWSET:
261
+ assert isinstance(x.lineage, BuildRowsetItem), type(x.lineage)
262
+ for y in x.lineage.rowset.select.output_components:
263
+ upstream.add(f"{x.lineage.rowset.name}.{y.address}")
264
+ # upstream = upstream.union(get_upstream_concepts(y, nested=True))
265
+ upstream = upstream.union(get_upstream_concepts(x, nested=True))
266
+ return upstream
267
+
268
+
269
+ def evaluate_loop_condition_pushdown(
270
+ mandatory: list[BuildConcept],
271
+ conditions: BuildWhereClause | None,
272
+ depth: int,
273
+ force_no_condition_pushdown: bool,
274
+ forced_pushdown: list[BuildConcept]
275
+ ) -> BuildWhereClause | None:
276
+ # filter evaluation
277
+ # always pass the filter up when we aren't looking at all filter inputs
278
+ # or there are any non-filter complex types
279
+ if not conditions:
280
+ return None
281
+ # first, check if we *have* to push up conditions above complex derivations
282
+ if forced_pushdown:
283
+ logger.info(
284
+ f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Force including conditions to push filtering above complex concepts {forced_pushdown} that are not condition row inputs {conditions.row_arguments} or parent"
285
+ )
286
+ return conditions
287
+ # otherwise, only prevent pushdown
288
+ # (forcing local condition evaluation)
289
+ # only if all condition inputs are here and we only have roots
290
+ should_evaluate_filter_on_this_level_not_push_down = all(
291
+ [x.address in mandatory for x in conditions.row_arguments]
292
+ ) and not any(
293
+ [
294
+ x.derivation not in (ROOT_DERIVATIONS + [Derivation.BASIC])
295
+ for x in mandatory
296
+ if x.address not in conditions.row_arguments
297
+ ]
298
+ )
299
+
300
+ if (
301
+ force_no_condition_pushdown
302
+ or should_evaluate_filter_on_this_level_not_push_down
303
+ ):
304
+ logger.info(
305
+ f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Forcing condition evaluation at this level: all basic_no_agg: {should_evaluate_filter_on_this_level_not_push_down}"
306
+ )
307
+ return None
308
+
309
+ return conditions
310
+
311
+
312
def generate_candidates_restrictive(
    priority_concept: BuildConcept,
    candidates: list[BuildConcept],
    exhausted: set[str],
    # conditions_exist: bool,
) -> list[BuildConcept]:
    """Build the optional companion list for sourcing *priority_concept*.

    Filters *candidates* down to concepts that may be fetched alongside the
    priority concept: not the concept itself, not already exhausted, not a
    pseudonym of it, and not single-row (unless the priority concept is
    itself single-row, in which case only other single-row concepts are kept,
    and only when resolving an aggregate).
    """

    def _not_pseudonym(candidate: BuildConcept) -> bool:
        # neither side may be an alias (pseudonym) of the other
        return (
            candidate.address not in priority_concept.pseudonyms
            and priority_concept.address not in candidate.pseudonyms
        )

    others = [c for c in candidates if c.address != priority_concept.address]

    # if it's single row, joins are irrelevant. Fetch without keys.
    if priority_concept.granularity == Granularity.SINGLE_ROW:
        logger.info("Have single row concept, including only other single row optional")
        if priority_concept.derivation != Derivation.AGGREGATE:
            return []
        return [
            c
            for c in others
            if c.granularity == Granularity.SINGLE_ROW and _not_pseudonym(c)
        ]

    return [
        c
        for c in others
        if c.address not in exhausted
        and c.granularity != Granularity.SINGLE_ROW
        and _not_pseudonym(c)
    ]
346
+
347
+
348
def get_priority_concept(
    all_concepts: List[BuildConcept],
    attempted_addresses: set[str],
    found_concepts: set[str],
    partial_concepts: set[str],
    depth: int,
) -> BuildConcept:
    """Pick the next concept the discovery loop should attempt to source.

    Candidates are concepts not yet attempted and either not yet found or
    only found partially. They are ranked by derivation complexity
    (multiselects/rowsets/unions first, plain roots last), and any concept
    that is an upstream input of another candidate is deferred so the
    derived concept resolves first.

    Raises:
        ValueError: when no candidate remains to attempt.
    """
    # optimized search for missing concepts
    pass_one = sorted(
        (
            c
            for c in all_concepts
            if c.address not in attempted_addresses
            and (c.address not in found_concepts or c.address in partial_concepts)
        ),
        key=lambda x: x.address,
    )

    # rank by derivation: complex derivations are attempted before plain
    # roots so they are removed from scope early (and several of them
    # cannot get partials)
    derivation_order = [
        Derivation.MULTISELECT,
        Derivation.ROWSET,
        Derivation.UNION,
        Derivation.BASIC,
        Derivation.AGGREGATE,
        Derivation.WINDOW,
        Derivation.FILTER,
        Derivation.UNNEST,  # unnests are weird?
        Derivation.RECURSIVE,
        Derivation.GROUP_TO,
        Derivation.CONSTANT,
        Derivation.ROOT,  # plain selects and any non-single row constants
    ]
    priority = [
        c
        for derivation in derivation_order
        for c in pass_one
        if c.derivation == derivation
    ]
    # anything with an unranked derivation is still queued, at the end
    # (set membership here replaces the original per-iteration list rebuild)
    ranked_addresses = {c.address for c in priority}
    priority += [c for c in pass_one if c.address not in ranked_addresses]

    # if anything is derived from another candidate, delay the parent and
    # fetch the derived copy first, as this usually resolves cleaner.
    # hoisted: the union of all upstream addresses is computed once rather
    # than re-walking lineage for every candidate pair (was O(n^2))
    upstream_of_any: set[str] = set()
    for c in priority:
        upstream_of_any |= get_upstream_concepts(c)

    final = []
    delayed = []
    for x in priority:
        if x.address in upstream_of_any:
            logger.info(
                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} delaying fetch of {x.address} as parent of another concept"
            )
            delayed.append(x)
        else:
            final.append(x)
    # then append anything we delayed, preserving priority order
    for x in delayed:
        if x not in final:
            final.append(x)
    if final:
        return final[0]
    raise ValueError(
        f"Cannot resolve query. No remaining priority concepts, have attempted {attempted_addresses} out of {all_concepts} with found {found_concepts}"
    )
425
+
426
+ def get_inputs_that_require_pushdown(conditions:BuildWhereClause | None, mandatory: list[BuildConcept]) -> list[BuildConcept]:
427
+ if not conditions:
428
+ return []
429
+ return [
430
+ x
431
+ for x in mandatory
432
+ if x.address not in conditions.row_arguments
433
+ and x.derivation not in ROOT_DERIVATIONS + [Derivation.BASIC, Derivation.ROWSET, Derivation.UNNEST]
434
+ ]
435
+
436
def get_loop_iteration_targets(
    mandatory: list[BuildConcept],
    conditions: BuildWhereClause | None,
    attempted: set[str],
    force_conditions: bool,
    found: set[str],
    partial: set[str],
    depth: int,
    materialized_canonical: set[str],
) -> tuple[BuildConcept, List[BuildConcept], BuildWhereClause | None]:
    """Select the next discovery target, its companions, and remaining conditions.

    Returns a tuple of (priority concept to attempt, optional companion
    concepts, conditions still to be pushed to parents — or None when they
    are evaluated at this level).
    """
    # objectives
    # 1. if we have complex types; push any conditions further up until we only have roots
    # 2. if we only have roots left, push all condition inputs into the candidate list
    # 3. from the final candidate list, select the highest priority concept to attempt next
    force_pushdown_to_complex_input = False

    pushdown_targets = get_inputs_that_require_pushdown(conditions, mandatory)
    if pushdown_targets:
        force_pushdown_to_complex_input = True
    # a list of all non-materialized concepts, or all concepts
    # if a pushdown is required
    all_concepts_local: list[BuildConcept] = [
        x
        for x in mandatory
        if force_pushdown_to_complex_input
        or (x.canonical_address not in materialized_canonical)
        # keep Root/Constant
        or x.derivation in (Derivation.ROOT, Derivation.CONSTANT)
    ]
    # NOTE(review): this tests an address string for membership in a list of
    # BuildConcepts — presumably BuildConcept equality accepts address
    # strings; confirm, otherwise this never filters anything out.
    remaining_concrete = [x for x in mandatory if x.address not in all_concepts_local]

    for x in remaining_concrete:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Adding materialized concept {x.address} as root instead of derived."
        )
        all_concepts_local.append(x.with_materialized_source())

    remaining = [x for x in all_concepts_local if x.address not in attempted]
    conditions = evaluate_loop_condition_pushdown(
        mandatory=all_concepts_local,
        conditions=conditions,
        depth=depth,
        force_no_condition_pushdown=force_conditions,
        forced_pushdown=pushdown_targets,
    )
    local_all = [*all_concepts_local]

    if all([x.derivation in (Derivation.ROOT,) for x in remaining]) and conditions:
        # only roots left: fold the condition's row inputs into the candidate
        # list and stop propagating the condition upward
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} All remaining mandatory concepts are roots or constants, injecting condition inputs into candidate list"
        )
        local_all = unique(
            list(conditions.row_arguments) + remaining,
            "address",
        )
        conditions = None
    if conditions and force_conditions:
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} condition evaluation at this level forced"
        )
        local_all = unique(
            list(conditions.row_arguments) + remaining,
            "address",
        )
        # if we have a forced pushdown, also push them down while keeping them at this level too
        conditions = conditions if force_pushdown_to_complex_input else None

    priority_concept = get_priority_concept(
        all_concepts=local_all,
        attempted_addresses=attempted,
        found_concepts=found,
        partial_concepts=partial,
        depth=depth,
    )

    optional = generate_candidates_restrictive(
        priority_concept=priority_concept,
        candidates=local_all,
        exhausted=attempted,
        # conditions_exist = conditions is not None,
        # depth=depth,
    )
    return priority_concept, optional, conditions
@@ -0,0 +1,167 @@
1
+ from collections import defaultdict
2
+ from enum import Enum
3
+ from typing import List
4
+
5
+ from trilogy.core.models.build import (
6
+ BuildConcept,
7
+ BuildWhereClause,
8
+ )
9
+ from trilogy.core.models.build_environment import BuildEnvironment
10
+ from trilogy.core.processing.nodes import (
11
+ StrategyNode,
12
+ )
13
+ from trilogy.core.processing.utility import (
14
+ get_disconnected_components,
15
+ )
16
+
17
+
18
class ValidationResult(Enum):
    """Outcome of validating a strategy-node stack against requested concepts."""

    # every requested concept is found and the nodes form one connected graph
    COMPLETE = 1
    # all concepts found, but the nodes form multiple disconnected subgraphs
    DISCONNECTED = 2
    # at least one requested concept has not been sourced yet
    INCOMPLETE = 3
    # concepts found, but filter conditions are not yet fully satisfied
    INCOMPLETE_CONDITION = 4
23
+
24
+
25
def validate_concept(
    concept: BuildConcept,
    node: StrategyNode,
    found_addresses: set[str],
    non_partial_addresses: set[str],
    partial_addresses: set[str],
    virtual_addresses: set[str],
    found_map: dict[str, set[BuildConcept]],
    accept_partial: bool,
    seen: set[str],
    environment: BuildEnvironment,
) -> None:
    """Record whether *concept* is satisfied by *node*, recursing into pseudonyms.

    Mutates the tracking collections in place: *found_addresses* /
    *non_partial_addresses* / *partial_addresses* / *virtual_addresses*
    classify each address, *found_map* records which node supplied which
    concepts, and *seen* guards the pseudonym recursion against cycles.
    A fully (non-partially) sourced address overrides any earlier partial
    sighting; a partial sighting counts as found only when *accept_partial*
    is set.
    """
    # logger.debug(
    #     f"Validating concept {concept.address} with accept_partial={accept_partial}"
    # )
    found_map[str(node)].add(concept)
    seen.add(concept.address)
    if concept not in node.partial_concepts:
        found_addresses.add(concept.address)
        non_partial_addresses.add(concept.address)
        # remove it from our partial tracking
        if concept.address in partial_addresses:
            partial_addresses.remove(concept.address)
        if concept.address in virtual_addresses:
            virtual_addresses.remove(concept.address)
    if concept in node.partial_concepts:
        # a full (non-partial) sighting from another node wins; ignore this one
        if concept.address in non_partial_addresses:
            return None
        partial_addresses.add(concept.address)
        if accept_partial:
            found_addresses.add(concept.address)
            # already added at entry; set semantics make this a no-op
            found_map[str(node)].add(concept)
    # walk pseudonyms (aliases) so an alias sighting satisfies the concept too
    for v_address in concept.pseudonyms:
        if v_address in seen:
            continue
        if v_address in environment.alias_origin_lookup:
            # logger.debug(
            #     f"Found alias origin for {v_address}: {environment.alias_origin_lookup[v_address]} mapped to {environment.concepts[v_address]}")
            v = environment.alias_origin_lookup[v_address]
        else:
            v = environment.concepts[v_address]

        if v.address in seen:

            continue

        if v.address == concept.address:

            continue
        validate_concept(
            v,
            node,
            found_addresses,
            non_partial_addresses,
            partial_addresses,
            virtual_addresses,
            found_map,
            accept_partial,
            seen=seen,
            environment=environment,
        )
86
+
87
+
88
def validate_stack(
    environment: BuildEnvironment,
    stack: List[StrategyNode],
    concepts: List[BuildConcept],
    mandatory_with_filter: List[BuildConcept],
    conditions: BuildWhereClause | None = None,
    accept_partial: bool = False,
) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
    """Check whether *stack* fully and connectedly covers the requested concepts.

    Walks every node's resolved (non-hidden) outputs plus virtual outputs,
    classifying each address via ``validate_concept``. Returns a tuple of
    (result, found addresses, missing addresses, partial addresses,
    virtual addresses).
    """
    found_map: dict[str, set[BuildConcept]] = defaultdict(set)
    found_addresses: set[str] = set()
    non_partial_addresses: set[str] = set()
    partial_addresses: set[str] = set()
    virtual_addresses: set[str] = set()
    seen: set[str] = set()

    for node in stack:
        resolved = node.resolve()

        for concept in resolved.output_concepts:
            # hidden outputs do not satisfy requested concepts
            if concept.address in resolved.hidden_concepts:
                continue

            validate_concept(
                concept,
                node,
                found_addresses,
                non_partial_addresses,
                partial_addresses,
                virtual_addresses,
                found_map,
                accept_partial,
                seen,
                environment,
            )
        for concept in node.virtual_output_concepts:
            # a real (non-partial) sighting beats a virtual one
            if concept.address in non_partial_addresses:
                continue
            found_addresses.add(concept.address)
            virtual_addresses.add(concept.address)
    if not conditions:
        conditions_met = True
    else:
        # conditions are satisfied when every node already carries them, or
        # when everything the filter requires has been found
        conditions_met = all(
            [node.preexisting_conditions == conditions.conditional for node in stack]
        ) or all([c.address in found_addresses for c in mandatory_with_filter])
    # zip in those we know we found
    if not all([c.address in found_addresses for c in concepts]) or not conditions_met:
        if not all([c.address in found_addresses for c in concepts]):
            return (
                ValidationResult.INCOMPLETE,
                found_addresses,
                {c.address for c in concepts if c.address not in found_addresses},
                partial_addresses,
                virtual_addresses,
            )
        # NOTE(review): this tests address strings against a list of
        # BuildConcepts — presumably BuildConcept equality accepts address
        # strings; confirm.
        return (
            ValidationResult.INCOMPLETE_CONDITION,
            found_addresses,
            {c.address for c in concepts if c.address not in mandatory_with_filter},
            partial_addresses,
            virtual_addresses,
        )

    graph_count, _ = get_disconnected_components(found_map)
    if graph_count in (0, 1):
        return (
            ValidationResult.COMPLETE,
            found_addresses,
            set(),
            partial_addresses,
            virtual_addresses,
        )
    # if we have too many subgraphs, we need to keep searching
    return (
        ValidationResult.DISCONNECTED,
        found_addresses,
        set(),
        partial_addresses,
        virtual_addresses,
    )