pytrilogy 0.3.148__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-312-aarch64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.148.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.148.dist-info/RECORD +206 -0
  6. pytrilogy-0.3.148.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.148.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.148.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2662 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +434 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +786 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1431 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +376 -0
  117. trilogy/dialect/enums.py +149 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/file_state_store.py +0 -0
  130. trilogy/execution/state/sqllite_state_store.py +0 -0
  131. trilogy/execution/state/state_store.py +301 -0
  132. trilogy/executor.py +656 -0
  133. trilogy/hooks/__init__.py +4 -0
  134. trilogy/hooks/base_hook.py +40 -0
  135. trilogy/hooks/graph_hook.py +135 -0
  136. trilogy/hooks/query_debugger.py +166 -0
  137. trilogy/metadata/__init__.py +0 -0
  138. trilogy/parser.py +10 -0
  139. trilogy/parsing/README.md +21 -0
  140. trilogy/parsing/__init__.py +0 -0
  141. trilogy/parsing/common.py +1069 -0
  142. trilogy/parsing/config.py +5 -0
  143. trilogy/parsing/exceptions.py +8 -0
  144. trilogy/parsing/helpers.py +1 -0
  145. trilogy/parsing/parse_engine.py +2863 -0
  146. trilogy/parsing/render.py +773 -0
  147. trilogy/parsing/trilogy.lark +544 -0
  148. trilogy/py.typed +0 -0
  149. trilogy/render.py +45 -0
  150. trilogy/scripts/README.md +9 -0
  151. trilogy/scripts/__init__.py +0 -0
  152. trilogy/scripts/agent.py +41 -0
  153. trilogy/scripts/agent_info.py +306 -0
  154. trilogy/scripts/common.py +430 -0
  155. trilogy/scripts/dependency/Cargo.lock +617 -0
  156. trilogy/scripts/dependency/Cargo.toml +39 -0
  157. trilogy/scripts/dependency/README.md +131 -0
  158. trilogy/scripts/dependency/build.sh +25 -0
  159. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  160. trilogy/scripts/dependency/src/lib.rs +16 -0
  161. trilogy/scripts/dependency/src/main.rs +770 -0
  162. trilogy/scripts/dependency/src/parser.rs +435 -0
  163. trilogy/scripts/dependency/src/preql.pest +208 -0
  164. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  165. trilogy/scripts/dependency/src/resolver.rs +716 -0
  166. trilogy/scripts/dependency/tests/base.preql +3 -0
  167. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  168. trilogy/scripts/dependency/tests/customer.preql +6 -0
  169. trilogy/scripts/dependency/tests/main.preql +9 -0
  170. trilogy/scripts/dependency/tests/orders.preql +7 -0
  171. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  172. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  173. trilogy/scripts/dependency.py +323 -0
  174. trilogy/scripts/display.py +555 -0
  175. trilogy/scripts/environment.py +59 -0
  176. trilogy/scripts/fmt.py +32 -0
  177. trilogy/scripts/ingest.py +472 -0
  178. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  179. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  180. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  181. trilogy/scripts/ingest_helpers/typing.py +161 -0
  182. trilogy/scripts/init.py +105 -0
  183. trilogy/scripts/parallel_execution.py +748 -0
  184. trilogy/scripts/plan.py +189 -0
  185. trilogy/scripts/refresh.py +106 -0
  186. trilogy/scripts/run.py +79 -0
  187. trilogy/scripts/serve.py +202 -0
  188. trilogy/scripts/serve_helpers/__init__.py +41 -0
  189. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  190. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  191. trilogy/scripts/serve_helpers/models.py +38 -0
  192. trilogy/scripts/single_execution.py +131 -0
  193. trilogy/scripts/testing.py +129 -0
  194. trilogy/scripts/trilogy.py +75 -0
  195. trilogy/std/__init__.py +0 -0
  196. trilogy/std/color.preql +3 -0
  197. trilogy/std/date.preql +13 -0
  198. trilogy/std/display.preql +18 -0
  199. trilogy/std/geography.preql +22 -0
  200. trilogy/std/metric.preql +15 -0
  201. trilogy/std/money.preql +67 -0
  202. trilogy/std/net.preql +14 -0
  203. trilogy/std/ranking.preql +7 -0
  204. trilogy/std/report.preql +5 -0
  205. trilogy/std/semantic.preql +6 -0
  206. trilogy/utility.py +34 -0
@@ -0,0 +1,251 @@
1
+ from trilogy.constants import CONFIG, logger
2
+ from trilogy.core.enums import BooleanOperator, Derivation
3
+ from trilogy.core.models.build import (
4
+ BuildConditional,
5
+ )
6
+ from trilogy.core.models.execute import CTE, RecursiveCTE, UnionCTE
7
+ from trilogy.core.optimizations import (
8
+ HideUnusedConcepts,
9
+ InlineDatasource,
10
+ OptimizationRule,
11
+ PredicatePushdown,
12
+ PredicatePushdownRemove,
13
+ )
14
+ from trilogy.core.processing.utility import sort_select_output
15
+ from trilogy.core.statements.author import MultiSelectStatement, SelectStatement
16
+
17
# Safety cap on per-rule optimization passes in optimize_ctes; guarantees
# termination even if a rule keeps reporting progress on every pass.
MAX_OPTIMIZATION_LOOPS = 100
18
+
19
+
20
+ # other optimizations may make a CTE a pure passthrough
21
+ # remove those
22
+ # def is_locally_irrelevant(cte: CTE) -> CTE | bool:
23
+ # if not len(cte.parent_ctes) == 1:
24
+ # return False
25
+ # parent = cte.parent_ctes[0]
26
+ # if not parent.output_columns == cte.output_columns:
27
+ # return False
28
+ # if cte.condition is not None:
29
+ # return False
30
+ # if cte.group_to_grain:
31
+ # return False
32
+ # if len(cte.joins)>1:
33
+ # return False
34
+ # return parent
35
+
36
+
37
def reorder_ctes(
    input: list[CTE],
) -> list[CTE]:
    """Return the CTEs ordered so every CTE appears after all of its parents.

    Builds a parent -> child dependency digraph and topologically sorts it.

    Raises:
        networkx.NetworkXUnfeasible: if the dependency graph contains a cycle.
    """
    import networkx as nx

    # Create a directed graph of CTE dependencies
    G = nx.DiGraph()
    mapping: dict[str, CTE] = {}
    for cte in input:
        mapping[cte.name] = cte
        # Register the node explicitly: a CTE with no parents and no children
        # would otherwise never enter the graph and would be silently dropped
        # from the sorted output whenever any other CTE contributes an edge.
        G.add_node(cte.name)
        for parent in cte.parent_ctes:
            G.add_edge(parent.name, cte.name)
    # Perform topological sort (only works for DAGs)
    try:
        topological_order = list(nx.topological_sort(G))
        if not topological_order:
            # empty graph (no CTEs at all) - nothing to reorder
            return input
        return [mapping[x] for x in topological_order]
    except nx.NetworkXUnfeasible as e:
        logger.error(
            "The graph is not a DAG (contains cycles) and cannot be topologically sorted."
        )
        raise e
60
+
61
+
62
def filter_irrelevant_ctes(
    input: list[CTE | UnionCTE],
    root_cte: CTE | UnionCTE,
):
    """Drop CTEs that ``root_cte`` does not (transitively) depend on.

    Walks the parent graph upward from ``root_cte`` (descending into the
    internal CTEs of any UnionCTE) and keeps only the CTEs visited, then
    recurses on the reduced list until a fixed point is reached, since
    removing one CTE can make others unreachable.
    """
    relevant_ctes = set()

    def recurse(cte: CTE | UnionCTE, inverse_map: dict[str, list[CTE | UnionCTE]]):
        # TODO: revisit this
        # if parent := is_locally_irrelevant(cte):
        #     logger.info(
        #         f"[Optimization][Irrelevent CTE filtering] Removing redundant CTE {cte.name} and replacing with {parent.name}"
        #     )
        #     for child in inverse_map.get(cte.name, []):
        #         child.parent_ctes = [
        #             x for x in child.parent_ctes if x.name != cte.name
        #         ] + [parent]
        #         for x in child.source_map:
        #             if cte.name in child.source_map[x]:
        #                 child.source_map[x].remove(cte.name)
        #                 child.source_map[x].append(parent.name)
        #         for x2 in child.existence_source_map:
        #             if cte.name in child.existence_source_map[x2]:
        #                 child.existence_source_map[x2].remove(cte.name)
        #                 child.existence_source_map[x2].append(parent.name)
        # else:
        relevant_ctes.add(cte.name)

        for parent in cte.parent_ctes:
            # skip already-visited parents; also guards against infinite
            # recursion if the graph unexpectedly contains a cycle
            if parent.name in relevant_ctes:
                logger.info(
                    f"[Optimization][Irrelevent CTE filtering] Already visited {parent.name} when visting {cte.name}, potential recursive dag"
                )
                continue

            recurse(parent, inverse_map)
        if isinstance(cte, UnionCTE):
            # members of a union are reachable through the union itself
            for internal in cte.internal_ctes:
                recurse(internal, inverse_map)

    inverse_map = gen_inverse_map(input)
    recurse(root_cte, inverse_map)
    final = [cte for cte in input if cte.name in relevant_ctes]
    filtered = [cte for cte in input if cte.name not in relevant_ctes]
    if filtered:
        logger.info(
            f"[Optimization][Irrelevent CTE filtering] Removing redundant CTEs {[x.name for x in filtered]}"
        )
    if len(final) == len(input):
        # nothing removed; fixed point reached
        return input
    # something was removed - re-run, as that may have exposed more dead CTEs
    return filter_irrelevant_ctes(final, root_cte)
112
+
113
+
114
def gen_inverse_map(input: list[CTE | UnionCTE]) -> dict[str, list[CTE | UnionCTE]]:
    """Build a child-lookup map: CTE name -> list of CTEs that consume it.

    A UnionCTE is recorded as the consumer of each of its internal CTEs;
    a plain CTE is recorded as a consumer of each of its parents.
    """
    inverse: dict[str, list[CTE | UnionCTE]] = {}
    for node in input:
        if isinstance(node, UnionCTE):
            for member in node.internal_ctes:
                inverse.setdefault(member.name, []).append(node)
        else:
            for upstream in node.parent_ctes:
                inverse.setdefault(upstream.name, []).append(node)
    return inverse
129
+
130
+
131
# Derivations whose results can change if the computing CTE is collapsed into
# another (row multiplicity / ordering sensitive); used by
# is_direct_return_eligible to veto the early-return optimization.
SENSITIVE_DERIVATIONS = [
    Derivation.UNNEST,
    Derivation.WINDOW,
    Derivation.RECURSIVE,
]
136
+
137
+
138
def is_direct_return_eligible(cte: CTE | UnionCTE) -> CTE | UnionCTE | None:
    """Return the single parent of ``cte`` if the final CTE can be skipped
    and the parent returned directly; otherwise None.

    Eligibility requires: exactly one parent (not a union/recursive CTE),
    the parent outputs a superset of this CTE's outputs at the same grain,
    and no concept involved (derived here, derived in the parent, or feeding
    the filter condition) uses a sensitivity-listed derivation
    (unnest/window/recursive), which could change results if collapsed.
    """
    # if isinstance(select, (PersistStatement, MultiSelectStatement)):
    #     return False
    if len(cte.parent_ctes) != 1:
        return None
    direct_parent = cte.parent_ctes[0]
    if isinstance(direct_parent, (UnionCTE, RecursiveCTE)):
        return None

    output_addresses = set([x.address for x in cte.output_columns])
    parent_output_addresses = set([x.address for x in direct_parent.output_columns])
    if not output_addresses.issubset(parent_output_addresses):
        return None
    if not direct_parent.grain == cte.grain:
        logger.info("[Direct Return] grain mismatch, cannot early exit")
        return None

    assert isinstance(cte, CTE)
    # concepts this CTE computes itself (outputs not present in its inputs)
    derived_concepts = [
        c for c in cte.source.output_concepts if c not in cte.source.input_concepts
    ]

    parent_derived_concepts = [
        c
        for c in direct_parent.source.output_concepts
        if c not in direct_parent.source.input_concepts
    ]
    condition_arguments = cte.condition.row_arguments if cte.condition else []
    for x in derived_concepts:
        if x.derivation in SENSITIVE_DERIVATIONS:
            return None
    # NOTE(review): the `x.address not in condition_arguments` /
    # `x.address in parent_derived_concepts` membership tests compare a string
    # address against concept objects; this presumably relies on the concept
    # model's __eq__ accepting strings - confirm against BuildConcept.
    for x in parent_derived_concepts:
        if x.address not in condition_arguments:
            continue
        if x.derivation in SENSITIVE_DERIVATIONS:
            return None
    for x in condition_arguments:
        # if it's derived in the parent
        if x.address in parent_derived_concepts:
            if x.derivation in SENSITIVE_DERIVATIONS:
                return None
        # this maybe needs to be recursive if we flatten a ton of derivation
        # into one CTE
        if not x.lineage:
            continue
        for z in x.lineage.concept_arguments:
            # if it was preexisting in the parent, it's safe
            if z.address in direct_parent.source.input_concepts:
                continue
            # otherwise if it's dangerous, play it safe.
            if z.derivation in SENSITIVE_DERIVATIONS:
                return None
    logger.info(
        f"[Optimization][EarlyReturn] Removing redundant output CTE {cte.name} with derived_concepts {[x.address for x in derived_concepts]}"
    )
    return direct_parent
194
+
195
+
196
def optimize_ctes(
    input: list[CTE | UnionCTE],
    root_cte: CTE | UnionCTE,
    select: SelectStatement | MultiSelectStatement,
) -> list[CTE | UnionCTE]:
    """Run all configured optimization passes over the CTE list.

    First repeatedly collapses the root CTE into its single parent while
    eligible (direct-return), carrying over ordering, limit, hidden concepts,
    and merging filter conditions. Then runs each enabled OptimizationRule to
    a fixed point (bounded by MAX_OPTIMIZATION_LOOPS), pruning and reordering
    the CTE list after each pass. Mutates the CTE objects in place and
    returns the final, topologically ordered list.
    """
    direct_parent: CTE | UnionCTE | None = root_cte
    while CONFIG.optimizations.direct_return and (
        direct_parent := is_direct_return_eligible(root_cte)
    ):
        # promote root-level presentation attributes onto the surviving parent
        direct_parent.order_by = root_cte.order_by
        direct_parent.limit = root_cte.limit
        direct_parent.hidden_concepts = root_cte.hidden_concepts.union(
            direct_parent.hidden_concepts
        )
        if root_cte.condition:
            # merge conditions with AND so no filter is lost
            if direct_parent.condition:
                direct_parent.condition = BuildConditional(
                    left=direct_parent.condition,
                    operator=BooleanOperator.AND,
                    right=root_cte.condition,
                )
            else:
                direct_parent.condition = root_cte.condition
        root_cte = direct_parent

    sort_select_output(root_cte, select)

    REGISTERED_RULES: list["OptimizationRule"] = []

    if CONFIG.optimizations.datasource_inlining:
        REGISTERED_RULES.append(InlineDatasource())
    if CONFIG.optimizations.predicate_pushdown:
        REGISTERED_RULES.append(PredicatePushdown())
    # NOTE(review): PredicatePushdownRemove is intentionally(?) gated on the
    # same predicate_pushdown flag rather than its own - confirm.
    if CONFIG.optimizations.predicate_pushdown:
        REGISTERED_RULES.append(PredicatePushdownRemove())
    if CONFIG.optimizations.hide_unused_concepts:
        REGISTERED_RULES.append(HideUnusedConcepts())
    for rule in REGISTERED_RULES:
        loops = 0
        complete = False
        while not complete and (loops <= MAX_OPTIMIZATION_LOOPS):
            actions_taken = False
            # assume we go through all CTEs once
            look_at = [root_cte, *reversed(input)]
            inverse_map = gen_inverse_map(look_at)
            for cte in look_at:
                opt = rule.optimize(cte, inverse_map)
                actions_taken = actions_taken or opt
            complete = not actions_taken
            loops += 1
            # prune newly dead CTEs and restore topological order each pass
            input = reorder_ctes(filter_irrelevant_ctes(input, root_cte))
        logger.info(
            f"[Optimization] Finished checking for {type(rule).__name__} after {loops} loop(s)"
        )

    return reorder_ctes(filter_irrelevant_ctes(input, root_cte))
@@ -0,0 +1,12 @@
1
from .base_optimization import OptimizationRule
from .hide_unused_concept import HideUnusedConcepts
from .inline_datasource import InlineDatasource
from .predicate_pushdown import PredicatePushdown, PredicatePushdownRemove

# Public API of the optimizations package: the rule base class plus every
# concrete optimization pass registered by optimize_ctes.
__all__ = [
    "OptimizationRule",
    "InlineDatasource",
    "PredicatePushdown",
    "PredicatePushdownRemove",
    "HideUnusedConcepts",
]
@@ -0,0 +1,17 @@
1
from abc import ABC, abstractmethod

from trilogy.constants import logger
from trilogy.core.models.execute import CTE, UnionCTE
5
+
6
+
7
class OptimizationRule(ABC):
    """Base class for CTE-level optimization passes.

    Subclasses implement :meth:`optimize`, which inspects (and may mutate)
    a single CTE and reports whether it changed anything, so the driver in
    ``optimize_ctes`` knows whether to run another pass.
    """

    # Declared abstract so an incomplete rule fails loudly at instantiation
    # instead of silently raising NotImplementedError at optimization time.
    @abstractmethod
    def optimize(
        self, cte: CTE | UnionCTE, inverse_map: dict[str, list[CTE | UnionCTE]]
    ) -> bool:
        """Attempt to optimize ``cte``; return True if anything was changed.

        ``inverse_map`` maps a CTE name to the CTEs that directly consume it.
        """
        raise NotImplementedError

    def log(self, message: str):
        """Emit an info-level log line tagged with the concrete rule's name."""
        logger.info(f"[Optimization][{self.__class__.__name__}] {message}")

    def debug(self, message: str):
        """Emit a debug-level log line tagged with the concrete rule's name."""
        logger.debug(f"[Optimization][{self.__class__.__name__}] {message}")
@@ -0,0 +1,47 @@
1
+ from trilogy.core.models.build import (
2
+ BuildConcept,
3
+ )
4
+ from trilogy.core.models.execute import CTE, UnionCTE
5
+ from trilogy.core.optimizations.base_optimization import OptimizationRule
6
+
7
+
8
class HideUnusedConcepts(OptimizationRule):
    """Mark output columns of a CTE as hidden when no consuming CTE reads them.

    Usage is determined by rendering each child CTE with the base dialect and
    inspecting the renderer's ``used_map`` for references back to this CTE.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

    def optimize(
        self, cte: CTE | UnionCTE, inverse_map: dict[str, list[CTE | UnionCTE]]
    ) -> bool:
        used = set()
        # local import to avoid a circular dependency with the dialect module
        from trilogy.dialect.base import BaseDialect

        renderer = BaseDialect()
        children = inverse_map.get(cte.name, [])
        if not children:
            # final/root CTEs have no consumers; never hide their outputs
            return False
        for v in children:
            self.log(f"Analyzing usage of {cte.name} in {v.name}")
            renderer.render_cte(v)
            # used_map is keyed by the referenced CTE's name; rendering each
            # child on the same renderer accumulates references across children
            used = renderer.used_map.get(cte.name, set())
        self.log(f"Used concepts for {cte.name}: {used} from {renderer.used_map}")
        add_to_hidden: list[BuildConcept] = []
        for concept in cte.output_columns:
            if concept.address not in used:
                add_to_hidden.append(concept)
        # only act if this pass would hide something not already hidden
        newly_hidden = [
            x.address for x in add_to_hidden if x.address not in cte.hidden_concepts
        ]
        non_hidden = [
            x for x in cte.output_columns if x.address not in cte.hidden_concepts
        ]
        # keep at least one visible column so the CTE remains a valid SELECT
        if not newly_hidden or len(non_hidden) <= 1:
            return False
        self.log(
            f"Hiding unused concepts {[x.address for x in add_to_hidden]} from {cte.name} (used: {used}, all: {[x.address for x in cte.output_columns]})"
        )
        candidates = [x.address for x in cte.output_columns if x.address not in used]
        if len(candidates) == len(set([x.address for x in cte.output_columns])):
            # pop one out
            candidates.pop()
        cte.hidden_concepts = set(candidates)
        return True
@@ -0,0 +1,102 @@
1
+ from collections import defaultdict
2
+
3
+ from trilogy.constants import CONFIG
4
+ from trilogy.core.models.build import BuildDatasource
5
+ from trilogy.core.models.execute import CTE, RecursiveCTE, UnionCTE
6
+ from trilogy.core.optimizations.base_optimization import OptimizationRule
7
+
8
+
9
class InlineDatasource(OptimizationRule):
    """Collapse parent CTEs that merely wrap a raw datasource into their child.

    A parent qualifies when it is a root datasource wrapper with no parents,
    no filter condition, an inlineable BuildDatasource whose outputs cover
    everything the child pulls from it, and a compatible grain. Inlining is
    deferred by one pass per candidate (see the ``candidates`` bookkeeping)
    so reference counts can accumulate before the cutoff check.
    """

    def __init__(self):
        super().__init__()
        # cte name -> set of parent names already seen as inline candidates
        self.candidates = defaultdict(lambda: set())
        # datasource identifier -> number of candidate references observed
        self.count = defaultdict(lambda: 0)

    def optimize(
        self, cte: CTE | UnionCTE, inverse_map: dict[str, list[CTE | UnionCTE]]
    ) -> bool:
        if isinstance(cte, UnionCTE):
            # recurse into the union's members; any change counts
            return any(
                self.optimize(x, inverse_map=inverse_map) for x in cte.internal_ctes
            )
        if isinstance(cte, RecursiveCTE):
            return False
        if not cte.parent_ctes:
            return False

        self.debug(
            f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
        )
        to_inline: list[CTE] = []
        force_group = False
        for parent_cte in cte.parent_ctes:
            if isinstance(parent_cte, UnionCTE):
                continue
            if isinstance(parent_cte, RecursiveCTE):
                continue
            if not parent_cte.is_root_datasource:
                self.debug(f"Cannot inline: parent {parent_cte.name} is not root")
                continue
            if parent_cte.parent_ctes:
                self.debug(f"Cannot inline: parent {parent_cte.name} has parents")
                continue
            if parent_cte.condition:
                self.debug(
                    f"Cannot inline: parent {parent_cte.name} has condition, cannot be inlined"
                )
                continue
            raw_root = parent_cte.source.datasources[0]
            if not isinstance(raw_root, BuildDatasource):
                self.debug(f"Cannot inline: Parent {parent_cte.name} is not datasource")
                continue
            root: BuildDatasource = raw_root
            if not root.can_be_inlined:
                self.debug(
                    f"Cannot inline: Parent {parent_cte.name} datasource is not inlineable"
                )
                continue
            root_outputs = {x.address for x in root.output_concepts}
            # addresses the child actually sources from this parent
            inherited = {
                x for x, v in cte.source_map.items() if v and parent_cte.name in v
            }
            if not inherited.issubset(root_outputs):
                cte_missing = inherited - root_outputs
                self.log(
                    f"Cannot inline: Not all required inputs to {parent_cte.name} are found on datasource, missing {cte_missing}"
                )
                continue
            if not root.grain.issubset(parent_cte.grain):
                self.log(
                    f"Cannot inline: {parent_cte.name} is at wrong grain to inline ({root.grain} vs {parent_cte.grain})"
                )
                continue
            to_inline.append(parent_cte)

        optimized = False
        for replaceable in to_inline:
            if replaceable.name not in self.candidates[cte.name]:
                # first sighting: record it and report progress so the driver
                # calls us again; actual inlining happens on a later pass
                self.candidates[cte.name].add(replaceable.name)
                self.count[replaceable.source.identifier] += 1
                return True
            if (
                self.count[replaceable.source.identifier]
                > CONFIG.optimizations.constant_inline_cutoff
            ):
                self.log(
                    f"Skipping inlining raw datasource {replaceable.source.identifier} ({replaceable.name}) due to multiple references"
                )
                continue
            if not replaceable.source.datasources[0].grain.issubset(replaceable.grain):
                # NOTE(review): this log interpolates `parent_cte` and `root`,
                # which are stale leftovers from the scan loop above -
                # presumably `replaceable` / its datasource were intended.
                self.log(
                    f"Forcing group ({parent_cte.grain} being replaced by inlined source {root.grain})"
                )
                # NOTE(review): force_group is never reset, so once set it
                # also applies to every subsequent replaceable - confirm
                # whether that is intended.
                force_group = True
            result = cte.inline_parent_datasource(replaceable, force_group=force_group)
            if result:
                self.log(
                    f"Inlined parent {replaceable.name} with {replaceable.source.identifier}"
                )
                optimized = True
            else:
                self.log(f"Failed to inline {replaceable.name}")
        return optimized
@@ -0,0 +1,245 @@
1
+ from trilogy.core.enums import (
2
+ BooleanOperator,
3
+ SourceType,
4
+ )
5
+ from trilogy.core.models.build import (
6
+ BuildComparison,
7
+ BuildConceptArgs,
8
+ BuildConditional,
9
+ BuildDatasource,
10
+ BuildParenthetical,
11
+ BuildWindowItem,
12
+ )
13
+ from trilogy.core.models.execute import CTE, UnionCTE
14
+ from trilogy.core.optimizations.base_optimization import OptimizationRule
15
+ from trilogy.core.processing.utility import is_scalar_condition
16
+ from trilogy.utility import unique
17
+
18
+
19
def is_child_of(a, comparison):
    """Return True when ``a`` equals ``comparison`` or appears as a branch of
    an AND-combined BuildConditional rooted at ``comparison``.

    OR-combined conditionals never qualify: a member of an OR is not
    guaranteed to hold for every row the condition admits.
    """
    if comparison == a:
        return True
    if not isinstance(comparison, BuildConditional):
        return False
    if comparison.operator != BooleanOperator.AND:
        return False
    # AND-chain: descend into both branches
    return is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
28
+
29
+
30
class PredicatePushdown(OptimizationRule):
    """Push scalar filter conditions from a CTE up into its parent CTEs.

    A decomposed condition component is copied (ANDed) onto a parent when all
    of its row arguments are materialized there and every other consumer of
    that parent carries the same condition, so the pushdown cannot change
    results for siblings.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # cte name -> True once fully processed; a parent is re-tainted
        # (set False) whenever something new is pushed onto it
        self.complete: dict[str, bool] = {}

    def _check_parent(
        self,
        cte: CTE | UnionCTE,
        parent_cte: CTE | UnionCTE,
        candidate: BuildConditional | BuildComparison | BuildParenthetical | None,
        inverse_map: dict[str, list[CTE | UnionCTE]],
    ):
        """Try to push ``candidate`` onto ``parent_cte``; return True on success.

        Mutates ``parent_cte`` (condition, source_map, parent_ctes) in place.
        """
        if not isinstance(candidate, BuildConceptArgs):
            return False
        if not isinstance(parent_cte, CTE):
            return False
        row_conditions = {x.address for x in candidate.row_arguments}
        # existence_arguments is a sequence of tuples; flatten to addresses
        existence_conditions = {
            y.address for x in candidate.existence_arguments for y in x
        }
        all_inputs = {x.address for x in candidate.concept_arguments}
        if is_child_of(candidate, parent_cte.condition):
            # parent already enforces this condition
            return False
        # columns the parent computes itself (no upstream source)
        non_materialized = [k for k, v in parent_cte.source_map.items() if v == []]
        concrete = [
            x for x in parent_cte.output_columns if x.address in non_materialized
        ]
        if any(isinstance(x.lineage, BuildWindowItem) for x in concrete):
            self.debug(
                f"CTE {parent_cte.name} has window clause calculation, cannot push up to this without changing results"
            )
            return False
        materialized = {k for k, v in parent_cte.source_map.items() if v != []}

        if not row_conditions or not materialized:
            return False
        output_addresses = {x.address for x in parent_cte.output_columns}
        # if any of the existence conditions are created on the asset, we can't push up to it
        if existence_conditions and existence_conditions.intersection(output_addresses):
            return False
        if existence_conditions:
            self.log(
                f"Not pushing up existence {candidate} to {parent_cte.name} as it is a filter node"
            )
        if parent_cte.source.source_type == SourceType.FILTER:
            return False
        # if it's a root datasource, we can filter on _any_ of the output concepts
        if parent_cte.is_root_datasource:
            extra_check = {
                x.address for x in parent_cte.source.datasources[0].output_concepts
            }
            if row_conditions.issubset(extra_check):
                for x in row_conditions:
                    if x not in materialized:
                        # materialize the column from the raw datasource so
                        # the pushed-up condition can reference it
                        materialized.add(x)
                        parent_cte.source_map[x] = [
                            parent_cte.source.datasources[0].name
                        ]
        if row_conditions.issubset(materialized):
            children = inverse_map.get(parent_cte.name, [])
            # only safe if every consumer of the parent applies the same filter
            if all([is_child_of(candidate, child.condition) for child in children]):
                self.log(
                    f"All concepts [{row_conditions}] and existence conditions [{existence_conditions}] not block pushup of [{output_addresses}]found on {parent_cte.name} with existing {parent_cte.condition} and all it's {len(children)} children include same filter; pushing up {candidate}"
                )
                if parent_cte.condition and not is_scalar_condition(
                    parent_cte.condition
                ):
                    self.log("Parent condition is not scalar, not safe to push up")
                    return False
                if parent_cte.condition:
                    parent_cte.condition = BuildConditional(
                        left=parent_cte.condition,
                        operator=BooleanOperator.AND,
                        right=candidate,
                    )
                else:
                    parent_cte.condition = candidate
                # promote up existence sources
                if all_inputs.difference(row_conditions):
                    for x in all_inputs.difference(row_conditions):
                        if x not in parent_cte.source_map and x in cte.source_map:
                            # wire the parent to whichever of the child's
                            # parents supplies the existence input
                            sources = [
                                parent
                                for parent in cte.parent_ctes
                                if parent.name in cte.source_map[x]
                            ]
                            parent_cte.source_map[x] = cte.source_map[x]
                            parent_cte.parent_ctes = unique(
                                parent_cte.parent_ctes + sources, "name"
                            )
                return True
        self.debug(
            f"conditions {row_conditions} not subset of parent {parent_cte.name} parent has {materialized} "
        )
        return False

    def optimize(
        self, cte: CTE | UnionCTE, inverse_map: dict[str, list[CTE | UnionCTE]]
    ) -> bool:
        # TODO - pushdown through unions
        if isinstance(cte, UnionCTE):
            return False
        optimized = False

        if not cte.parent_ctes:
            self.debug(f"No parent CTEs for {cte.name}")
            return False

        if not cte.condition:
            self.debug(f"No CTE condition for {cte.name}")
            return False

        if self.complete.get(cte.name):
            self.debug("Have done this CTE before")
            return False

        self.debug(
            f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
        )
        # split an AND-chain into independently pushable components
        if isinstance(cte.condition, BuildConditional):
            candidates = cte.condition.decompose()
        else:
            candidates = [cte.condition]
        self.debug(
            f"Have {len(candidates)} candidates to try to push down from parent {type(cte.condition)}"
        )
        optimized = False
        for candidate in candidates:
            if not is_scalar_condition(candidate):
                self.debug(
                    f"Skipping {candidate} as not a basic [no aggregate, etc] condition"
                )
                continue
            self.debug(
                f"Checking candidate {candidate}, {type(candidate)}, scalar: {is_scalar_condition(candidate)}"
            )
            for parent_cte in cte.parent_ctes:
                local_pushdown = self._check_parent(
                    cte=cte,
                    parent_cte=parent_cte,
                    candidate=candidate,
                    inverse_map=inverse_map,
                )
                optimized = optimized or local_pushdown
                if local_pushdown:
                    # taint a CTE again when something is pushed up to it.
                    self.complete[parent_cte.name] = False
                    self.debug(
                        f"Pushed down {candidate} from {cte.name} to {parent_cte.name}"
                    )

        self.complete[cte.name] = True
        return optimized
183
+
184
+
185
class PredicatePushdownRemove(OptimizationRule):
    """Drop a CTE's filter condition once it has become redundant.

    Companion to PredicatePushdown: when every (non-existence-only) parent
    already enforces the same condition and the CTE reads no raw datasource
    directly, the local condition is removed, along with any parents that
    only supplied existence-check inputs.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # cte name -> True once processed without change
        self.complete: dict[str, bool] = {}

    def optimize(
        self, cte: CTE | UnionCTE, inverse_map: dict[str, list[CTE | UnionCTE]]
    ) -> bool:
        if isinstance(cte, UnionCTE):
            return False
        optimized = False

        if not cte.parent_ctes:
            self.debug(f"No parent CTEs for {cte.name}")

            return False

        if not cte.condition:
            self.debug(f"No CTE condition for {cte.name}")
            return False

        # for each parent: does it already enforce this CTE's condition?
        parent_filter_status = {
            parent.name: is_child_of(cte.condition, parent.condition)
            for parent in cte.parent_ctes
        }
        # flatten existence argument tuples to a list

        flattened_existence = [
            x.address for y in cte.condition.existence_arguments for x in y
        ]

        # parents whose every output feeds only the existence checks
        existence_only = [
            parent.name
            for parent in cte.parent_ctes
            if all([x.address in flattened_existence for x in parent.output_columns])
            and len(flattened_existence) > 0
        ]
        if all(
            [
                value
                for key, value in parent_filter_status.items()
                if key not in existence_only
            ]
        ) and not any([isinstance(x, BuildDatasource) for x in cte.source.datasources]):
            self.log(
                f"All parents of {cte.name} have same filter or are existence only inputs, removing filter from {cte.name}"
            )
            cte.condition = None
            # remove any "parent" CTEs that provided only existence inputs
            if existence_only:
                original = [y.name for y in cte.parent_ctes]
                cte.parent_ctes = [
                    x for x in cte.parent_ctes if x.name not in existence_only
                ]
                self.log(
                    f"new parents for {cte.name} are {[x.name for x in cte.parent_ctes]}, vs {original}"
                )
            return True

        self.complete[cte.name] = True
        return optimized