pytrilogy 0.0.2.57__py3-none-any.whl → 0.0.3.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (75)
  1. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.0.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1845 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +696 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +17 -22
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +181 -146
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +51 -45
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +13 -10
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +59 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +19 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +92 -77
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +55 -40
  45. trilogy/core/processing/nodes/merge_node.py +47 -38
  46. trilogy/core/processing/nodes/select_node_v2.py +54 -40
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +108 -80
  51. trilogy/core/query_processor.py +67 -49
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +152 -111
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/base_hook.py +6 -4
  65. trilogy/hooks/query_debugger.py +110 -93
  66. trilogy/parser.py +1 -1
  67. trilogy/parsing/common.py +303 -64
  68. trilogy/parsing/parse_engine.py +263 -617
  69. trilogy/parsing/render.py +50 -26
  70. trilogy/scripts/trilogy.py +2 -1
  71. pytrilogy-0.0.2.57.dist-info/RECORD +0 -87
  72. trilogy/core/models.py +0 -4960
  73. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/LICENSE.md +0 -0
  74. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/entry_points.txt +0 -0
  75. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,9 @@
1
1
  from trilogy.constants import CONFIG, logger
2
- from trilogy.core.enums import BooleanOperator, PurposeLineage
3
- from trilogy.core.models import (
4
- CTE,
5
- Conditional,
6
- MultiSelectStatement,
7
- SelectStatement,
8
- UnionCTE,
2
+ from trilogy.core.enums import BooleanOperator, Derivation
3
+ from trilogy.core.models.build import (
4
+ BuildConditional,
9
5
  )
6
+ from trilogy.core.models.execute import CTE, UnionCTE
10
7
  from trilogy.core.optimizations import (
11
8
  InlineConstant,
12
9
  InlineDatasource,
@@ -15,6 +12,7 @@ from trilogy.core.optimizations import (
15
12
  PredicatePushdownRemove,
16
13
  )
17
14
  from trilogy.core.processing.utility import sort_select_output
15
+ from trilogy.core.statements.author import MultiSelectStatement, SelectStatement
18
16
 
19
17
  MAX_OPTIMIZATION_LOOPS = 100
20
18
 
@@ -136,33 +134,29 @@ def is_direct_return_eligible(cte: CTE | UnionCTE) -> CTE | UnionCTE | None:
136
134
 
137
135
  assert isinstance(cte, CTE)
138
136
  derived_concepts = [
139
- c
140
- for c in cte.source.output_concepts + cte.source.hidden_concepts
141
- if c not in cte.source.input_concepts
137
+ c for c in cte.source.output_concepts if c not in cte.source.input_concepts
142
138
  ]
143
139
 
144
140
  parent_derived_concepts = [
145
141
  c
146
142
  for c in direct_parent.source.output_concepts
147
- + direct_parent.source.hidden_concepts
148
143
  if c not in direct_parent.source.input_concepts
149
144
  ]
150
145
  condition_arguments = cte.condition.row_arguments if cte.condition else []
151
146
  for x in derived_concepts:
152
- if x.derivation == PurposeLineage.WINDOW:
147
+ if x.derivation == Derivation.WINDOW:
153
148
  return None
154
- if x.derivation == PurposeLineage.UNNEST:
149
+ if x.derivation == Derivation.UNNEST:
155
150
  return None
156
- if x.derivation == PurposeLineage.AGGREGATE:
151
+ if x.derivation == Derivation.AGGREGATE:
157
152
  return None
158
153
  for x in parent_derived_concepts:
159
154
  if x.address not in condition_arguments:
160
155
  continue
161
- if x.derivation == PurposeLineage.UNNEST:
156
+ if x.derivation == Derivation.UNNEST:
162
157
  return None
163
- if x.derivation == PurposeLineage.WINDOW:
158
+ if x.derivation == Derivation.WINDOW:
164
159
  return None
165
-
166
160
  logger.info(
167
161
  f"[Optimization][EarlyReturn] Removing redundant output CTE with derived_concepts {[x.address for x in derived_concepts]}"
168
162
  )
@@ -180,12 +174,12 @@ def optimize_ctes(
180
174
  ):
181
175
  direct_parent.order_by = root_cte.order_by
182
176
  direct_parent.limit = root_cte.limit
183
- direct_parent.hidden_concepts = (
184
- root_cte.hidden_concepts + direct_parent.hidden_concepts
177
+ direct_parent.hidden_concepts = root_cte.hidden_concepts.union(
178
+ direct_parent.hidden_concepts
185
179
  )
186
180
  if root_cte.condition:
187
181
  if direct_parent.condition:
188
- direct_parent.condition = Conditional(
182
+ direct_parent.condition = BuildConditional(
189
183
  left=direct_parent.condition,
190
184
  operator=BooleanOperator.AND,
191
185
  right=root_cte.condition,
@@ -197,14 +191,15 @@ def optimize_ctes(
197
191
  sort_select_output(root_cte, select)
198
192
 
199
193
  REGISTERED_RULES: list["OptimizationRule"] = []
200
- if CONFIG.optimizations.constant_inlining:
201
- REGISTERED_RULES.append(InlineConstant())
194
+
202
195
  if CONFIG.optimizations.datasource_inlining:
203
196
  REGISTERED_RULES.append(InlineDatasource())
204
197
  if CONFIG.optimizations.predicate_pushdown:
205
198
  REGISTERED_RULES.append(PredicatePushdown())
206
199
  if CONFIG.optimizations.predicate_pushdown:
207
200
  REGISTERED_RULES.append(PredicatePushdownRemove())
201
+ if CONFIG.optimizations.constant_inlining:
202
+ REGISTERED_RULES.append(InlineConstant())
208
203
  for rule in REGISTERED_RULES:
209
204
  loops = 0
210
205
  complete = False
@@ -1,7 +1,7 @@
1
1
  from abc import ABC
2
2
 
3
3
  from trilogy.constants import logger
4
- from trilogy.core.models import CTE, UnionCTE
4
+ from trilogy.core.models.execute import CTE, UnionCTE
5
5
 
6
6
 
7
7
  class OptimizationRule(ABC):
@@ -1,7 +1,7 @@
1
- from trilogy.core.enums import PurposeLineage
2
- from trilogy.core.models import (
1
+ from trilogy.core.enums import Derivation
2
+ from trilogy.core.models.build import BuildConcept
3
+ from trilogy.core.models.execute import (
3
4
  CTE,
4
- Concept,
5
5
  UnionCTE,
6
6
  )
7
7
  from trilogy.core.optimizations.base_optimization import OptimizationRule
@@ -14,11 +14,11 @@ class InlineConstant(OptimizationRule):
14
14
  if isinstance(cte, UnionCTE):
15
15
  return any(self.optimize(x, inverse_map) for x in cte.internal_ctes)
16
16
 
17
- to_inline: list[Concept] = []
17
+ to_inline: list[BuildConcept] = []
18
18
  for x in cte.source.input_concepts:
19
19
  if x.address not in cte.source_map:
20
20
  continue
21
- if x.derivation == PurposeLineage.CONSTANT:
21
+ if x.derivation == Derivation.CONSTANT:
22
22
  self.log(f"Found constant {x.address} on {cte.name}")
23
23
  to_inline.append(x)
24
24
  if to_inline:
@@ -27,7 +27,7 @@ class InlineConstant(OptimizationRule):
27
27
  self.log(f"Attempting to inline constant {c.address} on {cte.name}")
28
28
  test = cte.inline_constant(c)
29
29
  if test:
30
- self.log(f"Successfully inlined constant to {cte.name}")
30
+ self.log(f"Successfully inlined constant {c.address} to {cte.name}")
31
31
  inlined = True
32
32
  else:
33
33
  self.log(f"Could not inline constant to {cte.name}")
@@ -1,9 +1,11 @@
1
1
  from collections import defaultdict
2
2
 
3
3
  from trilogy.constants import CONFIG
4
- from trilogy.core.models import (
4
+
5
+ # from trilogy.core.models.datasource import Datasource
6
+ from trilogy.core.models.build import BuildDatasource
7
+ from trilogy.core.models.execute import (
5
8
  CTE,
6
- Datasource,
7
9
  UnionCTE,
8
10
  )
9
11
  from trilogy.core.optimizations.base_optimization import OptimizationRule
@@ -35,21 +37,25 @@ class InlineDatasource(OptimizationRule):
35
37
  if isinstance(parent_cte, UnionCTE):
36
38
  continue
37
39
  if not parent_cte.is_root_datasource:
38
- self.debug(f"parent {parent_cte.name} is not root")
40
+ self.debug(f"Cannot inline: parent {parent_cte.name} is not root")
39
41
  continue
40
42
  if parent_cte.parent_ctes:
41
- self.debug(f"parent {parent_cte.name} has parents")
43
+ self.debug(f"Cannot inline: parent {parent_cte.name} has parents")
42
44
  continue
43
45
  if parent_cte.condition:
44
- self.debug(f"parent {parent_cte.name} has condition, cannot be inlined")
46
+ self.debug(
47
+ f"Cannot inline: parent {parent_cte.name} has condition, cannot be inlined"
48
+ )
45
49
  continue
46
50
  raw_root = parent_cte.source.datasources[0]
47
- if not isinstance(raw_root, Datasource):
48
- self.debug(f"Parent {parent_cte.name} is not datasource")
51
+ if not isinstance(raw_root, BuildDatasource):
52
+ self.debug(f"Cannot inline: Parent {parent_cte.name} is not datasource")
49
53
  continue
50
- root: Datasource = raw_root
54
+ root: BuildDatasource = raw_root
51
55
  if not root.can_be_inlined:
52
- self.debug(f"Parent {parent_cte.name} datasource is not inlineable")
56
+ self.debug(
57
+ f"Cannot inline: Parent {parent_cte.name} datasource is not inlineable"
58
+ )
53
59
  continue
54
60
  root_outputs = {x.address for x in root.output_concepts}
55
61
  inherited = {
@@ -58,12 +64,12 @@ class InlineDatasource(OptimizationRule):
58
64
  if not inherited.issubset(root_outputs):
59
65
  cte_missing = inherited - root_outputs
60
66
  self.log(
61
- f"Not all {parent_cte.name} require inputs are found on datasource, missing {cte_missing}"
67
+ f"Cannot inline: Not all required inputs to {parent_cte.name} are found on datasource, missing {cte_missing}"
62
68
  )
63
69
  continue
64
70
  if not root.grain.issubset(parent_cte.grain):
65
71
  self.log(
66
- f"{parent_cte.name} is at wrong grain to inline ({root.grain} vs {parent_cte.grain})"
72
+ f"Cannot inline: {parent_cte.name} is at wrong grain to inline ({root.grain} vs {parent_cte.grain})"
67
73
  )
68
74
  continue
69
75
  to_inline.append(parent_cte)
@@ -1,14 +1,15 @@
1
- from trilogy.core.models import (
2
- CTE,
1
+ from trilogy.core.enums import (
3
2
  BooleanOperator,
4
- Comparison,
5
- ConceptArgs,
6
- Conditional,
7
- Datasource,
8
- Parenthetical,
9
- UnionCTE,
10
- WindowItem,
11
3
  )
4
+ from trilogy.core.models.build import (
5
+ BuildComparison,
6
+ BuildConceptArgs,
7
+ BuildConditional,
8
+ BuildDatasource,
9
+ BuildParenthetical,
10
+ BuildWindowItem,
11
+ )
12
+ from trilogy.core.models.execute import CTE, UnionCTE
12
13
  from trilogy.core.optimizations.base_optimization import OptimizationRule
13
14
  from trilogy.core.processing.utility import is_scalar_condition
14
15
  from trilogy.utility import unique
@@ -18,7 +19,7 @@ def is_child_of(a, comparison):
18
19
  base = comparison == a
19
20
  if base:
20
21
  return True
21
- if isinstance(comparison, Conditional):
22
+ if isinstance(comparison, BuildConditional):
22
23
  return (
23
24
  is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
24
25
  ) and comparison.operator == BooleanOperator.AND
@@ -34,10 +35,10 @@ class PredicatePushdown(OptimizationRule):
34
35
  self,
35
36
  cte: CTE | UnionCTE,
36
37
  parent_cte: CTE | UnionCTE,
37
- candidate: Conditional | Comparison | Parenthetical | None,
38
+ candidate: BuildConditional | BuildComparison | BuildParenthetical | None,
38
39
  inverse_map: dict[str, list[CTE | UnionCTE]],
39
40
  ):
40
- if not isinstance(candidate, ConceptArgs):
41
+ if not isinstance(candidate, BuildConceptArgs):
41
42
  return False
42
43
  if not isinstance(parent_cte, CTE):
43
44
  return False
@@ -52,7 +53,7 @@ class PredicatePushdown(OptimizationRule):
52
53
  concrete = [
53
54
  x for x in parent_cte.output_columns if x.address in non_materialized
54
55
  ]
55
- if any(isinstance(x.lineage, WindowItem) for x in concrete):
56
+ if any(isinstance(x.lineage, BuildWindowItem) for x in concrete):
56
57
  self.debug(
57
58
  f"CTE {parent_cte.name} has window clause calculation, cannot push up to this without changing results"
58
59
  )
@@ -88,7 +89,7 @@ class PredicatePushdown(OptimizationRule):
88
89
  self.log("Parent condition is not scalar, not safe to push up")
89
90
  return False
90
91
  if parent_cte.condition:
91
- parent_cte.condition = Conditional(
92
+ parent_cte.condition = BuildConditional(
92
93
  left=parent_cte.condition,
93
94
  operator=BooleanOperator.AND,
94
95
  right=candidate,
@@ -137,7 +138,7 @@ class PredicatePushdown(OptimizationRule):
137
138
  self.debug(
138
139
  f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
139
140
  )
140
- if isinstance(cte.condition, Conditional):
141
+ if isinstance(cte.condition, BuildConditional):
141
142
  candidates = cte.condition.decompose()
142
143
  else:
143
144
  candidates = [cte.condition]
@@ -216,7 +217,7 @@ class PredicatePushdownRemove(OptimizationRule):
216
217
  for key, value in parent_filter_status.items()
217
218
  if key not in existence_only
218
219
  ]
219
- ) and not any([isinstance(x, Datasource) for x in cte.source.datasources]):
220
+ ) and not any([isinstance(x, BuildDatasource) for x in cte.source.datasources]):
220
221
  self.log(
221
222
  f"All parents of {cte.name} have same filter or are existence only inputs, removing filter from {cte.name}"
222
223
  )