pytrilogy 0.0.1.109__py3-none-any.whl → 0.0.1.110__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.1.109
3
+ Version: 0.0.1.110
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -1,6 +1,6 @@
1
- trilogy/__init__.py,sha256=e2cU9lfgy43E_xTR0vpKBnQaJvz7-8qF7hjum9oQk-k,292
1
+ trilogy/__init__.py,sha256=zz6RmV7fp8n1Ezl-E5jUW92Ns-EMfac6jy3fX5zQuzc,292
2
2
  trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- trilogy/constants.py,sha256=LxiK2TiVQPEa6tXkxWk9DJHOR3zsGNSqgQuqtOf66cw,518
3
+ trilogy/constants.py,sha256=C6mwa0BpVyENkcWi-dwqHorCP85ScTXDemfHFiSmLQ8,737
4
4
  trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
5
5
  trilogy/executor.py,sha256=xF6wzbhP6a3wz4nrxsRCKeKF7qytUQEL75oI3BGJ2hQ,8744
6
6
  trilogy/parser.py,sha256=UtuqSiGiCjpMAYgo1bvNq-b7NSzCA5hzbUW31RXaMII,281
@@ -8,7 +8,7 @@ trilogy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  trilogy/utility.py,sha256=zM__8r29EsyDW7K9VOHz8yvZC2bXFzh7xKy3cL7GKsk,707
9
9
  trilogy/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  trilogy/core/constants.py,sha256=LL8NLvxb3HRnAjvofyLRXqQJijLcYiXAQYQzGarVD-g,128
11
- trilogy/core/enums.py,sha256=KEZQTzJ8tlGIukuUwQUIG1FTHOP1B4i0EeCgFjfsbDw,5394
11
+ trilogy/core/enums.py,sha256=XSvq2yPxn9oJ18nhn7UERgIV1IXZDRiSWaGpvtU34eE,5416
12
12
  trilogy/core/env_processor.py,sha256=SU-jpaGfoWLe9sGTeQYG1qjVnwGQ7TwctmnJRlfzluc,1459
13
13
  trilogy/core/environment_helpers.py,sha256=mzBDHhdF9ssZ_-LY8CcaM_ddfJavkpRYrFImUd3cjXI,5972
14
14
  trilogy/core/ergonomics.py,sha256=w3gwXdgrxNHCuaRdyKg73t6F36tj-wIjQf47WZkHmJk,1465
@@ -16,8 +16,8 @@ trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,56
16
16
  trilogy/core/functions.py,sha256=zkRReytiotOBAW-a3Ri5eoejZDYTt2-7Op80ZxZxUmw,9129
17
17
  trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
18
18
  trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
19
- trilogy/core/models.py,sha256=AwVZNiDN1hM0BeEquEyrfTnuVBPDR8UuTnoFUhAaqUo,109648
20
- trilogy/core/optimization.py,sha256=chfzpLVJo9eg8H4e2hdnpRqWMDTQ3tJWPdDfGESa-EU,4510
19
+ trilogy/core/models.py,sha256=FNAMbqJrHh-KfC8QbBe3anWVdNxP-uMjs--MrbJM8QM,109943
20
+ trilogy/core/optimization.py,sha256=5n5HMGGdTGkDZf479lZPpv2angLSxklZJ0D4DBrFDeA,8411
21
21
  trilogy/core/query_processor.py,sha256=6BqLYPwyFkRtueTIRFZi3IcVFTpbpGRNowayhSn3_AY,11805
22
22
  trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  trilogy/core/processing/concept_strategies_v3.py,sha256=27lZXFLgDEF3sh2MUR7HX_atVz7TC1fJB7z3oxa1TcY,22610
@@ -32,7 +32,7 @@ trilogy/core/processing/node_generators/group_node.py,sha256=xWI1xNIXEOj6jlRGD9h
32
32
  trilogy/core/processing/node_generators/group_to_node.py,sha256=BzPdYwzoo8gRMH7BDffTTXq4z-mjfCEzvfB5I-P0_nw,2941
33
33
  trilogy/core/processing/node_generators/multiselect_node.py,sha256=vP84dnLQy6dtypi6mUbt9sMAcmmrTgQ1Oz4GI6X1IEo,6421
34
34
  trilogy/core/processing/node_generators/node_merge_node.py,sha256=sQQ9jhw1oAJh649DBAJX6U7r_E_piFS95mxKvm7pxqQ,5818
35
- trilogy/core/processing/node_generators/rowset_node.py,sha256=zlSRd58V4fDqz1Km65cWblOrEFpXAT3jlSvv6NKC3pc,4909
35
+ trilogy/core/processing/node_generators/rowset_node.py,sha256=BYTpXyiFJwoFp_n8kKE-HUdnwgaK_58n0rSFOz2jOVM,5141
36
36
  trilogy/core/processing/node_generators/select_node.py,sha256=xeCqIUEubrf3u_QQfbGdf1BG4fO0HYQ64hiFur8NUqY,20080
37
37
  trilogy/core/processing/node_generators/unnest_node.py,sha256=s1VXQZSf1LnX3ISeQ5JzmzmCKUw30-5OK_f0YTB9_48,1031
38
38
  trilogy/core/processing/node_generators/window_node.py,sha256=ekazi5eXxnShpcp-qukXNG4DHFdULoXrX-YWUWLNEpM,2527
@@ -40,7 +40,7 @@ trilogy/core/processing/nodes/__init__.py,sha256=gzKxGSduIQ5QwpMWrmwSYiE8sg2mWej
40
40
  trilogy/core/processing/nodes/base_node.py,sha256=Du7hRjVVOAiGb0okytzKIa_TQqhwTNYGU8PGNnrE1xs,9142
41
41
  trilogy/core/processing/nodes/filter_node.py,sha256=DqSRv8voEajPZqzeeiIsxuv4ubvsmeQcCW6x_v2CmOk,1359
42
42
  trilogy/core/processing/nodes/group_node.py,sha256=Y_NWB_AwFrE-YithjZ7lYYDN4e0el4su3ICq2EIr3HA,3837
43
- trilogy/core/processing/nodes/merge_node.py,sha256=baLDHCJiX5tk1dsVTm1KebJKPyy1w3WGMfN5wdm0BRw,12759
43
+ trilogy/core/processing/nodes/merge_node.py,sha256=uo1AfLjA02EiIpJku8T0TBjN2D-IyP6NVq5m7BfiZbQ,12413
44
44
  trilogy/core/processing/nodes/select_node_v2.py,sha256=tAADeVruch-flFiedbY1zi7ukMG2RpWecvxxZ5aL3ZU,6354
45
45
  trilogy/core/processing/nodes/unnest_node.py,sha256=t4kY3a_dR3iXistPemStfdw0uJfnxwTcoQg1HiDa3xo,1501
46
46
  trilogy/core/processing/nodes/window_node.py,sha256=QjAWgqBZqFSRCPwc7JBmgQJobWW50rsHI0pjJe0Zzg0,926
@@ -69,9 +69,9 @@ trilogy/parsing/parse_engine.py,sha256=iOqKUCyLeHyFVwwAt-XTSJGHia4zzLUN6bYDuIfJ1
69
69
  trilogy/parsing/render.py,sha256=fxjpq2FZLgllw_d4cru-t_IXNPAz2DmYkT7v9ED0XRI,11540
70
70
  trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
71
  trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
72
- pytrilogy-0.0.1.109.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
73
- pytrilogy-0.0.1.109.dist-info/METADATA,sha256=-m-LMyjvq4whCY7LpRM9zFqpn3hqH4-DuuP0JYOFfwQ,7882
74
- pytrilogy-0.0.1.109.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
75
- pytrilogy-0.0.1.109.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
76
- pytrilogy-0.0.1.109.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
77
- pytrilogy-0.0.1.109.dist-info/RECORD,,
72
+ pytrilogy-0.0.1.110.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
73
+ pytrilogy-0.0.1.110.dist-info/METADATA,sha256=FbH2jc_Eg9QjfxlbLhTAqBzFoCxe4aM-6EbE4HDC4TM,7882
74
+ pytrilogy-0.0.1.110.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
75
+ pytrilogy-0.0.1.110.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
76
+ pytrilogy-0.0.1.110.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
77
+ pytrilogy-0.0.1.110.dist-info/RECORD,,
trilogy/__init__.py CHANGED
@@ -4,6 +4,6 @@ from trilogy.executor import Executor
4
4
  from trilogy.parser import parse
5
5
  from trilogy.constants import CONFIG
6
6
 
7
- __version__ = "0.0.1.109"
7
+ __version__ = "0.0.1.110"
8
8
 
9
9
  __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
trilogy/constants.py CHANGED
@@ -1,8 +1,8 @@
1
1
  from logging import getLogger
2
- from dataclasses import dataclass
2
+ from dataclasses import dataclass, field
3
3
  from enum import Enum
4
4
 
5
- logger = getLogger("preql")
5
+ logger = getLogger("trilogy")
6
6
 
7
7
  DEFAULT_NAMESPACE = "local"
8
8
 
@@ -18,12 +18,20 @@ class MagicConstants(Enum):
18
18
  NULL_VALUE = MagicConstants.NULL
19
19
 
20
20
 
21
+ @dataclass
22
+ class Optimizations:
23
+ predicate_pushdown: bool = True
24
+ datasource_inlining: bool = True
25
+ direct_return: bool = True
26
+
27
+
21
28
  # TODO: support loading from environments
22
29
  @dataclass
23
30
  class Config:
24
31
  strict_mode: bool = True
25
32
  human_identifiers: bool = True
26
33
  inline_datasources: bool = True
34
+ optimizations: Optimizations = field(default_factory=Optimizations)
27
35
 
28
36
 
29
37
  CONFIG = Config()
trilogy/core/enums.py CHANGED
@@ -263,6 +263,7 @@ class SourceType(Enum):
263
263
  WINDOW = "window"
264
264
  UNNEST = "unnest"
265
265
  CONSTANT = "constant"
266
+ ROWSET = "rowset"
266
267
 
267
268
 
268
269
  class ShowCategory(Enum):
trilogy/core/models.py CHANGED
@@ -1941,6 +1941,9 @@ class QueryDatasource(BaseModel):
1941
1941
  ),
1942
1942
  join_derived_concepts=self.join_derived_concepts,
1943
1943
  force_group=self.force_group,
1944
+ hidden_concepts=unique(
1945
+ self.hidden_concepts + other.hidden_concepts, "address"
1946
+ ),
1944
1947
  )
1945
1948
 
1946
1949
  return qds
@@ -2101,6 +2104,9 @@ class CTE(BaseModel):
2101
2104
  self.source.output_concepts = unique(
2102
2105
  self.source.output_concepts + other.source.output_concepts, "address"
2103
2106
  )
2107
+ self.hidden_concepts = unique(
2108
+ self.hidden_concepts + other.hidden_concepts, "address"
2109
+ )
2104
2110
  return self
2105
2111
 
2106
2112
  @property
@@ -2728,6 +2734,9 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
2728
2734
  def __repr__(self):
2729
2735
  return f"{str(self.left)} {self.operator.value} {str(self.right)}"
2730
2736
 
2737
+ def __str__(self):
2738
+ return self.__repr__()
2739
+
2731
2740
  def with_namespace(self, namespace: str):
2732
2741
  return self.__class__(
2733
2742
  left=(
@@ -4,24 +4,32 @@ from trilogy.core.models import (
4
4
  PersistStatement,
5
5
  Datasource,
6
6
  MultiSelectStatement,
7
+ Conditional,
8
+ BooleanOperator,
7
9
  )
8
10
  from trilogy.core.enums import PurposeLineage
9
- from trilogy.constants import logger
11
+ from trilogy.constants import logger, CONFIG
10
12
  from abc import ABC
11
13
 
12
14
 
15
+ REGISTERED_RULES: list["OptimizationRule"] = []
16
+
17
+
13
18
  class OptimizationRule(ABC):
14
19
 
15
- def optimize(self, cte: CTE) -> bool:
20
+ def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
16
21
  raise NotImplementedError
17
22
 
18
23
  def log(self, message: str):
19
24
  logger.info(f"[Optimization][{self.__class__.__name__}] {message}")
20
25
 
26
+ def debug(self, message: str):
27
+ logger.debug(f"[Optimization][{self.__class__.__name__}] {message}")
28
+
21
29
 
22
30
  class InlineDatasource(OptimizationRule):
23
31
 
24
- def optimize(self, cte: CTE) -> bool:
32
+ def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
25
33
  if not cte.parent_ctes:
26
34
  return False
27
35
 
@@ -60,10 +68,98 @@ class InlineDatasource(OptimizationRule):
60
68
  return optimized
61
69
 
62
70
 
63
- REGISTERED_RULES: list[OptimizationRule] = [InlineDatasource()]
71
+ # This will be used in the future for more complex condition decomposition
72
+ def decompose_condition(conditional: Conditional):
73
+ chunks = []
74
+ if conditional.operator == BooleanOperator.AND:
75
+ for val in [conditional.left, conditional.right]:
76
+ if isinstance(val, Conditional):
77
+ chunks.extend(decompose_condition(val))
78
+ else:
79
+ chunks.append(val)
80
+ else:
81
+ chunks.append(conditional)
82
+ return chunks
83
+
84
+
85
+ def is_child_of(a, comparison):
86
+ if isinstance(comparison, Conditional):
87
+ return (
88
+ is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
89
+ ) and comparison.operator == BooleanOperator.AND
90
+ return comparison == a
91
+
92
+
93
+ class PredicatePushdown(OptimizationRule):
94
+
95
+ def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
96
+
97
+ if not cte.parent_ctes:
98
+ self.debug(f"No parent CTEs for {cte.name}")
99
+
100
+ return False
101
+
102
+ optimized = False
103
+ if not cte.condition:
104
+ self.debug(f"No CTE condition for {cte.name}")
105
+ return False
106
+ self.log(
107
+ f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
108
+ )
109
+ if isinstance(cte.condition, Conditional):
110
+ candidates = decompose_condition(cte.condition)
111
+ else:
112
+ candidates = [cte.condition]
113
+ logger.info(f"Have {len(candidates)} candidates to try to push down")
114
+ for candidate in candidates:
115
+ conditions = {x.address for x in candidate.concept_arguments}
116
+ for parent_cte in cte.parent_ctes:
117
+ materialized = {k for k, v in parent_cte.source_map.items() if v != ""}
118
+ if conditions.issubset(materialized):
119
+ if all(
120
+ [
121
+ is_child_of(candidate, child.condition)
122
+ for child in inverse_map[parent_cte.name]
123
+ ]
124
+ ):
125
+ self.log(
126
+ f"All concepts are found on {parent_cte.name} and all it's children include same filter; pushing up filter"
127
+ )
128
+ if parent_cte.condition:
129
+ parent_cte.condition = Conditional(
130
+ left=parent_cte.condition,
131
+ operator=BooleanOperator.AND,
132
+ right=candidate,
133
+ )
134
+ else:
135
+ parent_cte.condition = candidate
136
+ optimized = True
137
+ else:
138
+ logger.info("conditions not subset of parent materialized")
139
+
140
+ if all(
141
+ [
142
+ is_child_of(cte.condition, parent_cte.condition)
143
+ for parent_cte in cte.parent_ctes
144
+ ]
145
+ ):
146
+ self.log("All parents have same filter, removing filter")
147
+ cte.condition = None
148
+ optimized = True
149
+
150
+ return optimized
151
+
152
+
153
+ if CONFIG.optimizations.datasource_inlining:
154
+ REGISTERED_RULES.append(InlineDatasource())
155
+ if CONFIG.optimizations.predicate_pushdown:
156
+ REGISTERED_RULES.append(PredicatePushdown())
64
157
 
65
158
 
66
- def filter_irrelevant_ctes(input: list[CTE], root_cte: CTE):
159
+ def filter_irrelevant_ctes(
160
+ input: list[CTE],
161
+ root_cte: CTE,
162
+ ):
67
163
  relevant_ctes = set()
68
164
 
69
165
  def recurse(cte: CTE):
@@ -75,6 +171,16 @@ def filter_irrelevant_ctes(input: list[CTE], root_cte: CTE):
75
171
  return [cte for cte in input if cte.name in relevant_ctes]
76
172
 
77
173
 
174
+ def gen_inverse_map(input: list[CTE]) -> dict[str, list[CTE]]:
175
+ inverse_map: dict[str, list[CTE]] = {}
176
+ for cte in input:
177
+ for parent in cte.parent_ctes:
178
+ if parent.name not in inverse_map:
179
+ inverse_map[parent.name] = []
180
+ inverse_map[parent.name].append(cte)
181
+ return inverse_map
182
+
183
+
78
184
  def is_direct_return_eligible(
79
185
  cte: CTE, select: SelectStatement | PersistStatement | MultiSelectStatement
80
186
  ) -> bool:
@@ -126,7 +232,8 @@ def optimize_ctes(
126
232
  actions_taken = False
127
233
  for rule in REGISTERED_RULES:
128
234
  for cte in input:
129
- actions_taken = rule.optimize(cte)
235
+ inverse_map = gen_inverse_map(input)
236
+ actions_taken = rule.optimize(cte, inverse_map)
130
237
  complete = not actions_taken
131
238
 
132
239
  if is_direct_return_eligible(root_cte, select):
@@ -12,7 +12,7 @@ from typing import List
12
12
 
13
13
  from trilogy.core.enums import JoinType, PurposeLineage
14
14
  from trilogy.constants import logger
15
- from trilogy.core.processing.utility import padding
15
+ from trilogy.core.processing.utility import padding, unique
16
16
  from trilogy.core.processing.node_generators.common import concept_to_relevant_joins
17
17
 
18
18
 
@@ -40,7 +40,7 @@ def gen_rowset_node(
40
40
  else:
41
41
  targets = select.output_components
42
42
  node: StrategyNode = source_concepts(
43
- mandatory_list=targets,
43
+ mandatory_list=unique(targets, "address"),
44
44
  environment=environment,
45
45
  g=g,
46
46
  depth=depth + 1,
@@ -52,14 +52,19 @@ def gen_rowset_node(
52
52
  )
53
53
  return None
54
54
  node.conditions = select.where_clause.conditional if select.where_clause else None
55
- # rebuild any cached info with the new condition clause
56
-
57
55
  enrichment = set([x.address for x in local_optional])
58
56
  rowset_relevant = [
59
57
  x
60
58
  for x in rowset.derived_concepts
61
59
  # if x.address == concept.address or x.address in enrichment
62
60
  ]
61
+ select_hidden = set([x.address for x in select.hidden_components])
62
+ rowset_hidden = [
63
+ x
64
+ for x in rowset.derived_concepts
65
+ if isinstance(x.lineage, RowsetItem)
66
+ and x.lineage.content.address in select_hidden
67
+ ]
63
68
  additional_relevant = [
64
69
  x for x in select.output_components if x.address in enrichment
65
70
  ]
@@ -71,7 +76,7 @@ def gen_rowset_node(
71
76
  if select.where_clause:
72
77
  for item in additional_relevant:
73
78
  node.partial_concepts.append(item)
74
- node.hidden_concepts = [
79
+ node.hidden_concepts = rowset_hidden + [
75
80
  x
76
81
  for x in node.output_concepts
77
82
  if x.address not in [y.address for y in local_optional + [concept]]
@@ -282,6 +282,7 @@ class MergeNode(StrategyNode):
282
282
  if c.address in [x.address for x in self.output_concepts]
283
283
  ]
284
284
  )
285
+
285
286
  logger.info(
286
287
  f"{self.logging_prefix}{LOGGER_PREFIX} has pre grain {pregrain} and final merge node grain {grain}"
287
288
  )
@@ -307,9 +308,6 @@ class MergeNode(StrategyNode):
307
308
  f"{self.logging_prefix}{LOGGER_PREFIX} no parents include full grain {grain} and pregrain {pregrain} does not match, assume must group to grain. Have {[str(d.grain) for d in final_datasets]}"
308
309
  )
309
310
  force_group = True
310
- # Grain<returns.customer.id,returns.store.id,returns.item.id,returns.store_sales.ticket_number>
311
- # Grain<returns.customer.id,returns.store.id,returns.return_date.id,returns.item.id,returns.store_sales.ticket_number>
312
- # Grain<returns.customer.id,returns.store.id,returns.item.id,returns.store_sales.ticket_number>
313
311
  else:
314
312
  force_group = None
315
313