pytrilogy 0.0.1.115__py3-none-any.whl → 0.0.1.116__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.1.115
3
+ Version: 0.0.1.116
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -15,17 +15,17 @@ Description-Content-Type: text/markdown
15
15
  License-File: LICENSE.md
16
16
  Requires-Dist: lark
17
17
  Requires-Dist: jinja2
18
- Requires-Dist: sqlalchemy <2.0.0
18
+ Requires-Dist: sqlalchemy<2.0.0
19
19
  Requires-Dist: networkx
20
20
  Requires-Dist: pyodbc
21
21
  Requires-Dist: pydantic
22
22
  Requires-Dist: duckdb-engine
23
23
  Provides-Extra: bigquery
24
- Requires-Dist: sqlalchemy-bigquery ; extra == 'bigquery'
24
+ Requires-Dist: sqlalchemy-bigquery; extra == "bigquery"
25
25
  Provides-Extra: postgres
26
- Requires-Dist: psycopg2-binary ; extra == 'postgres'
26
+ Requires-Dist: psycopg2-binary; extra == "postgres"
27
27
  Provides-Extra: snowflake
28
- Requires-Dist: snowflake-sqlalchemy ; extra == 'snowflake'
28
+ Requires-Dist: snowflake-sqlalchemy; extra == "snowflake"
29
29
 
30
30
  ## Trilogy
31
31
  [![Website](https://img.shields.io/badge/INTRO-WEB-orange?)](https://trilogydata.dev/)
@@ -1,8 +1,8 @@
1
- trilogy/__init__.py,sha256=FLkaJwdfgAlanF07q1NrFtMnJiCyOuGWPWbUpfXnttg,292
1
+ trilogy/__init__.py,sha256=0jWazkuszqLHMaM49JtN8aEur-f9lwCZ5PqZbNhO10E,292
2
2
  trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- trilogy/constants.py,sha256=DJi3ESttmvqgy6fPRXiaQzqJVye6jYwf6XM89NHv0_M,735
3
+ trilogy/constants.py,sha256=u2dNxhwy0v-6HrvG1GcpDVvuhzdTH5fuyYNCxDPlr2E,770
4
4
  trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
5
- trilogy/executor.py,sha256=_ZbjrKsUdWL52tWgpxqZnmccAuPXcIPEPN_dDSLNeAQ,9696
5
+ trilogy/executor.py,sha256=auuDykCHeqlRWIHOfBfgIIIntEctWaUC-VPJr1DQbYk,10217
6
6
  trilogy/parser.py,sha256=UtuqSiGiCjpMAYgo1bvNq-b7NSzCA5hzbUW31RXaMII,281
7
7
  trilogy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  trilogy/utility.py,sha256=zM__8r29EsyDW7K9VOHz8yvZC2bXFzh7xKy3cL7GKsk,707
@@ -16,9 +16,14 @@ trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,56
16
16
  trilogy/core/functions.py,sha256=zkRReytiotOBAW-a3Ri5eoejZDYTt2-7Op80ZxZxUmw,9129
17
17
  trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
18
18
  trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
19
- trilogy/core/models.py,sha256=2qkebdlVsHeGp1foupR6LJ5q9YewlZcZq6utxdLBqIw,111565
20
- trilogy/core/optimization.py,sha256=942MnGRzscAHcG9LsfMslIRRQBslbIiPHnAvJ3w8YRg,9157
19
+ trilogy/core/models.py,sha256=_k_ZqrsZ6HYNz3CJ8yiOVXMej1pHb_QO0KZNxkDyvno,113767
20
+ trilogy/core/optimization.py,sha256=1xAFn7aw8skqDFUQCel5xJc0hVUYHs-oW1DckN8Z4n4,4043
21
21
  trilogy/core/query_processor.py,sha256=clIRJ6IcsqIVBPKFsxt8bqCLsLyajvAu02MUIcKQhTo,15713
22
+ trilogy/core/optimizations/__init__.py,sha256=pxRzNzd2g8oRMy4f_ub5va6bNS2pd4hnyp9JBzTKc1E,300
23
+ trilogy/core/optimizations/base_optimization.py,sha256=tWWT-xnTbnEU-mNi_isMNbywm8B9WTRsNFwGpeh3rqE,468
24
+ trilogy/core/optimizations/inline_constant.py,sha256=neZOFjX7M2pzQ-8m-f8nApy_MfJuowX6SzcGwGFt5w4,927
25
+ trilogy/core/optimizations/inline_datasource.py,sha256=BSp54fwF4RRwInd-09pggemC7JuXj-uqGzi32ufeqYo,2171
26
+ trilogy/core/optimizations/predicate_pushdown.py,sha256=s5BMLADcQy4_UsMCchHTsInaJUtLGh1l2kTpqzAzcs4,3199
22
27
  trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
28
  trilogy/core/processing/concept_strategies_v3.py,sha256=MYrpNMidqvPOg123RekOcqVTjcj03i_538gBo0MzoWE,23432
24
29
  trilogy/core/processing/graph_utils.py,sha256=ulCJ4hYAISbUxLD6VM2fah9RBPGIXSEHEPeRBSFl0Rs,1197
@@ -45,7 +50,7 @@ trilogy/core/processing/nodes/select_node_v2.py,sha256=ERCflBFzKpD5SzweMevnJLyQn
45
50
  trilogy/core/processing/nodes/unnest_node.py,sha256=JFtm90IVM-46aCYkTNIaJah6v9ApAfonjVhcVM1HmDE,1903
46
51
  trilogy/core/processing/nodes/window_node.py,sha256=X7qxLUKd3tekjUUsmH_4vz5b-U89gMnGd04VBxuu2Ns,1280
47
52
  trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- trilogy/dialect/base.py,sha256=iy2jb43CvJ0xSozyNVLuJSCaZ4Uzb5o--2p2-ZKNzSM,30991
53
+ trilogy/dialect/base.py,sha256=ro9fKV6RrPggUVmJPlIEdySLsR3Pq5rm0UMQho6Bx_k,31518
49
54
  trilogy/dialect/bigquery.py,sha256=9vxQn2BMv_oTGQSWQpoN5ho_OgqMWaHH9e-5vQVf44c,2906
50
55
  trilogy/dialect/common.py,sha256=zWrYmvevlXznocw9uGHmY5Ws1rp_kICm9zA_ulTe4eg,2165
51
56
  trilogy/dialect/config.py,sha256=tLVEMctaTDhUgARKXUNfHUcIolGaALkQ0RavUvXAY4w,2994
@@ -54,7 +59,7 @@ trilogy/dialect/enums.py,sha256=4NdpsydBpDn6jnh0JzFz5VvQEtnShErWtWHVyT6TNpw,3948
54
59
  trilogy/dialect/postgres.py,sha256=r47xbCA7nfEYENofiVfLZ-SnReNfDmUmW4OSHVkkP4E,3206
55
60
  trilogy/dialect/presto.py,sha256=8zjRn8AeYXZQGuUi-afyBWLet8o-LSt6gm5IH7bTdiw,2987
56
61
  trilogy/dialect/snowflake.py,sha256=N3HknYgN-fjD7BLX1Ucj-ss_ku2Ox8DgLsF3BIHutHo,2941
57
- trilogy/dialect/sql_server.py,sha256=UrLeA9bxiFJ4qpGsqVJqBybQCyJhetMebe8IzQW1q9s,2900
62
+ trilogy/dialect/sql_server.py,sha256=HX68vNTrcDaTnOxe6Zbx_PBgrO42e2VuThxO6CYQ2cY,3026
58
63
  trilogy/hooks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
64
  trilogy/hooks/base_hook.py,sha256=Xkb-A2qCHozYjum0A36zOy5PwTVwrP3NLDF0U2GpgHo,1100
60
65
  trilogy/hooks/graph_hook.py,sha256=i-Tv9sxZU0sMc-God8bLLz-nAg4-wYafogZtHaU8LXw,801
@@ -65,14 +70,14 @@ trilogy/parsing/common.py,sha256=iR3fiiZ7w8VJuUGrQ0v06XGDXov81f4z1ZlFnj6y40E,580
65
70
  trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
66
71
  trilogy/parsing/exceptions.py,sha256=92E5i2frv5hj9wxObJZsZqj5T6bglvPzvdvco_vW1Zk,38
67
72
  trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
68
- trilogy/parsing/parse_engine.py,sha256=9j0mUN6D48iH39YRPRTp5JsDaB9YLFaF-xmF62ALMSA,56256
69
- trilogy/parsing/render.py,sha256=fxjpq2FZLgllw_d4cru-t_IXNPAz2DmYkT7v9ED0XRI,11540
70
- trilogy/parsing/trilogy.lark,sha256=GpjGQaDmK7GOTIqK1n4lMbDXqpt_w3EkqlYhlFw3HEA,10679
73
+ trilogy/parsing/parse_engine.py,sha256=cT1IoLP0LzykNX0UfH9-eu8PzjfzSNCtZ_hZvf58TRg,56761
74
+ trilogy/parsing/render.py,sha256=cOKHvl2ZrpkD4lpng9iXc37gc3XyZ1e51woPxB7QqsI,11672
75
+ trilogy/parsing/trilogy.lark,sha256=xYpVKneNhXruxIyzFqcK1b8-NSqzzEwtPrTxmpXd_Vc,10786
71
76
  trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
77
  trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
73
- pytrilogy-0.0.1.115.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
74
- pytrilogy-0.0.1.115.dist-info/METADATA,sha256=6lmw6TXDHhgLTQoHRIqZxy3YK7oq0hQPMtV9Z9wzk0g,7882
75
- pytrilogy-0.0.1.115.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
76
- pytrilogy-0.0.1.115.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
77
- pytrilogy-0.0.1.115.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
78
- pytrilogy-0.0.1.115.dist-info/RECORD,,
78
+ pytrilogy-0.0.1.116.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
79
+ pytrilogy-0.0.1.116.dist-info/METADATA,sha256=kyUsh-1RPnsedKLexiwJCKTl9ZzZHl5Uu4LUQcV9hj0,7878
80
+ pytrilogy-0.0.1.116.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
81
+ pytrilogy-0.0.1.116.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
82
+ pytrilogy-0.0.1.116.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
83
+ pytrilogy-0.0.1.116.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (71.1.0)
2
+ Generator: setuptools (72.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
trilogy/__init__.py CHANGED
@@ -4,6 +4,6 @@ from trilogy.executor import Executor
4
4
  from trilogy.parser import parse
5
5
  from trilogy.constants import CONFIG
6
6
 
7
- __version__ = "0.0.1.115"
7
+ __version__ = "0.0.1.116"
8
8
 
9
9
  __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
trilogy/constants.py CHANGED
@@ -22,6 +22,7 @@ NULL_VALUE = MagicConstants.NULL
22
22
  class Optimizations:
23
23
  predicate_pushdown: bool = True
24
24
  datasource_inlining: bool = True
25
+ constant_inlining: bool = True
25
26
  direct_return: bool = True
26
27
 
27
28
 
trilogy/core/models.py CHANGED
@@ -1165,11 +1165,17 @@ class OrderBy(Namespaced, BaseModel):
1165
1165
  return OrderBy(items=[x.with_namespace(namespace) for x in self.items])
1166
1166
 
1167
1167
 
1168
class RawSQLStatement(BaseModel):
    """Parsed statement that carries a block of SQL text.

    Built by the parser's `rawsql_statement` rule; the dialect turns it into a
    `ProcessedRawSQLStatement` holding the same text, and the executor hands
    that text to `execute_raw_sql`.
    """

    # SQL text taken from the statement's multiline string.
    text: str
    # Parse metadata; a new Metadata instance is created when none is given.
    meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1171
+
1172
+
1168
1173
  class SelectStatement(Namespaced, BaseModel):
1169
1174
  selection: List[SelectItem]
1170
1175
  where_clause: Optional["WhereClause"] = None
1171
1176
  order_by: Optional[OrderBy] = None
1172
1177
  limit: Optional[int] = None
1178
+ meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1173
1179
 
1174
1180
  def __str__(self):
1175
1181
  from trilogy.parsing.render import render_query
@@ -1371,6 +1377,7 @@ class MultiSelectStatement(Namespaced, BaseModel):
1371
1377
  where_clause: Optional["WhereClause"] = None
1372
1378
  order_by: Optional[OrderBy] = None
1373
1379
  limit: Optional[int] = None
1380
+ meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1374
1381
 
1375
1382
  def __repr__(self):
1376
1383
  return "MultiSelect<" + " MERGE ".join([str(s) for s in self.selects]) + ">"
@@ -2038,6 +2045,40 @@ class CTE(BaseModel):
2038
2045
  def validate_output_columns(cls, v):
2039
2046
  return unique(v, "address")
2040
2047
 
2048
def inline_constant(self, concept: Concept):
    """Stop sourcing a constant concept from upstream CTEs.

    The concept must be derived as a constant and have a `Function` lineage
    whose operator is `FunctionType.CONSTANT`; anything else is rejected.
    When the concept has a `source_map` entry, that entry is deleted and any
    CTE that was only referenced through it is pruned from `joins` and
    `parent_ctes`.

    Returns:
        True when a `source_map` entry was removed (the CTE changed),
        False when the concept is not an inlinable constant or had no entry.
    """
    if concept.derivation != PurposeLineage.CONSTANT:
        return False
    if not isinstance(concept.lineage, Function):
        return False
    if concept.lineage.operator != FunctionType.CONSTANT:
        return False
    if concept.address not in self.source_map:
        # Nothing is sourced for this concept, so there is nothing to change.
        return False
    # remove the constant
    removed: set = set(self.source_map[concept.address])
    del self.source_map[concept.address]
    # if we've entirely removed the need to join to someplace to get the concept
    # drop the join as well.
    for removed_cte in removed:
        still_required = any(
            removed_cte in sources for sources in self.source_map.values()
        )
        if still_required:
            continue
        self.joins = [
            join
            for join in self.joins
            if not isinstance(join, Join)
            or (
                join.right_cte.name != removed_cte
                and join.left_cte.name != removed_cte
            )
        ]
        self.parent_ctes = [x for x in self.parent_ctes if x.name != removed_cte]
        if removed_cte == self.base_name_override:
            # The pruned CTE was the base; fall back to the first remaining
            # parent, or clear the override when no parents are left.
            candidates = [x.name for x in self.parent_ctes]
            self.base_name_override = candidates[0] if candidates else None
            self.base_alias_override = candidates[0] if candidates else None
    # Previously the success path fell off the end and returned None, so
    # callers could never see that the CTE had been modified.
    return True
2081
+
2041
2082
  def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
2042
2083
  qds_being_inlined = parent.source
2043
2084
  ds_being_inlined = qds_being_inlined.datasources[0]
@@ -2186,6 +2227,11 @@ class CTE(BaseModel):
2186
2227
  and not self.group_to_grain
2187
2228
  ):
2188
2229
  return False
2230
+ # if we don't need to source any concepts from anywhere
2231
+ # render without from
2232
+ # most likely to happen from inlining constants
2233
+ if not any([v for v in self.source_map.values()]):
2234
+ return False
2189
2235
  if (
2190
2236
  len(self.source.datasources) == 1
2191
2237
  and self.source.datasources[0].name == CONSTANT_DATASET
@@ -3184,6 +3230,10 @@ class ProcessedShowStatement(BaseModel):
3184
3230
  output_values: List[Union[Concept, Datasource, ProcessedQuery]]
3185
3231
 
3186
3232
 
3233
class ProcessedRawSQLStatement(BaseModel):
    """Processed form of a raw SQL statement.

    The dialect's `compile_statement` returns `text` unchanged, and the
    executor passes it to `execute_raw_sql`.
    """

    # SQL text copied from the originating RawSQLStatement.
    text: str
3235
+
3236
+
3187
3237
  class Limit(BaseModel):
3188
3238
  count: int
3189
3239
 
@@ -3386,6 +3436,7 @@ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3386
3436
  class PersistStatement(BaseModel):
3387
3437
  datasource: Datasource
3388
3438
  select: SelectStatement
3439
+ meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
3389
3440
 
3390
3441
  @property
3391
3442
  def identifier(self):
@@ -2,155 +2,21 @@ from trilogy.core.models import (
2
2
  CTE,
3
3
  SelectStatement,
4
4
  PersistStatement,
5
- Datasource,
6
5
  MultiSelectStatement,
7
6
  Conditional,
8
7
  BooleanOperator,
9
8
  )
10
9
  from trilogy.core.enums import PurposeLineage
11
10
  from trilogy.constants import logger, CONFIG
12
- from abc import ABC
13
-
14
-
15
- class OptimizationRule(ABC):
16
-
17
- def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
18
- raise NotImplementedError
19
-
20
- def log(self, message: str):
21
- logger.info(f"[Optimization][{self.__class__.__name__}] {message}")
22
-
23
- def debug(self, message: str):
24
- logger.debug(f"[Optimization][{self.__class__.__name__}] {message}")
25
-
26
-
27
- class InlineDatasource(OptimizationRule):
28
-
29
- def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
30
- if not cte.parent_ctes:
31
- return False
32
-
33
- optimized = False
34
- self.log(
35
- f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
36
- )
37
- to_inline: list[CTE] = []
38
- force_group = False
39
- for parent_cte in cte.parent_ctes:
40
- if not parent_cte.is_root_datasource:
41
- self.log(f"parent {parent_cte.name} is not root")
42
- continue
43
- if parent_cte.parent_ctes:
44
- self.log(f"parent {parent_cte.name} has parents")
45
- continue
46
- raw_root = parent_cte.source.datasources[0]
47
- if not isinstance(raw_root, Datasource):
48
- self.log(f"parent {parent_cte.name} is not datasource")
49
- continue
50
- root: Datasource = raw_root
51
- if not root.can_be_inlined:
52
- self.log(f"parent {parent_cte.name} datasource is not inlineable")
53
- continue
54
- root_outputs = {x.address for x in root.output_concepts}
55
- cte_outputs = {x.address for x in parent_cte.output_columns}
56
- grain_components = {x.address for x in root.grain.components}
57
- if not cte_outputs.issubset(root_outputs):
58
- self.log(f"Not all {parent_cte.name} outputs are found on datasource")
59
- continue
60
- if not grain_components.issubset(cte_outputs):
61
- self.log("Not all datasource components in cte outputs, forcing group")
62
- force_group = True
63
- to_inline.append(parent_cte)
64
-
65
- for replaceable in to_inline:
66
-
67
- result = cte.inline_parent_datasource(replaceable, force_group=force_group)
68
- if result:
69
- self.log(f"Inlined parent {replaceable.name}")
70
- else:
71
- self.log(f"Failed to inline {replaceable.name}")
72
- return optimized
73
-
74
-
75
- def decompose_condition(conditional: Conditional):
76
- chunks = []
77
- if conditional.operator == BooleanOperator.AND:
78
- for val in [conditional.left, conditional.right]:
79
- if isinstance(val, Conditional):
80
- chunks.extend(decompose_condition(val))
81
- else:
82
- chunks.append(val)
83
- else:
84
- chunks.append(conditional)
85
- return chunks
86
-
87
-
88
- def is_child_of(a, comparison):
89
- if isinstance(comparison, Conditional):
90
- return (
91
- is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
92
- ) and comparison.operator == BooleanOperator.AND
93
- return comparison == a
94
-
95
-
96
- class PredicatePushdown(OptimizationRule):
97
-
98
- def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
99
-
100
- if not cte.parent_ctes:
101
- self.debug(f"No parent CTEs for {cte.name}")
102
-
103
- return False
104
-
105
- optimized = False
106
- if not cte.condition:
107
- self.debug(f"No CTE condition for {cte.name}")
108
- return False
109
- self.log(
110
- f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
111
- )
112
- if isinstance(cte.condition, Conditional):
113
- candidates = cte.condition.decompose()
114
- else:
115
- candidates = [cte.condition]
116
- logger.info(f"Have {len(candidates)} candidates to try to push down")
117
- for candidate in candidates:
118
- conditions = {x.address for x in candidate.concept_arguments}
119
- for parent_cte in cte.parent_ctes:
120
- materialized = {k for k, v in parent_cte.source_map.items() if v != []}
121
- if conditions.issubset(materialized):
122
- if all(
123
- [
124
- is_child_of(candidate, child.condition)
125
- for child in inverse_map[parent_cte.name]
126
- ]
127
- ):
128
- self.log(
129
- f"All concepts are found on {parent_cte.name} and all it's children include same filter; pushing up filter"
130
- )
131
- if parent_cte.condition:
132
- parent_cte.condition = Conditional(
133
- left=parent_cte.condition,
134
- operator=BooleanOperator.AND,
135
- right=candidate,
136
- )
137
- else:
138
- parent_cte.condition = candidate
139
- optimized = True
140
- else:
141
- logger.info("conditions not subset of parent materialized")
11
+ from trilogy.core.optimizations import (
12
+ OptimizationRule,
13
+ InlineConstant,
14
+ PredicatePushdown,
15
+ InlineDatasource,
16
+ )
142
17
 
143
- if all(
144
- [
145
- is_child_of(cte.condition, parent_cte.condition)
146
- for parent_cte in cte.parent_ctes
147
- ]
148
- ):
149
- self.log("All parents have same filter, removing filter")
150
- cte.condition = None
151
- optimized = True
152
18
 
153
- return optimized
19
+ MAX_OPTIMIZATION_LOOPS = 100
154
20
 
155
21
 
156
22
  def filter_irrelevant_ctes(
@@ -232,14 +98,17 @@ def optimize_ctes(
232
98
  REGISTERED_RULES.append(InlineDatasource())
233
99
  if CONFIG.optimizations.predicate_pushdown:
234
100
  REGISTERED_RULES.append(PredicatePushdown())
235
-
236
- while not complete:
101
+ if CONFIG.optimizations.constant_inlining:
102
+ REGISTERED_RULES.append(InlineConstant())
103
+ loops = 0
104
+ while not complete and (loops <= MAX_OPTIMIZATION_LOOPS):
237
105
  actions_taken = False
238
106
  for rule in REGISTERED_RULES:
239
107
  for cte in input:
240
108
  inverse_map = gen_inverse_map(input)
241
109
  actions_taken = rule.optimize(cte, inverse_map)
242
110
  complete = not actions_taken
111
+ loops += 1
243
112
 
244
113
  if CONFIG.optimizations.direct_return and is_direct_return_eligible(
245
114
  root_cte, select
@@ -0,0 +1,11 @@
1
+ from .inline_constant import InlineConstant
2
+ from .inline_datasource import InlineDatasource
3
+ from .predicate_pushdown import PredicatePushdown
4
+ from .base_optimization import OptimizationRule
5
+
6
+ __all__ = [
7
+ "OptimizationRule",
8
+ "InlineConstant",
9
+ "InlineDatasource",
10
+ "PredicatePushdown",
11
+ ]
@@ -0,0 +1,17 @@
1
+ from trilogy.core.models import (
2
+ CTE,
3
+ )
4
+ from trilogy.constants import logger
5
+ from abc import ABC
6
+
7
+
8
class OptimizationRule(ABC):
    """Base class for rules that rewrite a single CTE in place.

    Subclasses override `optimize`; `log` and `debug` emit messages tagged
    with the concrete rule's class name.
    """

    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
        """Apply the rule to `cte` and report whether anything changed.

        Args:
            cte: the CTE to inspect and possibly modify.
            inverse_map: CTE name -> list of CTEs; presumably the CTEs that
                consume the named one (built by the caller) — TODO confirm.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError

    def log(self, message: str):
        """Write `message` to the shared logger at INFO level."""
        text = f"[Optimization][{self.__class__.__name__}] {message}"
        logger.info(text)

    def debug(self, message: str):
        """Write `message` to the shared logger at DEBUG level."""
        text = f"[Optimization][{self.__class__.__name__}] {message}"
        logger.debug(text)
@@ -0,0 +1,29 @@
1
+ from trilogy.core.models import (
2
+ CTE,
3
+ Concept,
4
+ )
5
+ from trilogy.core.enums import PurposeLineage
6
+
7
+ from trilogy.core.optimizations.base_optimization import OptimizationRule
8
+
9
+
10
class InlineConstant(OptimizationRule):
    """Rule that asks a CTE to inline the constant concepts it sources."""

    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
        """Inline every constant input concept that `cte` still sources.

        Returns True when at least one `cte.inline_constant` call reported
        success, False otherwise. `inverse_map` is not used by this rule.
        """
        # Pass 1: collect constants that still have a source_map entry.
        constants: list[Concept] = []
        for candidate in cte.source.input_concepts:
            is_sourced = candidate.address in cte.source_map
            if is_sourced and candidate.derivation == PurposeLineage.CONSTANT:
                self.log(f"Found constant {candidate.address} on {cte.name}")
                constants.append(candidate)

        if not constants:
            return False

        # Pass 2: try each one; every candidate is attempted, no short-circuit.
        any_inlined = False
        for constant in constants:
            self.log(f"Inlining constant {constant.address} on {cte.name}")
            if cte.inline_constant(constant):
                any_inlined = True
        return any_inlined
@@ -0,0 +1,54 @@
1
+ from trilogy.core.models import (
2
+ CTE,
3
+ Datasource,
4
+ )
5
+
6
+ from trilogy.core.optimizations.base_optimization import OptimizationRule
7
+
8
+
9
class InlineDatasource(OptimizationRule):
    """Rule that folds eligible root-datasource parents directly into a CTE."""

    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
        """Inline every eligible parent of `cte`.

        A parent qualifies when it is a root datasource CTE with no parents
        of its own, its first source is a `Datasource` with `can_be_inlined`
        set, and all of its output columns exist on that datasource. If a
        qualifying parent does not output every grain component of its
        datasource, all inlines in this call are made with `force_group=True`.

        Returns:
            True when at least one parent was inlined, False otherwise.
            `inverse_map` is not used by this rule.
        """
        if not cte.parent_ctes:
            return False

        optimized = False
        self.log(
            f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
        )
        to_inline: list[CTE] = []
        force_group = False
        for parent_cte in cte.parent_ctes:
            if not parent_cte.is_root_datasource:
                self.log(f"parent {parent_cte.name} is not root")
                continue
            if parent_cte.parent_ctes:
                self.log(f"parent {parent_cte.name} has parents")
                continue
            raw_root = parent_cte.source.datasources[0]
            if not isinstance(raw_root, Datasource):
                self.log(f"parent {parent_cte.name} is not datasource")
                continue
            root: Datasource = raw_root
            if not root.can_be_inlined:
                self.log(f"parent {parent_cte.name} datasource is not inlineable")
                continue
            root_outputs = {x.address for x in root.output_concepts}
            cte_outputs = {x.address for x in parent_cte.output_columns}
            grain_components = {x.address for x in root.grain.components}
            if not cte_outputs.issubset(root_outputs):
                self.log(f"Not all {parent_cte.name} outputs are found on datasource")
                continue
            if not grain_components.issubset(cte_outputs):
                self.log("Not all datasource components in cte outputs, forcing group")
                force_group = True
            to_inline.append(parent_cte)

        for replaceable in to_inline:
            result = cte.inline_parent_datasource(replaceable, force_group=force_group)
            if result:
                self.log(f"Inlined parent {replaceable.name}")
                # Report the change; this flag was previously never set, so
                # the rule returned False even after a successful inline.
                # NOTE(review): assumes a successful inline removes the parent
                # from cte.parent_ctes so repeated passes converge — confirm.
                optimized = True
            else:
                self.log(f"Failed to inline {replaceable.name}")
        return optimized
@@ -0,0 +1,88 @@
1
+ from trilogy.core.models import (
2
+ CTE,
3
+ Conditional,
4
+ BooleanOperator,
5
+ )
6
+ from trilogy.constants import logger
7
+ from trilogy.core.optimizations.base_optimization import OptimizationRule
8
+
9
+
10
def decompose_condition(conditional: Conditional):
    """Flatten a tree of AND-ed conditions into a list of its operands.

    A conditional whose operator is not AND is returned as a single-item
    list. For an AND, each side is expanded recursively when it is itself a
    `Conditional` and appended unchanged otherwise.
    """
    if not conditional.operator == BooleanOperator.AND:
        return [conditional]

    parts = []
    for side in (conditional.left, conditional.right):
        if isinstance(side, Conditional):
            parts += decompose_condition(side)
        else:
            parts.append(side)
    return parts
21
+
22
+
23
def is_child_of(a, comparison):
    """Return True when `a` is `comparison` itself or one of its AND-ed operands.

    Args:
        a: the condition being looked for.
        comparison: the condition (possibly a nested `Conditional`) to search.

    Returns:
        True if `comparison == a`, or if `comparison` is an AND `Conditional`
        and `a` is found on its left or right side; False otherwise.
    """
    # An exact match counts. Without this check a compound condition was
    # never recognised as a child of an identical compound condition.
    if comparison == a:
        return True
    if isinstance(comparison, Conditional):
        # Only descend through AND: an operand of an OR is not guaranteed
        # to hold for every row that passes the combined condition.
        return (
            is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
        ) and comparison.operator == BooleanOperator.AND
    return False
29
+
30
+
31
class PredicatePushdown(OptimizationRule):
    """Rule that copies a CTE's filter predicates onto its parent CTEs."""

    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
        """Push the predicates of `cte.condition` up to eligible parents.

        A predicate is pushed to a parent when every concept it references
        has a non-empty `source_map` entry on that parent and every CTE in
        `inverse_map[parent.name]` already includes the predicate. If
        afterwards every parent carries `cte.condition`, it is removed from
        `cte`.

        Returns:
            True when a parent condition was extended or `cte.condition`
            was cleared, False otherwise.
        """
        if not cte.parent_ctes:
            self.debug(f"No parent CTEs for {cte.name}")
            return False

        optimized = False
        if not cte.condition:
            self.debug(f"No CTE condition for {cte.name}")
            return False
        self.log(
            f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
        )
        if isinstance(cte.condition, Conditional):
            candidates = cte.condition.decompose()
        else:
            candidates = [cte.condition]
        logger.info(f"Have {len(candidates)} candidates to try to push down")
        for candidate in candidates:
            conditions = {x.address for x in candidate.concept_arguments}
            for parent_cte in cte.parent_ctes:
                materialized = {k for k, v in parent_cte.source_map.items() if v != []}
                if not conditions.issubset(materialized):
                    logger.info("conditions not subset of parent materialized")
                    continue
                if parent_cte.condition and is_child_of(
                    candidate, parent_cte.condition
                ):
                    # Already applied on a previous pass. AND-ing it on again
                    # would only duplicate the predicate and report progress
                    # that did not happen.
                    self.debug(
                        f"Filter already present on {parent_cte.name}, skipping pushdown"
                    )
                    continue
                if all(
                    is_child_of(candidate, child.condition)
                    for child in inverse_map[parent_cte.name]
                ):
                    self.log(
                        f"All concepts are found on {parent_cte.name} and all it's children include same filter; pushing up filter"
                    )
                    if parent_cte.condition:
                        parent_cte.condition = Conditional(
                            left=parent_cte.condition,
                            operator=BooleanOperator.AND,
                            right=candidate,
                        )
                    else:
                        parent_cte.condition = candidate
                    optimized = True

        # Once every parent enforces the filter, repeating it here is redundant.
        if all(
            is_child_of(cte.condition, parent_cte.condition)
            for parent_cte in cte.parent_ctes
        ):
            self.log("All parents have same filter, removing filter")
            cte.condition = None
            optimized = True

        return optimized
trilogy/dialect/base.py CHANGED
@@ -44,6 +44,8 @@ from trilogy.core.models import (
44
44
  RowsetDerivationStatement,
45
45
  ConceptDeclarationStatement,
46
46
  ImportStatement,
47
+ RawSQLStatement,
48
+ ProcessedRawSQLStatement,
47
49
  )
48
50
  from trilogy.core.query_processor import process_query, process_persist
49
51
  from trilogy.dialect.common import render_join
@@ -558,11 +560,20 @@ class BaseDialect:
558
560
  | RowsetDerivationStatement
559
561
  | MergeStatement
560
562
  | ImportStatement
563
+ | RawSQLStatement
561
564
  ],
562
565
  hooks: Optional[List[BaseHook]] = None,
563
- ) -> List[ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement]:
566
+ ) -> List[
567
+ ProcessedQuery
568
+ | ProcessedQueryPersist
569
+ | ProcessedShowStatement
570
+ | ProcessedRawSQLStatement
571
+ ]:
564
572
  output: List[
565
- ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement
573
+ ProcessedQuery
574
+ | ProcessedQueryPersist
575
+ | ProcessedShowStatement
576
+ | ProcessedRawSQLStatement
566
577
  ] = []
567
578
  for statement in statements:
568
579
  if isinstance(statement, PersistStatement):
@@ -604,6 +615,8 @@ class BaseDialect:
604
615
  )
605
616
  else:
606
617
  raise NotImplementedError(type(statement))
618
+ elif isinstance(statement, RawSQLStatement):
619
+ output.append(ProcessedRawSQLStatement(text=statement.text))
607
620
  elif isinstance(
608
621
  statement,
609
622
  (
@@ -619,10 +632,18 @@ class BaseDialect:
619
632
  return output
620
633
 
621
634
  def compile_statement(
622
- self, query: ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement
635
+ self,
636
+ query: (
637
+ ProcessedQuery
638
+ | ProcessedQueryPersist
639
+ | ProcessedShowStatement
640
+ | ProcessedRawSQLStatement
641
+ ),
623
642
  ) -> str:
624
643
  if isinstance(query, ProcessedShowStatement):
625
644
  return ";\n".join([str(x) for x in query.output_values])
645
+ elif isinstance(query, ProcessedRawSQLStatement):
646
+ return query.text
626
647
  select_columns: Dict[str, str] = {}
627
648
  cte_output_map = {}
628
649
  selected = set()
@@ -9,6 +9,7 @@ from trilogy.core.models import (
9
9
  ProcessedQuery,
10
10
  ProcessedQueryPersist,
11
11
  ProcessedShowStatement,
12
+ ProcessedRawSQLStatement,
12
13
  )
13
14
  from trilogy.dialect.base import BaseDialect
14
15
 
@@ -81,7 +82,13 @@ class SqlServerDialect(BaseDialect):
81
82
  SQL_TEMPLATE = TSQL_TEMPLATE
82
83
 
83
84
  def compile_statement(
84
- self, query: ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement
85
+ self,
86
+ query: (
87
+ ProcessedQuery
88
+ | ProcessedQueryPersist
89
+ | ProcessedShowStatement
90
+ | ProcessedRawSQLStatement
91
+ ),
85
92
  ) -> str:
86
93
  base = super().compile_statement(query)
87
94
  if isinstance(base, (ProcessedQuery, ProcessedQueryPersist)):
trilogy/executor.py CHANGED
@@ -9,6 +9,8 @@ from trilogy.core.models import (
9
9
  ProcessedQuery,
10
10
  ProcessedShowStatement,
11
11
  ProcessedQueryPersist,
12
+ ProcessedRawSQLStatement,
13
+ RawSQLStatement,
12
14
  MultiSelectStatement,
13
15
  SelectStatement,
14
16
  PersistStatement,
@@ -112,6 +114,10 @@ class Executor(object):
112
114
  )
113
115
  return self.execute_query(sql[0])
114
116
 
117
@execute_query.register
def _(self, query: RawSQLStatement) -> CursorResult:
    """Run a parsed raw SQL statement by executing its text directly."""
    sql_text = query.text
    return self.execute_raw_sql(sql_text)
120
+
115
121
  @execute_query.register
116
122
  def _(self, query: ProcessedShowStatement) -> CursorResult:
117
123
  return generate_result_set(
@@ -123,6 +129,10 @@ class Executor(object):
123
129
  ],
124
130
  )
125
131
 
132
@execute_query.register
def _(self, query: ProcessedRawSQLStatement) -> CursorResult:
    """Run a processed raw SQL statement by executing its text directly."""
    sql_text = query.text
    return self.execute_raw_sql(sql_text)
135
+
126
136
  @execute_query.register
127
137
  def _(self, query: ProcessedQuery) -> CursorResult:
128
138
  sql = self.generator.compile_statement(query)
@@ -195,7 +205,12 @@ class Executor(object):
195
205
 
196
206
  def parse_text(
197
207
  self, command: str, persist: bool = False
198
- ) -> List[ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement]:
208
+ ) -> List[
209
+ ProcessedQuery
210
+ | ProcessedQueryPersist
211
+ | ProcessedShowStatement
212
+ | ProcessedRawSQLStatement
213
+ ]:
199
214
  """Process a preql text command"""
200
215
  _, parsed = parse_text(command, self.environment)
201
216
  generatable = [
@@ -208,6 +223,7 @@ class Executor(object):
208
223
  PersistStatement,
209
224
  MultiSelectStatement,
210
225
  ShowStatement,
226
+ RawSQLStatement,
211
227
  ),
212
228
  )
213
229
  ]
@@ -222,10 +238,13 @@ class Executor(object):
222
238
  sql.append(x)
223
239
  return sql
224
240
 
225
- def parse_text_generator(
226
- self, command: str, persist: bool = False
227
- ) -> Generator[
228
- ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement, None, None
241
+ def parse_text_generator(self, command: str, persist: bool = False) -> Generator[
242
+ ProcessedQuery
243
+ | ProcessedQueryPersist
244
+ | ProcessedShowStatement
245
+ | ProcessedRawSQLStatement,
246
+ None,
247
+ None,
229
248
  ]:
230
249
  """Process a preql text command"""
231
250
  _, parsed = parse_text(command, self.environment)
@@ -239,6 +258,7 @@ class Executor(object):
239
258
  PersistStatement,
240
259
  MultiSelectStatement,
241
260
  ShowStatement,
261
+ RawSQLStatement,
242
262
  ),
243
263
  )
244
264
  ]
@@ -82,6 +82,7 @@ from trilogy.core.models import (
82
82
  Parenthetical,
83
83
  PersistStatement,
84
84
  Query,
85
+ RawSQLStatement,
85
86
  SelectStatement,
86
87
  SelectItem,
87
88
  WhereClause,
@@ -585,8 +586,11 @@ class ParseToObjects(Transformer):
585
586
  # namespace=self.environment.namespace,
586
587
  return Grain(components=[self.environment.concepts[a] for a in args[0]])
587
588
 
589
def MULTILINE_STRING(self, args) -> str:
    """Return the token text with three characters removed from each end.

    The callers in this class previously applied the same `[3:-3]` slice
    themselves to strip the delimiters of a multiline string.
    """
    delimiter_width = 3
    return args[delimiter_width:-delimiter_width]
591
+
588
592
  def raw_column_assignment(self, args):
589
- return RawColumnExpr(text=args[0][3:-3])
593
+ return RawColumnExpr(text=args[0])
590
594
 
591
595
  @v_args(meta=True)
592
596
  def datasource(self, meta: Meta, args):
@@ -756,6 +760,10 @@ class ParseToObjects(Transformer):
756
760
  self.environment.add_concept(new, meta=meta)
757
761
  return merge
758
762
 
763
@v_args(meta=True)
def rawsql_statement(self, meta: Meta, args) -> RawSQLStatement:
    """Build a `RawSQLStatement` from a `raw_sql(...)` statement.

    `args[0]` is used as the SQL text and the statement records the source
    line number taken from `meta`.
    """
    location = Metadata(line_number=meta.line)
    sql_text = args[0]
    return RawSQLStatement(meta=location, text=sql_text)
766
+
759
767
  def import_statement(self, args: list[str]) -> ImportStatement:
760
768
  alias = args[-1]
761
769
  path = args[0].split(".")
@@ -822,7 +830,11 @@ class ParseToObjects(Transformer):
822
830
  address=Address(location=address),
823
831
  grain=grain,
824
832
  )
825
- return PersistStatement(select=select, datasource=new_datasource)
833
+ return PersistStatement(
834
+ select=select,
835
+ datasource=new_datasource,
836
+ meta=Metadata(line_number=meta.line),
837
+ )
826
838
 
827
839
  @v_args(meta=True)
828
840
  def align_item(self, meta: Meta, args) -> AlignItem:
@@ -864,6 +876,7 @@ class ParseToObjects(Transformer):
864
876
  where_clause=where,
865
877
  order_by=order_by,
866
878
  limit=limit,
879
+ meta=Metadata(line_number=meta.line),
867
880
  )
868
881
  for concept in multi.derived_concepts:
869
882
  self.environment.add_concept(concept, meta=meta)
@@ -887,7 +900,11 @@ class ParseToObjects(Transformer):
887
900
  if not select_items:
888
901
  raise ValueError("Malformed select, missing select items")
889
902
  output = SelectStatement(
890
- selection=select_items, where_clause=where, limit=limit, order_by=order_by
903
+ selection=select_items,
904
+ where_clause=where,
905
+ limit=limit,
906
+ order_by=order_by,
907
+ meta=Metadata(line_number=meta.line),
891
908
  )
892
909
  for item in select_items:
893
910
  # we don't know the grain of an aggregate at assignment time
@@ -912,7 +929,7 @@ class ParseToObjects(Transformer):
912
929
 
913
930
  @v_args(meta=True)
914
931
  def query(self, meta: Meta, args):
915
- return Query(text=args[0][3:-3])
932
+ return Query(text=args[0])
916
933
 
917
934
  def where(self, args):
918
935
  root = args[0]
@@ -1692,6 +1709,7 @@ def parse_text(text: str, environment: Optional[Environment] = None) -> Tuple[
1692
1709
  | SelectStatement
1693
1710
  | PersistStatement
1694
1711
  | ShowStatement
1712
+ | RawSQLStatement
1695
1713
  | None
1696
1714
  ],
1697
1715
  ]:
trilogy/parsing/render.py CHANGED
@@ -38,6 +38,7 @@ from trilogy.core.models import (
38
38
  OrderBy,
39
39
  AlignClause,
40
40
  AlignItem,
41
+ RawSQLStatement,
41
42
  )
42
43
  from trilogy.core.enums import Modifier
43
44
 
@@ -183,6 +184,10 @@ class Renderer:
183
184
  def _(self, arg: "Address"):
184
185
  return f"address {arg.location}"
185
186
 
187
@to_string.register
def _(self, arg: "RawSQLStatement"):
    """Render a raw SQL statement back into `raw_sql('''...''');` source form."""
    rendered = f"raw_sql('''{arg.text}''');"
    return rendered
190
+
186
191
  @to_string.register
187
192
  def _(self, arg: "MagicConstants"):
188
193
  if arg == MagicConstants.NULL:
@@ -9,6 +9,7 @@
9
9
  | rowset_derivation_statement
10
10
  | import_statement
11
11
  | merge_statement
12
+ | rawsql_statement
12
13
 
13
14
  _TERMINATOR: ";"i /\s*/
14
15
 
@@ -71,6 +72,9 @@
71
72
  // merge statement
72
73
  merge_statement: "merge" IDENTIFIER ("," IDENTIFIER)* ","?
73
74
 
75
+ // raw sql statement
76
+ rawsql_statement: "raw_sql"i "(" MULTILINE_STRING ")"
77
+
74
78
  // FUNCTION blocks
75
79
  function: raw_function
76
80
  function_binding_item: IDENTIFIER ":" data_type