pytrilogy 0.0.1.115__py3-none-any.whl → 0.0.1.117__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


pytrilogy-0.0.1.115.dist-info/METADATA → pytrilogy-0.0.1.117.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: pytrilogy
- Version: 0.0.1.115
+ Version: 0.0.1.117
  Summary: Declarative, typed query language that compiles to SQL.
  Home-page:
  Author:
@@ -15,17 +15,17 @@ Description-Content-Type: text/markdown
  License-File: LICENSE.md
  Requires-Dist: lark
  Requires-Dist: jinja2
- Requires-Dist: sqlalchemy <2.0.0
+ Requires-Dist: sqlalchemy<2.0.0
  Requires-Dist: networkx
  Requires-Dist: pyodbc
  Requires-Dist: pydantic
  Requires-Dist: duckdb-engine
  Provides-Extra: bigquery
- Requires-Dist: sqlalchemy-bigquery ; extra == 'bigquery'
+ Requires-Dist: sqlalchemy-bigquery; extra == "bigquery"
  Provides-Extra: postgres
- Requires-Dist: psycopg2-binary ; extra == 'postgres'
+ Requires-Dist: psycopg2-binary; extra == "postgres"
  Provides-Extra: snowflake
- Requires-Dist: snowflake-sqlalchemy ; extra == 'snowflake'
+ Requires-Dist: snowflake-sqlalchemy; extra == "snowflake"

  ## Trilogy
  [![Website](https://img.shields.io/badge/INTRO-WEB-orange?)](https://trilogydata.dev/)
pytrilogy-0.0.1.115.dist-info/RECORD → pytrilogy-0.0.1.117.dist-info/RECORD CHANGED
@@ -1,8 +1,8 @@
- trilogy/__init__.py,sha256=FLkaJwdfgAlanF07q1NrFtMnJiCyOuGWPWbUpfXnttg,292
+ trilogy/__init__.py,sha256=1wHuIjygzuHzrQfHKZQ2GJsmw4tMvEXzwEJMg9Lb3Zc,292
  trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- trilogy/constants.py,sha256=DJi3ESttmvqgy6fPRXiaQzqJVye6jYwf6XM89NHv0_M,735
+ trilogy/constants.py,sha256=u2dNxhwy0v-6HrvG1GcpDVvuhzdTH5fuyYNCxDPlr2E,770
  trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
- trilogy/executor.py,sha256=_ZbjrKsUdWL52tWgpxqZnmccAuPXcIPEPN_dDSLNeAQ,9696
+ trilogy/executor.py,sha256=auuDykCHeqlRWIHOfBfgIIIntEctWaUC-VPJr1DQbYk,10217
  trilogy/parser.py,sha256=UtuqSiGiCjpMAYgo1bvNq-b7NSzCA5hzbUW31RXaMII,281
  trilogy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  trilogy/utility.py,sha256=zM__8r29EsyDW7K9VOHz8yvZC2bXFzh7xKy3cL7GKsk,707
@@ -13,12 +13,17 @@ trilogy/core/env_processor.py,sha256=SU-jpaGfoWLe9sGTeQYG1qjVnwGQ7TwctmnJRlfzluc
  trilogy/core/environment_helpers.py,sha256=mzBDHhdF9ssZ_-LY8CcaM_ddfJavkpRYrFImUd3cjXI,5972
  trilogy/core/ergonomics.py,sha256=w3gwXdgrxNHCuaRdyKg73t6F36tj-wIjQf47WZkHmJk,1465
  trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,561
- trilogy/core/functions.py,sha256=zkRReytiotOBAW-a3Ri5eoejZDYTt2-7Op80ZxZxUmw,9129
+ trilogy/core/functions.py,sha256=hXp-b29w3vNHQHpTU-VPXJqJaLferNwa681xQ3pf8R0,9129
  trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
  trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
- trilogy/core/models.py,sha256=2qkebdlVsHeGp1foupR6LJ5q9YewlZcZq6utxdLBqIw,111565
- trilogy/core/optimization.py,sha256=942MnGRzscAHcG9LsfMslIRRQBslbIiPHnAvJ3w8YRg,9157
+ trilogy/core/models.py,sha256=U8gYAvRoob4uj3f-j3N5EwkbBs_tKmcA48IGwmhqrbM,114203
+ trilogy/core/optimization.py,sha256=oM3Ry7UpbpTSm2xNkmWx70OHd2V2vWRjM72sZpsZfb8,4116
  trilogy/core/query_processor.py,sha256=clIRJ6IcsqIVBPKFsxt8bqCLsLyajvAu02MUIcKQhTo,15713
+ trilogy/core/optimizations/__init__.py,sha256=pxRzNzd2g8oRMy4f_ub5va6bNS2pd4hnyp9JBzTKc1E,300
+ trilogy/core/optimizations/base_optimization.py,sha256=tWWT-xnTbnEU-mNi_isMNbywm8B9WTRsNFwGpeh3rqE,468
+ trilogy/core/optimizations/inline_constant.py,sha256=neZOFjX7M2pzQ-8m-f8nApy_MfJuowX6SzcGwGFt5w4,927
+ trilogy/core/optimizations/inline_datasource.py,sha256=BSp54fwF4RRwInd-09pggemC7JuXj-uqGzi32ufeqYo,2171
+ trilogy/core/optimizations/predicate_pushdown.py,sha256=sIojWvoYp6k_ANCyVqxCpEyLY_GLmzsG-Sghj0cbk3k,4135
  trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  trilogy/core/processing/concept_strategies_v3.py,sha256=MYrpNMidqvPOg123RekOcqVTjcj03i_538gBo0MzoWE,23432
  trilogy/core/processing/graph_utils.py,sha256=ulCJ4hYAISbUxLD6VM2fah9RBPGIXSEHEPeRBSFl0Rs,1197
@@ -45,16 +50,16 @@ trilogy/core/processing/nodes/select_node_v2.py,sha256=ERCflBFzKpD5SzweMevnJLyQn
  trilogy/core/processing/nodes/unnest_node.py,sha256=JFtm90IVM-46aCYkTNIaJah6v9ApAfonjVhcVM1HmDE,1903
  trilogy/core/processing/nodes/window_node.py,sha256=X7qxLUKd3tekjUUsmH_4vz5b-U89gMnGd04VBxuu2Ns,1280
  trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- trilogy/dialect/base.py,sha256=iy2jb43CvJ0xSozyNVLuJSCaZ4Uzb5o--2p2-ZKNzSM,30991
+ trilogy/dialect/base.py,sha256=ycsxbUL68DsodOsHEgEoNLKMn5vgRN3sDIRfiH9fQDs,31719
  trilogy/dialect/bigquery.py,sha256=9vxQn2BMv_oTGQSWQpoN5ho_OgqMWaHH9e-5vQVf44c,2906
  trilogy/dialect/common.py,sha256=zWrYmvevlXznocw9uGHmY5Ws1rp_kICm9zA_ulTe4eg,2165
  trilogy/dialect/config.py,sha256=tLVEMctaTDhUgARKXUNfHUcIolGaALkQ0RavUvXAY4w,2994
  trilogy/dialect/duckdb.py,sha256=Ddyt68sr8IL2HnZMenyytoD65FXwY_O2pz1McyS0bis,3075
  trilogy/dialect/enums.py,sha256=4NdpsydBpDn6jnh0JzFz5VvQEtnShErWtWHVyT6TNpw,3948
  trilogy/dialect/postgres.py,sha256=r47xbCA7nfEYENofiVfLZ-SnReNfDmUmW4OSHVkkP4E,3206
- trilogy/dialect/presto.py,sha256=8zjRn8AeYXZQGuUi-afyBWLet8o-LSt6gm5IH7bTdiw,2987
+ trilogy/dialect/presto.py,sha256=UxBodRiV3szpFcQlcjoJaGXEwAhZJf_OT7dHczYvO80,3092
  trilogy/dialect/snowflake.py,sha256=N3HknYgN-fjD7BLX1Ucj-ss_ku2Ox8DgLsF3BIHutHo,2941
- trilogy/dialect/sql_server.py,sha256=UrLeA9bxiFJ4qpGsqVJqBybQCyJhetMebe8IzQW1q9s,2900
+ trilogy/dialect/sql_server.py,sha256=HX68vNTrcDaTnOxe6Zbx_PBgrO42e2VuThxO6CYQ2cY,3026
  trilogy/hooks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  trilogy/hooks/base_hook.py,sha256=Xkb-A2qCHozYjum0A36zOy5PwTVwrP3NLDF0U2GpgHo,1100
  trilogy/hooks/graph_hook.py,sha256=i-Tv9sxZU0sMc-God8bLLz-nAg4-wYafogZtHaU8LXw,801
@@ -65,14 +70,14 @@ trilogy/parsing/common.py,sha256=iR3fiiZ7w8VJuUGrQ0v06XGDXov81f4z1ZlFnj6y40E,580
  trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
  trilogy/parsing/exceptions.py,sha256=92E5i2frv5hj9wxObJZsZqj5T6bglvPzvdvco_vW1Zk,38
  trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
- trilogy/parsing/parse_engine.py,sha256=9j0mUN6D48iH39YRPRTp5JsDaB9YLFaF-xmF62ALMSA,56256
- trilogy/parsing/render.py,sha256=fxjpq2FZLgllw_d4cru-t_IXNPAz2DmYkT7v9ED0XRI,11540
- trilogy/parsing/trilogy.lark,sha256=GpjGQaDmK7GOTIqK1n4lMbDXqpt_w3EkqlYhlFw3HEA,10679
+ trilogy/parsing/parse_engine.py,sha256=F1ok96qT6EhKRKV1Q_YzfHxMFtNV8qAXopK8NaePgU4,57080
+ trilogy/parsing/render.py,sha256=TnLf5fg4wimpd9EvhLU-FMDwpyW9pesoedBZ0RrmWD4,11810
+ trilogy/parsing/trilogy.lark,sha256=1AZbQGpNmpm4KamAXA5IWcuOr2B8Gb8kUJcAOmKf_zY,10862
  trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
- pytrilogy-0.0.1.115.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
- pytrilogy-0.0.1.115.dist-info/METADATA,sha256=6lmw6TXDHhgLTQoHRIqZxy3YK7oq0hQPMtV9Z9wzk0g,7882
- pytrilogy-0.0.1.115.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
- pytrilogy-0.0.1.115.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
- pytrilogy-0.0.1.115.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
- pytrilogy-0.0.1.115.dist-info/RECORD,,
+ pytrilogy-0.0.1.117.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
+ pytrilogy-0.0.1.117.dist-info/METADATA,sha256=jrHvRWl_dtmpVu_aw08SS-whIymZ6l051tcpYmPQPD0,7878
+ pytrilogy-0.0.1.117.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+ pytrilogy-0.0.1.117.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
+ pytrilogy-0.0.1.117.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
+ pytrilogy-0.0.1.117.dist-info/RECORD,,
pytrilogy-0.0.1.115.dist-info/WHEEL → pytrilogy-0.0.1.117.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (71.1.0)
+ Generator: setuptools (72.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

trilogy/__init__.py CHANGED
@@ -4,6 +4,6 @@ from trilogy.executor import Executor
  from trilogy.parser import parse
  from trilogy.constants import CONFIG

- __version__ = "0.0.1.115"
+ __version__ = "0.0.1.117"

  __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
trilogy/constants.py CHANGED
@@ -22,6 +22,7 @@ NULL_VALUE = MagicConstants.NULL
  class Optimizations:
      predicate_pushdown: bool = True
      datasource_inlining: bool = True
+     constant_inlining: bool = True
      direct_return: bool = True


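The new `constant_inlining` flag sits next to the existing optimization toggles and is read by `optimize_ctes` further down in this diff. A minimal sketch of flipping it at runtime, using only what the diff shows (`CONFIG.optimizations` exposes these flags):

```python
# Sketch: toggling the new constant-inlining pass.
# CONFIG is the module-level config re-exported in trilogy/__init__.py;
# optimize_ctes() reads these flags when it registers optimization rules.
from trilogy.constants import CONFIG

CONFIG.optimizations.constant_inlining = False   # skip the new InlineConstant rule
CONFIG.optimizations.predicate_pushdown = True   # keep pushing filters toward sources
CONFIG.optimizations.datasource_inlining = True  # keep inlining root datasources
```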
trilogy/core/functions.py CHANGED
@@ -9,6 +9,7 @@ from trilogy.core.models import (
      ListType,
      StructType,
      MapType,
+     NumericType,
  )
  from trilogy.core.enums import FunctionType, Purpose, Granularity, DatePart
  from trilogy.core.exceptions import InvalidSyntaxException
@@ -21,7 +22,9 @@ def create_function_derived_concept(
      namespace: str,
      operator: FunctionType,
      arguments: list[Concept],
-     output_type: Optional[DataType | ListType | StructType | MapType] = None,
+     output_type: Optional[
+         DataType | ListType | StructType | MapType | NumericType
+     ] = None,
      output_purpose: Optional[Purpose] = None,
  ) -> Concept:
      purpose = (
@@ -56,9 +59,7 @@ def argument_to_purpose(arg) -> Purpose:
          return Purpose.PROPERTY
      elif isinstance(arg, Concept):
          return arg.purpose
-     elif isinstance(arg, (int, float, str, bool, list)):
-         return Purpose.CONSTANT
-     elif isinstance(arg, DataType):
+     elif isinstance(arg, (int, float, str, bool, list, NumericType, DataType)):
          return Purpose.CONSTANT
      elif isinstance(arg, DatePart):
          return Purpose.CONSTANT
trilogy/core/models.py CHANGED
@@ -105,7 +105,9 @@ def get_concept_arguments(expr) -> List["Concept"]:
      return output


- ALL_TYPES = Union["DataType", "MapType", "ListType", "StructType", "Concept"]
+ ALL_TYPES = Union[
+     "DataType", "MapType", "ListType", "NumericType", "StructType", "Concept"
+ ]

  NAMESPACED_TYPES = Union[
      "WindowItem",
@@ -176,6 +178,19 @@ class DataType(Enum):
          return self


+ class NumericType(BaseModel):
+     precision: int = 20
+     scale: int = 5
+
+     @property
+     def data_type(self):
+         return DataType.NUMERIC
+
+     @property
+     def value(self):
+         return self.data_type.value
+
+
  class ListType(BaseModel):
      model_config = ConfigDict(frozen=True)
      type: ALL_TYPES
@@ -192,7 +207,9 @@ class ListType(BaseModel):
          return self.data_type.value

      @property
-     def value_data_type(self) -> DataType | StructType | MapType | ListType:
+     def value_data_type(
+         self,
+     ) -> DataType | StructType | MapType | ListType | NumericType:
          if isinstance(self.type, Concept):
              return self.type.datatype
          return self.type
@@ -270,7 +287,7 @@ def empty_grain() -> Grain:

  class Concept(Namespaced, SelectGrain, BaseModel):
      name: str
-     datatype: DataType | ListType | StructType | MapType
+     datatype: DataType | ListType | StructType | MapType | NumericType
      purpose: Purpose
      metadata: Optional[Metadata] = Field(
          default_factory=lambda: Metadata(description=None, line_number=None),
@@ -790,12 +807,12 @@ class LooseConceptList(BaseModel):
  class Function(Namespaced, SelectGrain, BaseModel):
      operator: FunctionType
      arg_count: int = Field(default=1)
-     output_datatype: DataType | ListType | StructType | MapType
+     output_datatype: DataType | ListType | StructType | MapType | NumericType
      output_purpose: Purpose
      valid_inputs: Optional[
          Union[
-             Set[DataType | ListType | StructType],
-             List[Set[DataType | ListType | StructType]],
+             Set[DataType | ListType | StructType | NumericType],
+             List[Set[DataType | ListType | StructType] | NumericType],
          ]
      ] = None
      arguments: Sequence[
@@ -808,6 +825,7 @@ class Function(Namespaced, SelectGrain, BaseModel):
              str,
              DataType,
              ListType,
+             NumericType,
              DatePart,
              "Parenthetical",
              CaseWhen,
@@ -1165,11 +1183,17 @@ class OrderBy(Namespaced, BaseModel):
          return OrderBy(items=[x.with_namespace(namespace) for x in self.items])


+ class RawSQLStatement(BaseModel):
+     text: str
+     meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
+
+
  class SelectStatement(Namespaced, BaseModel):
      selection: List[SelectItem]
      where_clause: Optional["WhereClause"] = None
      order_by: Optional[OrderBy] = None
      limit: Optional[int] = None
+     meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())

      def __str__(self):
          from trilogy.parsing.render import render_query
@@ -1371,6 +1395,7 @@ class MultiSelectStatement(Namespaced, BaseModel):
      where_clause: Optional["WhereClause"] = None
      order_by: Optional[OrderBy] = None
      limit: Optional[int] = None
+     meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())

      def __repr__(self):
          return "MultiSelect<" + " MERGE ".join([str(s) for s in self.selects]) + ">"
@@ -1492,7 +1517,7 @@ class DatasourceMetadata(BaseModel):

  class MergeStatement(Namespaced, BaseModel):
      concepts: List[Concept]
-     datatype: DataType | ListType | StructType | MapType
+     datatype: DataType | ListType | StructType | MapType | NumericType

      @cached_property
      def concepts_lcl(self):
@@ -2038,6 +2063,40 @@ class CTE(BaseModel):
      def validate_output_columns(cls, v):
          return unique(v, "address")

+     def inline_constant(self, concept: Concept):
+         if not concept.derivation == PurposeLineage.CONSTANT:
+             return False
+         if not isinstance(concept.lineage, Function):
+             return False
+         if not concept.lineage.operator == FunctionType.CONSTANT:
+             return False
+         # remove the constant
+         removed: set = set()
+         if concept.address in self.source_map:
+             removed = removed.union(self.source_map[concept.address])
+             del self.source_map[concept.address]
+         # if we've entirely removed the need to join to someplace to get the concept
+         # drop the join as well.
+         for removed_cte in removed:
+             still_required = any([removed_cte in x for x in self.source_map.values()])
+             if not still_required:
+                 self.joins = [
+                     join
+                     for join in self.joins
+                     if not isinstance(join, Join)
+                     or (
+                         join.right_cte.name != removed_cte
+                         and join.left_cte.name != removed_cte
+                     )
+                 ]
+                 self.parent_ctes = [
+                     x for x in self.parent_ctes if x.name != removed_cte
+                 ]
+                 if removed_cte == self.base_name_override:
+                     candidates = [x.name for x in self.parent_ctes]
+                     self.base_name_override = candidates[0] if candidates else None
+                     self.base_alias_override = candidates[0] if candidates else None
+
      def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
          qds_being_inlined = parent.source
          ds_being_inlined = qds_being_inlined.datasources[0]
@@ -2186,6 +2245,11 @@ class CTE(BaseModel):
              and not self.group_to_grain
          ):
              return False
+         # if we don't need to source any concepts from anywhere
+         # render without from
+         # most likely to happen from inlining constants
+         if not any([v for v in self.source_map.values()]):
+             return False
          if (
              len(self.source.datasources) == 1
              and self.source.datasources[0].name == CONSTANT_DATASET
@@ -3184,6 +3248,10 @@ class ProcessedShowStatement(BaseModel):
      output_values: List[Union[Concept, Datasource, ProcessedQuery]]


+ class ProcessedRawSQLStatement(BaseModel):
+     text: str
+
+
  class Limit(BaseModel):
      count: int

@@ -3386,6 +3454,7 @@ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
  class PersistStatement(BaseModel):
      datasource: Datasource
      select: SelectStatement
+     meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())

      @property
      def identifier(self):
@@ -3447,7 +3516,7 @@ def list_to_wrapper(args):
      return ListWrapper(args, type=types[0])


- def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
+ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType | NumericType:
      if isinstance(arg, Function):
          return arg.output_datatype
      elif isinstance(arg, Concept):
@@ -3460,6 +3529,8 @@ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
          return DataType.STRING
      elif isinstance(arg, float):
          return DataType.FLOAT
+     elif isinstance(arg, NumericType):
+         return arg
      elif isinstance(arg, ListWrapper):
          return ListType(type=arg.type)
      elif isinstance(arg, AggregateWrapper):
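The two model additions that drive most of the rest of this diff are `NumericType` (a fixed-precision decimal, defaulting to precision 20 / scale 5) and `RawSQLStatement` (a raw SQL passthrough with optional line-number metadata). A small sketch built only from the fields shown above:

```python
# Sketch: constructing the new models directly.
from trilogy.core.models import NumericType, RawSQLStatement

money = NumericType(precision=12, scale=2)
assert money.data_type.name == "NUMERIC"      # data_type property returns DataType.NUMERIC
assert money.value == money.data_type.value   # value delegates to the enum's value

raw = RawSQLStatement(text="SELECT 1 AS one")  # meta defaults to an empty Metadata()
print(raw.text)
```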
trilogy/core/optimization.py CHANGED
@@ -2,155 +2,21 @@ from trilogy.core.models import (
      CTE,
      SelectStatement,
      PersistStatement,
-     Datasource,
      MultiSelectStatement,
      Conditional,
      BooleanOperator,
  )
  from trilogy.core.enums import PurposeLineage
  from trilogy.constants import logger, CONFIG
- from abc import ABC
-
-
- class OptimizationRule(ABC):
-
-     def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
-         raise NotImplementedError
-
-     def log(self, message: str):
-         logger.info(f"[Optimization][{self.__class__.__name__}] {message}")
-
-     def debug(self, message: str):
-         logger.debug(f"[Optimization][{self.__class__.__name__}] {message}")
-
-
- class InlineDatasource(OptimizationRule):
-
-     def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
-         if not cte.parent_ctes:
-             return False
-
-         optimized = False
-         self.log(
-             f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
-         )
-         to_inline: list[CTE] = []
-         force_group = False
-         for parent_cte in cte.parent_ctes:
-             if not parent_cte.is_root_datasource:
-                 self.log(f"parent {parent_cte.name} is not root")
-                 continue
-             if parent_cte.parent_ctes:
-                 self.log(f"parent {parent_cte.name} has parents")
-                 continue
-             raw_root = parent_cte.source.datasources[0]
-             if not isinstance(raw_root, Datasource):
-                 self.log(f"parent {parent_cte.name} is not datasource")
-                 continue
-             root: Datasource = raw_root
-             if not root.can_be_inlined:
-                 self.log(f"parent {parent_cte.name} datasource is not inlineable")
-                 continue
-             root_outputs = {x.address for x in root.output_concepts}
-             cte_outputs = {x.address for x in parent_cte.output_columns}
-             grain_components = {x.address for x in root.grain.components}
-             if not cte_outputs.issubset(root_outputs):
-                 self.log(f"Not all {parent_cte.name} outputs are found on datasource")
-                 continue
-             if not grain_components.issubset(cte_outputs):
-                 self.log("Not all datasource components in cte outputs, forcing group")
-                 force_group = True
-             to_inline.append(parent_cte)
-
-         for replaceable in to_inline:
-
-             result = cte.inline_parent_datasource(replaceable, force_group=force_group)
-             if result:
-                 self.log(f"Inlined parent {replaceable.name}")
-             else:
-                 self.log(f"Failed to inline {replaceable.name}")
-         return optimized
-
-
- def decompose_condition(conditional: Conditional):
-     chunks = []
-     if conditional.operator == BooleanOperator.AND:
-         for val in [conditional.left, conditional.right]:
-             if isinstance(val, Conditional):
-                 chunks.extend(decompose_condition(val))
-             else:
-                 chunks.append(val)
-     else:
-         chunks.append(conditional)
-     return chunks
-
-
- def is_child_of(a, comparison):
-     if isinstance(comparison, Conditional):
-         return (
-             is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
-         ) and comparison.operator == BooleanOperator.AND
-     return comparison == a
-
-
- class PredicatePushdown(OptimizationRule):
-
-     def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
-
-         if not cte.parent_ctes:
-             self.debug(f"No parent CTEs for {cte.name}")
-
-             return False
-
-         optimized = False
-         if not cte.condition:
-             self.debug(f"No CTE condition for {cte.name}")
-             return False
-         self.log(
-             f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
-         )
-         if isinstance(cte.condition, Conditional):
-             candidates = cte.condition.decompose()
-         else:
-             candidates = [cte.condition]
-         logger.info(f"Have {len(candidates)} candidates to try to push down")
-         for candidate in candidates:
-             conditions = {x.address for x in candidate.concept_arguments}
-             for parent_cte in cte.parent_ctes:
-                 materialized = {k for k, v in parent_cte.source_map.items() if v != []}
-                 if conditions.issubset(materialized):
-                     if all(
-                         [
-                             is_child_of(candidate, child.condition)
-                             for child in inverse_map[parent_cte.name]
-                         ]
-                     ):
-                         self.log(
-                             f"All concepts are found on {parent_cte.name} and all it's children include same filter; pushing up filter"
-                         )
-                         if parent_cte.condition:
-                             parent_cte.condition = Conditional(
-                                 left=parent_cte.condition,
-                                 operator=BooleanOperator.AND,
-                                 right=candidate,
-                             )
-                         else:
-                             parent_cte.condition = candidate
-                         optimized = True
-                 else:
-                     logger.info("conditions not subset of parent materialized")
+ from trilogy.core.optimizations import (
+     OptimizationRule,
+     InlineConstant,
+     PredicatePushdown,
+     InlineDatasource,
+ )

-         if all(
-             [
-                 is_child_of(cte.condition, parent_cte.condition)
-                 for parent_cte in cte.parent_ctes
-             ]
-         ):
-             self.log("All parents have same filter, removing filter")
-             cte.condition = None
-             optimized = True

-         return optimized
+ MAX_OPTIMIZATION_LOOPS = 100


  def filter_irrelevant_ctes(
@@ -184,7 +50,9 @@ def is_direct_return_eligible(
      if isinstance(select, (PersistStatement, MultiSelectStatement)):
          return False
      derived_concepts = [
-         c for c in cte.source.output_concepts if c not in cte.source.input_concepts
+         c
+         for c in cte.source.output_concepts + cte.source.hidden_concepts
+         if c not in cte.source.input_concepts
      ]
      eligible = True
      conditions = (
@@ -224,23 +92,9 @@ def sort_select_output(cte: CTE, query: SelectStatement | MultiSelectStatement):

  def optimize_ctes(
      input: list[CTE], root_cte: CTE, select: SelectStatement | MultiSelectStatement
- ):
+ ) -> list[CTE]:
      complete = False
      REGISTERED_RULES: list["OptimizationRule"] = []
-
-     if CONFIG.optimizations.datasource_inlining:
-         REGISTERED_RULES.append(InlineDatasource())
-     if CONFIG.optimizations.predicate_pushdown:
-         REGISTERED_RULES.append(PredicatePushdown())
-
-     while not complete:
-         actions_taken = False
-         for rule in REGISTERED_RULES:
-             for cte in input:
-                 inverse_map = gen_inverse_map(input)
-                 actions_taken = rule.optimize(cte, inverse_map)
-         complete = not actions_taken
-
      if CONFIG.optimizations.direct_return and is_direct_return_eligible(
          root_cte, select
      ):
@@ -258,5 +112,20 @@ def optimize_ctes(
          root_cte.condition = select.where_clause.conditional
      root_cte.requires_nesting = False
      sort_select_output(root_cte, select)
+     if CONFIG.optimizations.datasource_inlining:
+         REGISTERED_RULES.append(InlineDatasource())
+     if CONFIG.optimizations.predicate_pushdown:
+         REGISTERED_RULES.append(PredicatePushdown())
+     if CONFIG.optimizations.constant_inlining:
+         REGISTERED_RULES.append(InlineConstant())
+     loops = 0
+     while not complete and (loops <= MAX_OPTIMIZATION_LOOPS):
+         actions_taken = False
+         for rule in REGISTERED_RULES:
+             for cte in input:
+                 inverse_map = gen_inverse_map(input)
+                 actions_taken = actions_taken or rule.optimize(cte, inverse_map)
+         complete = not actions_taken
+         loops += 1

      return filter_irrelevant_ctes(input, root_cte)
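`optimize_ctes` now defers rule registration until after the direct-return rewrite and runs the rules to a fixed point, capped by `MAX_OPTIMIZATION_LOOPS = 100`. Two details worth calling out: the old loop overwrote `actions_taken` on every call, so a later no-op rule could mask earlier work and end the loop prematurely, while the new `actions_taken or rule.optimize(...)` accumulates it (and, once one call reports progress, short-circuits the remaining calls in that pass, deferring them to the next pass). A paraphrased sketch of the loop shape, with the rule objects left abstract:

```python
# Sketch of the fixed-point optimization loop (paraphrased from optimize_ctes above).
MAX_OPTIMIZATION_LOOPS = 100

def run_rules(ctes, rules, gen_inverse_map):
    complete = False
    loops = 0
    while not complete and loops <= MAX_OPTIMIZATION_LOOPS:
        actions_taken = False
        for rule in rules:
            for cte in ctes:
                inverse_map = gen_inverse_map(ctes)
                # accumulate: any rule that changed something triggers another pass
                actions_taken = actions_taken or rule.optimize(cte, inverse_map)
        complete = not actions_taken
        loops += 1
    return ctes
```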
trilogy/core/optimizations/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from .inline_constant import InlineConstant
+ from .inline_datasource import InlineDatasource
+ from .predicate_pushdown import PredicatePushdown
+ from .base_optimization import OptimizationRule
+
+ __all__ = [
+     "OptimizationRule",
+     "InlineConstant",
+     "InlineDatasource",
+     "PredicatePushdown",
+ ]
trilogy/core/optimizations/base_optimization.py ADDED
@@ -0,0 +1,17 @@
+ from trilogy.core.models import (
+     CTE,
+ )
+ from trilogy.constants import logger
+ from abc import ABC
+
+
+ class OptimizationRule(ABC):
+
+     def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+         raise NotImplementedError
+
+     def log(self, message: str):
+         logger.info(f"[Optimization][{self.__class__.__name__}] {message}")
+
+     def debug(self, message: str):
+         logger.debug(f"[Optimization][{self.__class__.__name__}] {message}")
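Every rule in the new `trilogy.core.optimizations` package subclasses this base and implements `optimize(cte, inverse_map) -> bool`, returning `True` only when it mutated the CTE graph. A minimal sketch of a hypothetical extra rule (the class here is illustrative, not part of the package):

```python
# Sketch: the contract an OptimizationRule subclass is expected to follow.
from trilogy.core.models import CTE
from trilogy.core.optimizations import OptimizationRule


class LogOnlyRule(OptimizationRule):  # hypothetical example, not shipped in pytrilogy
    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
        self.log(f"visited {cte.name} with {len(cte.parent_ctes)} parents")
        return False  # nothing changed, so the optimizer loop can converge
```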
trilogy/core/optimizations/inline_constant.py ADDED
@@ -0,0 +1,29 @@
+ from trilogy.core.models import (
+     CTE,
+     Concept,
+ )
+ from trilogy.core.enums import PurposeLineage
+
+ from trilogy.core.optimizations.base_optimization import OptimizationRule
+
+
+ class InlineConstant(OptimizationRule):
+
+     def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+
+         to_inline: list[Concept] = []
+         for x in cte.source.input_concepts:
+             if x.address not in cte.source_map:
+                 continue
+             if x.derivation == PurposeLineage.CONSTANT:
+                 self.log(f"Found constant {x.address} on {cte.name}")
+                 to_inline.append(x)
+         if to_inline:
+             inlined = False
+             for c in to_inline:
+                 self.log(f"Inlining constant {c.address} on {cte.name}")
+                 test = cte.inline_constant(c)
+                 if test:
+                     inlined = True
+             return inlined
+         return False
trilogy/core/optimizations/inline_datasource.py ADDED
@@ -0,0 +1,54 @@
+ from trilogy.core.models import (
+     CTE,
+     Datasource,
+ )
+
+ from trilogy.core.optimizations.base_optimization import OptimizationRule
+
+
+ class InlineDatasource(OptimizationRule):
+
+     def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+         if not cte.parent_ctes:
+             return False
+
+         optimized = False
+         self.log(
+             f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
+         )
+         to_inline: list[CTE] = []
+         force_group = False
+         for parent_cte in cte.parent_ctes:
+             if not parent_cte.is_root_datasource:
+                 self.log(f"parent {parent_cte.name} is not root")
+                 continue
+             if parent_cte.parent_ctes:
+                 self.log(f"parent {parent_cte.name} has parents")
+                 continue
+             raw_root = parent_cte.source.datasources[0]
+             if not isinstance(raw_root, Datasource):
+                 self.log(f"parent {parent_cte.name} is not datasource")
+                 continue
+             root: Datasource = raw_root
+             if not root.can_be_inlined:
+                 self.log(f"parent {parent_cte.name} datasource is not inlineable")
+                 continue
+             root_outputs = {x.address for x in root.output_concepts}
+             cte_outputs = {x.address for x in parent_cte.output_columns}
+             grain_components = {x.address for x in root.grain.components}
+             if not cte_outputs.issubset(root_outputs):
+                 self.log(f"Not all {parent_cte.name} outputs are found on datasource")
+                 continue
+             if not grain_components.issubset(cte_outputs):
+                 self.log("Not all datasource components in cte outputs, forcing group")
+                 force_group = True
+             to_inline.append(parent_cte)
+
+         for replaceable in to_inline:
+
+             result = cte.inline_parent_datasource(replaceable, force_group=force_group)
+             if result:
+                 self.log(f"Inlined parent {replaceable.name}")
+             else:
+                 self.log(f"Failed to inline {replaceable.name}")
+         return optimized
trilogy/core/optimizations/predicate_pushdown.py ADDED
@@ -0,0 +1,105 @@
+ from trilogy.core.models import (
+     CTE,
+     Conditional,
+     BooleanOperator,
+     Datasource,
+ )
+ from trilogy.core.optimizations.base_optimization import OptimizationRule
+
+
+ def decompose_condition(conditional: Conditional):
+     chunks = []
+     if conditional.operator == BooleanOperator.AND:
+         for val in [conditional.left, conditional.right]:
+             if isinstance(val, Conditional):
+                 chunks.extend(decompose_condition(val))
+             else:
+                 chunks.append(val)
+     else:
+         chunks.append(conditional)
+     return chunks
+
+
+ def is_child_of(a, comparison):
+     if isinstance(comparison, Conditional):
+         return (
+             is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
+         ) and comparison.operator == BooleanOperator.AND
+     return comparison == a
+
+
+ class PredicatePushdown(OptimizationRule):
+
+     def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+
+         if not cte.parent_ctes:
+             self.debug(f"No parent CTEs for {cte.name}")
+
+             return False
+
+         optimized = False
+         if not cte.condition:
+             self.debug(f"No CTE condition for {cte.name}")
+             return False
+         self.log(
+             f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
+         )
+         if isinstance(cte.condition, Conditional):
+             candidates = cte.condition.decompose()
+         else:
+             candidates = [cte.condition]
+         self.log(f"Have {len(candidates)} candidates to try to push down")
+         for candidate in candidates:
+             conditions = {x.address for x in candidate.concept_arguments}
+             for parent_cte in cte.parent_ctes:
+                 if is_child_of(cte.condition, parent_cte.condition):
+                     continue
+                 materialized = {k for k, v in parent_cte.source_map.items() if v != []}
+                 # if it's a root datasource, we can filter on _any_ of the output concepts
+                 if parent_cte.is_root_datasource:
+                     extra_check = {
+                         x.address
+                         for x in parent_cte.source.datasources[0].output_concepts
+                     }
+                     if conditions.issubset(extra_check):
+                         for x in conditions:
+                             if x not in materialized:
+                                 materialized.add(x)
+                                 parent_cte.source_map[x] = [
+                                     parent_cte.source.datasources[0].name
+                                 ]
+                 if conditions.issubset(materialized):
+                     if all(
+                         [
+                             is_child_of(candidate, child.condition)
+                             for child in inverse_map.get(parent_cte.name, [])
+                         ]
+                     ):
+                         self.log(
+                             f"All concepts are found on {parent_cte.name} and all it's children include same filter; pushing up filter"
+                         )
+                         if parent_cte.condition:
+                             parent_cte.condition = Conditional(
+                                 left=parent_cte.condition,
+                                 operator=BooleanOperator.AND,
+                                 right=candidate,
+                             )
+                         else:
+                             parent_cte.condition = candidate
+                         optimized = True
+                 else:
+                     self.log(
+                         f"conditions {conditions} not subset of parent {parent_cte.name} parent has {materialized} "
+                     )
+
+         if all(
+             [
+                 is_child_of(cte.condition, parent_cte.condition)
+                 for parent_cte in cte.parent_ctes
+             ]
+         ) and not any([isinstance(x, Datasource) for x in cte.source.datasources]):
+             self.log("All parents have same filter, removing filter")
+             cte.condition = None
+             optimized = True
+
+         return optimized
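The pushdown logic hinges on the two helpers at the top of the module: `decompose_condition` splits an AND tree into independently pushable predicates, and `is_child_of` answers whether a predicate is already implied by a parent's condition by walking only through AND nodes. A self-contained sketch of those semantics, using tiny stand-ins for `Conditional`/`BooleanOperator` (only the attributes the real helpers read):

```python
# Sketch: the AND-tree reasoning behind predicate pushdown, on stand-in objects.
from dataclasses import dataclass
from enum import Enum


class Op(Enum):          # stand-in for BooleanOperator
    AND = "and"
    OR = "or"


@dataclass
class Cond:              # stand-in for Conditional: left, right, operator
    left: object
    right: object
    operator: Op


def decompose(conditional):
    # mirrors decompose_condition: only AND nodes are split apart
    chunks = []
    if isinstance(conditional, Cond) and conditional.operator == Op.AND:
        for val in [conditional.left, conditional.right]:
            chunks.extend(decompose(val) if isinstance(val, Cond) else [val])
    else:
        chunks.append(conditional)
    return chunks


def is_child_of(a, comparison):
    # mirrors is_child_of: a must be reachable through AND nodes only
    if isinstance(comparison, Cond):
        return (
            is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
        ) and comparison.operator == Op.AND
    return comparison == a


tree = Cond(Cond("x > 1", "y = 2", Op.AND), "z < 3", Op.AND)
assert decompose(tree) == ["x > 1", "y = 2", "z < 3"]
assert is_child_of("y = 2", tree)                            # implied via ANDs
assert not is_child_of("x > 1", Cond("x > 1", "q", Op.OR))   # OR blocks pushdown
```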
trilogy/dialect/base.py CHANGED
@@ -44,6 +44,9 @@ from trilogy.core.models import (
      RowsetDerivationStatement,
      ConceptDeclarationStatement,
      ImportStatement,
+     RawSQLStatement,
+     ProcessedRawSQLStatement,
+     NumericType,
  )
  from trilogy.core.query_processor import process_query, process_persist
  from trilogy.dialect.common import render_join
@@ -95,6 +98,7 @@ DATATYPE_MAP = {
      DataType.INTEGER: "int",
      DataType.FLOAT: "float",
      DataType.BOOL: "bool",
+     DataType.NUMERIC: "numeric",
  }


@@ -332,6 +336,7 @@ class BaseDialect:
              Parenthetical,
              AggregateWrapper,
              MagicConstants,
+             NumericType,
              ListType,
              ListWrapper[int],
              ListWrapper[str],
@@ -437,6 +442,8 @@ class BaseDialect:
              return str(e.value)
          elif isinstance(e, DatePart):
              return str(e.value)
+         elif isinstance(e, NumericType):
+             return f"{self.DATATYPE_MAP[DataType.NUMERIC]}({e.precision},{e.scale})"
          elif isinstance(e, MagicConstants):
              if e == MagicConstants.NULL:
                  return "null"
@@ -558,11 +565,20 @@ class BaseDialect:
              | RowsetDerivationStatement
              | MergeStatement
              | ImportStatement
+             | RawSQLStatement
          ],
          hooks: Optional[List[BaseHook]] = None,
-     ) -> List[ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement]:
+     ) -> List[
+         ProcessedQuery
+         | ProcessedQueryPersist
+         | ProcessedShowStatement
+         | ProcessedRawSQLStatement
+     ]:
          output: List[
-             ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement
+             ProcessedQuery
+             | ProcessedQueryPersist
+             | ProcessedShowStatement
+             | ProcessedRawSQLStatement
          ] = []
          for statement in statements:
              if isinstance(statement, PersistStatement):
@@ -604,6 +620,8 @@ class BaseDialect:
                      )
                  else:
                      raise NotImplementedError(type(statement))
+             elif isinstance(statement, RawSQLStatement):
+                 output.append(ProcessedRawSQLStatement(text=statement.text))
              elif isinstance(
                  statement,
                  (
@@ -619,10 +637,18 @@ class BaseDialect:
          return output

      def compile_statement(
-         self, query: ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement
+         self,
+         query: (
+             ProcessedQuery
+             | ProcessedQueryPersist
+             | ProcessedShowStatement
+             | ProcessedRawSQLStatement
+         ),
      ) -> str:
          if isinstance(query, ProcessedShowStatement):
              return ";\n".join([str(x) for x in query.output_values])
+         elif isinstance(query, ProcessedRawSQLStatement):
+             return query.text
          select_columns: Dict[str, str] = {}
          cte_output_map = {}
          selected = set()
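With `DataType.NUMERIC` in the base `DATATYPE_MAP` and the new `NumericType` branch in the expression renderer, a typed decimal renders as `numeric(precision,scale)`; dialects can swap the keyword by overriding the map, as Presto does below with `DECIMAL`. A sketch of just that formatting branch, mirroring the diff:

```python
# Sketch: how the new NumericType branch formats the type.
from trilogy.core.models import DataType, NumericType

DATATYPE_MAP = {DataType.NUMERIC: "numeric"}  # base dialect keyword; Presto uses "DECIMAL"


def render_numeric(e: NumericType) -> str:
    return f"{DATATYPE_MAP[DataType.NUMERIC]}({e.precision},{e.scale})"


print(render_numeric(NumericType(precision=20, scale=5)))  # numeric(20,5)
```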
trilogy/dialect/presto.py CHANGED
@@ -4,6 +4,7 @@ from jinja2 import Template

  from trilogy.core.enums import FunctionType, WindowType
  from trilogy.dialect.base import BaseDialect
+ from trilogy.core.models import DataType


  WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
@@ -26,7 +27,7 @@ FUNCTION_MAP = {
      FunctionType.WEEK: lambda x: f"EXTRACT(WEEK from {x[0]})",
      FunctionType.QUARTER: lambda x: f"EXTRACT(QUARTER from {x[0]})",
      # math
-     FunctionType.DIVIDE: lambda x: f"SAFE_DIVIDE({x[0]},{x[1]})",
+     FunctionType.DIVIDE: lambda x: f"{x[0]}/{x[1]}",
      FunctionType.DATE_ADD: lambda x: f"DATE_ADD('{x[1]}', {x[2]}, {x[0]})",
      FunctionType.CURRENT_DATE: lambda x: "CURRENT_DATE",
      FunctionType.CURRENT_DATETIME: lambda x: "CURRENT_TIMESTAMP",
@@ -80,6 +81,7 @@ class PrestoDialect(BaseDialect):
      }
      QUOTE_CHARACTER = '"'
      SQL_TEMPLATE = SQL_TEMPLATE
+     DATATYPE_MAP = {**BaseDialect.DATATYPE_MAP, DataType.NUMERIC: "DECIMAL"}


  class TrinoDialect(PrestoDialect):
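The Presto/Trino change above is a correctness fix: `SAFE_DIVIDE` is BigQuery syntax and is not available in Presto/Trino, so division now renders as a plain infix expression (which also means SAFE_DIVIDE's return-NULL-on-zero behaviour no longer applies). The map entries are plain formatting lambdas over already-rendered operand strings:

```python
# Sketch: the DIVIDE entry before and after, applied to rendered operands.
old_divide = lambda x: f"SAFE_DIVIDE({x[0]},{x[1]})"  # BigQuery-only function
new_divide = lambda x: f"{x[0]}/{x[1]}"               # portable infix division

print(old_divide(["revenue", "quantity"]))  # SAFE_DIVIDE(revenue,quantity)
print(new_divide(["revenue", "quantity"]))  # revenue/quantity
```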
trilogy/dialect/sql_server.py CHANGED
@@ -9,6 +9,7 @@ from trilogy.core.models import (
      ProcessedQuery,
      ProcessedQueryPersist,
      ProcessedShowStatement,
+     ProcessedRawSQLStatement,
  )
  from trilogy.dialect.base import BaseDialect

@@ -81,7 +82,13 @@ class SqlServerDialect(BaseDialect):
      SQL_TEMPLATE = TSQL_TEMPLATE

      def compile_statement(
-         self, query: ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement
+         self,
+         query: (
+             ProcessedQuery
+             | ProcessedQueryPersist
+             | ProcessedShowStatement
+             | ProcessedRawSQLStatement
+         ),
      ) -> str:
          base = super().compile_statement(query)
          if isinstance(base, (ProcessedQuery, ProcessedQueryPersist)):
trilogy/executor.py CHANGED
@@ -9,6 +9,8 @@ from trilogy.core.models import (
      ProcessedQuery,
      ProcessedShowStatement,
      ProcessedQueryPersist,
+     ProcessedRawSQLStatement,
+     RawSQLStatement,
      MultiSelectStatement,
      SelectStatement,
      PersistStatement,
@@ -112,6 +114,10 @@ class Executor(object):
          )
          return self.execute_query(sql[0])

+     @execute_query.register
+     def _(self, query: RawSQLStatement) -> CursorResult:
+         return self.execute_raw_sql(query.text)
+
      @execute_query.register
      def _(self, query: ProcessedShowStatement) -> CursorResult:
          return generate_result_set(
@@ -123,6 +129,10 @@ class Executor(object):
              ],
          )

+     @execute_query.register
+     def _(self, query: ProcessedRawSQLStatement) -> CursorResult:
+         return self.execute_raw_sql(query.text)
+
      @execute_query.register
      def _(self, query: ProcessedQuery) -> CursorResult:
          sql = self.generator.compile_statement(query)
@@ -195,7 +205,12 @@ class Executor(object):

      def parse_text(
          self, command: str, persist: bool = False
-     ) -> List[ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement]:
+     ) -> List[
+         ProcessedQuery
+         | ProcessedQueryPersist
+         | ProcessedShowStatement
+         | ProcessedRawSQLStatement
+     ]:
          """Process a preql text command"""
          _, parsed = parse_text(command, self.environment)
          generatable = [
@@ -208,6 +223,7 @@ class Executor(object):
                      PersistStatement,
                      MultiSelectStatement,
                      ShowStatement,
+                     RawSQLStatement,
                  ),
              )
          ]
@@ -222,10 +238,13 @@ class Executor(object):
              sql.append(x)
          return sql

-     def parse_text_generator(
-         self, command: str, persist: bool = False
-     ) -> Generator[
-         ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement, None, None
+     def parse_text_generator(self, command: str, persist: bool = False) -> Generator[
+         ProcessedQuery
+         | ProcessedQueryPersist
+         | ProcessedShowStatement
+         | ProcessedRawSQLStatement,
+         None,
+         None,
      ]:
          """Process a preql text command"""
          _, parsed = parse_text(command, self.environment)
@@ -239,6 +258,7 @@ class Executor(object):
                      PersistStatement,
                      MultiSelectStatement,
                      ShowStatement,
+                     RawSQLStatement,
                  ),
              )
          ]
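The executor's `execute_query` dispatch table gains handlers for both the parsed `RawSQLStatement` and the compiled `ProcessedRawSQLStatement`; each simply forwards the text to `execute_raw_sql`. A sketch, assuming `executor` is an already-constructed `Executor` bound to a live engine (construction is outside this diff):

```python
# Sketch: raw SQL flowing through the normal execute path.
from trilogy.core.models import RawSQLStatement

# Direct dispatch on the model:
result = executor.execute_query(RawSQLStatement(text="SELECT 1 AS one"))
print(result.fetchall())

# Via the parser: parse_text now also yields ProcessedRawSQLStatement entries.
for statement in executor.parse_text("raw_sql('''SELECT 2 AS two''');"):
    print(executor.execute_query(statement).fetchall())
```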
trilogy/parsing/parse_engine.py CHANGED
@@ -82,6 +82,7 @@ from trilogy.core.models import (
      Parenthetical,
      PersistStatement,
      Query,
+     RawSQLStatement,
      SelectStatement,
      SelectItem,
      WhereClause,
@@ -102,6 +103,7 @@ from trilogy.core.models import (
      RowsetDerivationStatement,
      LooseConceptList,
      list_to_wrapper,
+     NumericType,
  )
  from trilogy.parsing.exceptions import ParseError
  from trilogy.utility import string_to_hash
@@ -308,7 +310,9 @@ class ParseToObjects(Transformer):

      @v_args(meta=True)
      def struct_type(self, meta: Meta, args) -> StructType:
-         final: list[DataType | MapType | ListType | StructType | Concept] = []
+         final: list[
+             DataType | MapType | ListType | StructType | NumericType | Concept
+         ] = []
          for arg in args:
              if not isinstance(arg, (DataType, ListType, StructType)):
                  new = self.environment.concepts.__getitem__(  # type: ignore
@@ -322,12 +326,17 @@ class ParseToObjects(Transformer):
      def list_type(self, args) -> ListType:
          return ListType(type=args[0])

-     def data_type(self, args) -> DataType | ListType | StructType:
+     def numeric_type(self, args) -> NumericType:
+         return NumericType(precision=args[0], scale=args[1])
+
+     def data_type(self, args) -> DataType | ListType | StructType | NumericType:
          resolved = args[0]
          if isinstance(resolved, StructType):
              return resolved
          elif isinstance(resolved, ListType):
              return resolved
+         elif isinstance(resolved, NumericType):
+             return resolved
          return DataType(args[0].lower())

      def array_comparison(self, args) -> ComparisonOperator:
@@ -585,8 +594,11 @@ class ParseToObjects(Transformer):
          # namespace=self.environment.namespace,
          return Grain(components=[self.environment.concepts[a] for a in args[0]])

+     def MULTILINE_STRING(self, args) -> str:
+         return args[3:-3]
+
      def raw_column_assignment(self, args):
-         return RawColumnExpr(text=args[0][3:-3])
+         return RawColumnExpr(text=args[0])

      @v_args(meta=True)
      def datasource(self, meta: Meta, args):
@@ -756,6 +768,10 @@ class ParseToObjects(Transformer):
          self.environment.add_concept(new, meta=meta)
          return merge

+     @v_args(meta=True)
+     def rawsql_statement(self, meta: Meta, args) -> RawSQLStatement:
+         return RawSQLStatement(meta=Metadata(line_number=meta.line), text=args[0])
+
      def import_statement(self, args: list[str]) -> ImportStatement:
          alias = args[-1]
          path = args[0].split(".")
@@ -822,7 +838,11 @@ class ParseToObjects(Transformer):
              address=Address(location=address),
              grain=grain,
          )
-         return PersistStatement(select=select, datasource=new_datasource)
+         return PersistStatement(
+             select=select,
+             datasource=new_datasource,
+             meta=Metadata(line_number=meta.line),
+         )

      @v_args(meta=True)
      def align_item(self, meta: Meta, args) -> AlignItem:
@@ -864,6 +884,7 @@ class ParseToObjects(Transformer):
              where_clause=where,
              order_by=order_by,
              limit=limit,
+             meta=Metadata(line_number=meta.line),
          )
          for concept in multi.derived_concepts:
              self.environment.add_concept(concept, meta=meta)
@@ -887,7 +908,11 @@ class ParseToObjects(Transformer):
          if not select_items:
              raise ValueError("Malformed select, missing select items")
          output = SelectStatement(
-             selection=select_items, where_clause=where, limit=limit, order_by=order_by
+             selection=select_items,
+             where_clause=where,
+             limit=limit,
+             order_by=order_by,
+             meta=Metadata(line_number=meta.line),
          )
          for item in select_items:
              # we don't know the grain of an aggregate at assignment time
@@ -912,7 +937,7 @@ class ParseToObjects(Transformer):

      @v_args(meta=True)
      def query(self, meta: Meta, args):
-         return Query(text=args[0][3:-3])
+         return Query(text=args[0])

      def where(self, args):
          root = args[0]
@@ -1534,6 +1559,8 @@ class ParseToObjects(Transformer):
              DataType.STRING,
              DataType.FLOAT,
              DataType.NUMBER,
+             DataType.NUMERIC,
+             DataType.BOOL,
          },
          arg_count=2,
      )
@@ -1692,6 +1719,7 @@ def parse_text(text: str, environment: Optional[Environment] = None) -> Tuple[
          | SelectStatement
          | PersistStatement
          | ShowStatement
+         | RawSQLStatement
          | None
      ],
  ]:
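On the parsing side, `numeric_type` packs the two integer literals into `NumericType(precision, scale)`, and the new `MULTILINE_STRING` callback strips the triple quotes once, centrally, so `raw_column_assignment`, `query`, and the new `rawsql_statement` no longer slice `[3:-3]` themselves. A sketch using the `parse_text` entry point shown above (the declaration text is illustrative; exact trilogy declaration syntax is not part of this diff):

```python
# Sketch: "numeric(12,2)" in a declaration should now parse to
# NumericType(precision=12, scale=2) rather than the bare NUMERIC enum.
from trilogy.parsing.parse_engine import parse_text

environment, statements = parse_text("key price numeric(12,2);")
```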
trilogy/parsing/render.py CHANGED
@@ -38,6 +38,8 @@ from trilogy.core.models import (
      OrderBy,
      AlignClause,
      AlignItem,
+     RawSQLStatement,
+     NumericType,
  )
  from trilogy.core.enums import Modifier

@@ -171,6 +173,10 @@ class Renderer:
      def _(self, arg: DataType):
          return arg.value

+     @to_string.register
+     def _(self, arg: "NumericType"):
+         return f"""Numeric({arg.precision},{arg.scale})"""
+
      @to_string.register
      def _(self, arg: ListWrapper):
          return "[" + ", ".join([self.to_string(x) for x in arg]) + "]"
@@ -183,6 +189,10 @@ class Renderer:
      def _(self, arg: "Address"):
          return f"address {arg.location}"

+     @to_string.register
+     def _(self, arg: "RawSQLStatement"):
+         return f"raw_sql('''{arg.text}''');"
+
      @to_string.register
      def _(self, arg: "MagicConstants"):
          if arg == MagicConstants.NULL:
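The renderer keeps round-tripping symmetrical with the parser: a `NumericType` prints back as `Numeric(precision,scale)` and a `RawSQLStatement` prints back as a `raw_sql('''...''');` block. A sketch, assuming a default-constructed `Renderer` (construction details are not part of this diff):

```python
# Sketch: expected output of the two new to_string overloads.
from trilogy.core.models import NumericType, RawSQLStatement
from trilogy.parsing.render import Renderer

r = Renderer()
print(r.to_string(NumericType(precision=12, scale=2)))       # Numeric(12,2)
print(r.to_string(RawSQLStatement(text="SELECT 1 AS one")))  # raw_sql('''SELECT 1 AS one''');
```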
trilogy/parsing/trilogy.lark CHANGED
@@ -9,6 +9,7 @@
      | rowset_derivation_statement
      | import_statement
      | merge_statement
+     | rawsql_statement

  _TERMINATOR: ";"i /\s*/

@@ -71,6 +72,9 @@
  // merge statemment
  merge_statement: "merge" IDENTIFIER ("," IDENTIFIER)* ","?

+ // raw sql statement
+ rawsql_statement: "raw_sql"i "(" MULTILINE_STRING ")"
+
  // FUNCTION blocks
  function: raw_function
  function_binding_item: IDENTIFIER ":" data_type
@@ -276,12 +280,14 @@

  SHORTHAND_MODIFIER: "~"

- struct_type: "struct" "<" ((data_type | IDENTIFIER) ",")* (data_type | IDENTIFIER) ","? ">"
+ struct_type: "struct"i "<" ((data_type | IDENTIFIER) ",")* (data_type | IDENTIFIER) ","? ">"
+
+ list_type: "list"i "<" data_type ">"

- list_type: "list" "<" data_type ">"
+ numeric_type: "numeric"i "(" int_lit "," int_lit ")"


- !data_type: "string"i | "number"i | "numeric"i | "map"i | "list"i | "array"i | "any"i | "int"i | "bigint" | "date"i | "datetime"i | "timestamp"i | "float"i | "bool"i | struct_type | list_type
+ !data_type: "string"i | "number"i | "numeric"i | "map"i | "list"i | "array"i | "any"i | "int"i | "bigint"i | "date"i | "datetime"i | "timestamp"i | "float"i | "bool"i | numeric_type | struct_type | list_type

  PURPOSE: "key"i | "metric"i | CONST
  PROPERTY: "property"i