pytrilogy 0.0.3.52__py3-none-any.whl → 0.0.3.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pytrilogy
3
- Version: 0.0.3.52
3
+ Version: 0.0.3.54
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -1,5 +1,5 @@
1
- pytrilogy-0.0.3.52.dist-info/licenses/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
2
- trilogy/__init__.py,sha256=3lpAbpHzxQ6c0SUaZfiuOcDzIS6IZ5DyK1fl8FmZ3hE,303
1
+ pytrilogy-0.0.3.54.dist-info/licenses/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
2
+ trilogy/__init__.py,sha256=Wz4e3LWZ4MWyBm3EuLrz5-n5-yyu96rHR7k-4JPA4RE,303
3
3
  trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  trilogy/constants.py,sha256=5eQxk1A0pv-TQk3CCvgZCFA9_K-6nxrOm7E5Lxd7KIY,1652
5
5
  trilogy/engine.py,sha256=OK2RuqCIUId6yZ5hfF8J1nxGP0AJqHRZiafcowmW0xc,1728
@@ -11,16 +11,17 @@ trilogy/utility.py,sha256=euQccZLKoYBz0LNg5tzLlvv2YHvXh9HArnYp1V3uXsM,763
11
11
  trilogy/authoring/__init__.py,sha256=v9PRuZs4fTnxhpXAnwTxCDwlLasUax6g2FONidcujR4,2369
12
12
  trilogy/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  trilogy/core/constants.py,sha256=7XaCpZn5mQmjTobbeBn56SzPWq9eMNDfzfsRU-fP0VE,171
14
- trilogy/core/enums.py,sha256=uZTi9K6PEpQ1oFV4OpHlC1NUSxrmAFdQBfRyy9Rba-8,7440
14
+ trilogy/core/enums.py,sha256=QVylGAe6epdGGpOKkeJ4cbx0mIZb0aARAKhsoZaGhoA,7576
15
15
  trilogy/core/env_processor.py,sha256=pFsxnluKIusGKx1z7tTnfsd_xZcPy9pZDungkjkyvI0,3170
16
16
  trilogy/core/environment_helpers.py,sha256=VvPIiFemqaLLpIpLIqprfu63K7muZ1YzNg7UZIUph8w,8267
17
17
  trilogy/core/ergonomics.py,sha256=e-7gE29vPLFdg0_A1smQ7eOrUwKl5VYdxRSTddHweRA,1631
18
18
  trilogy/core/exceptions.py,sha256=JPYyBcit3T_pRtlHdtKSeVJkIyWUTozW2aaut25A2xI,673
19
- trilogy/core/functions.py,sha256=OIcaftda-afXrHMSvPksLbRTwPUwQHAIpy9l78EBZVU,28643
19
+ trilogy/core/functions.py,sha256=IvqHyuO__o6Th8tkDWjb9cDxQDly6l3ZEfJ9y8YrTRU,29227
20
20
  trilogy/core/graph_models.py,sha256=z17EoO8oky2QOuO6E2aMWoVNKEVJFhLdsQZOhC4fNLU,2079
21
21
  trilogy/core/internal.py,sha256=iicDBlC6nM8d7e7jqzf_ZOmpUsW8yrr2AA8AqEiLx-s,1577
22
- trilogy/core/optimization.py,sha256=O7ag0IVQlJyWdAXBi_hHeU3Df5DRyd75Vlz6pks2J10,8197
22
+ trilogy/core/optimization.py,sha256=ClDPMuuLBjuZCHdRGsuNhKNNTkbev1I22SbnLMzHT60,8839
23
23
  trilogy/core/query_processor.py,sha256=NNzYPKN5HzivQFXugSbJC_MaupkwOYii7A_vnXuBIK4,20063
24
+ trilogy/core/utility.py,sha256=3VC13uSQWcZNghgt7Ot0ZTeEmNqs__cx122abVq9qhM,410
24
25
  trilogy/core/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
26
  trilogy/core/models/author.py,sha256=KKW_3A1hdwq7D2dFwI6xZanukPuCQQ23R4GzE5VRJ6c,77206
26
27
  trilogy/core/models/build.py,sha256=yBiOQ4Bhjz09pSD1jSGhhf9QFFQuplrvZ0JQB5-iXHk,63104
@@ -28,7 +29,7 @@ trilogy/core/models/build_environment.py,sha256=s_C9xAHuD3yZ26T15pWVBvoqvlp2LdZ8
28
29
  trilogy/core/models/core.py,sha256=wx6hJcFECMG-Ij972ADNkr-3nFXkYESr82ObPiC46_U,10875
29
30
  trilogy/core/models/datasource.py,sha256=6RjJUd2u4nYmEwFBpJlM9LbHVYDv8iHJxqiBMZqUrwI,9422
30
31
  trilogy/core/models/environment.py,sha256=AVSrvjNcNX535GhCPtYhCRY2Lp_Hj0tdY3VVt_kZb9Q,27260
31
- trilogy/core/models/execute.py,sha256=ucxMwsu5OMoP0E4pVKtkCNU0nogElJKQAqfu3arE4Jo,34879
32
+ trilogy/core/models/execute.py,sha256=F7-9VyUz5MC__VUSXd4U7gUb23Dc5PH5FdMUt6FqCPM,35214
32
33
  trilogy/core/optimizations/__init__.py,sha256=YH2-mGXZnVDnBcWVi8vTbrdw7Qs5TivG4h38rH3js_I,290
33
34
  trilogy/core/optimizations/base_optimization.py,sha256=gzDOKImoFn36k7XBD3ysEYDnbnb6vdVIztUfFQZsGnM,513
34
35
  trilogy/core/optimizations/inline_datasource.py,sha256=AHuTGh2x0GQ8usOe0NiFncfTFQ_KogdgDl4uucmhIbI,4241
@@ -45,17 +46,17 @@ trilogy/core/processing/node_generators/group_node.py,sha256=nIfiMrJQEksUfqAeeA3
45
46
  trilogy/core/processing/node_generators/group_to_node.py,sha256=jKcNCDOY6fNblrdZwaRU0sbUSr9H0moQbAxrGgX6iGA,3832
46
47
  trilogy/core/processing/node_generators/multiselect_node.py,sha256=GWV5yLmKTe1yyPhN60RG1Rnrn4ktfn9lYYXi_FVU4UI,7061
47
48
  trilogy/core/processing/node_generators/node_merge_node.py,sha256=sv55oynfqgpHEpo1OEtVDri-5fywzPhDlR85qaWikvY,16195
48
- trilogy/core/processing/node_generators/rowset_node.py,sha256=YmBs6ZQ7azLXRFEmeoecpGjK4pMHsUCovuBxfb3UKZI,6848
49
+ trilogy/core/processing/node_generators/rowset_node.py,sha256=2BiSsegbRF9csJ_Xl8P_CxIm4dAAb7dF29u6v_Odr-A,6709
49
50
  trilogy/core/processing/node_generators/select_merge_node.py,sha256=lxXhMhDKGbu67QFNbbAT-BO8gbWppIvjn_hAXpLEPe0,19953
50
51
  trilogy/core/processing/node_generators/select_node.py,sha256=Y-zO0AFkTrpi2LyebjpyHU7WWANr7nKZSS9rY7DH4Wo,1888
51
52
  trilogy/core/processing/node_generators/synonym_node.py,sha256=9LHK2XHDjbyTLjmDQieskG8fqbiSpRnFOkfrutDnOTE,2258
52
53
  trilogy/core/processing/node_generators/union_node.py,sha256=VNo6Oey4p8etU9xrOh2oTT2lIOTvY6PULUPRvVa2uxU,2877
53
54
  trilogy/core/processing/node_generators/unnest_node.py,sha256=cOEKnMRzXUW3bwmiOlgn3E1-B38osng0dh2pDykwITY,2410
54
- trilogy/core/processing/node_generators/window_node.py,sha256=RUHgpYovQObFod1xRIMWtDzMcxwlm4-1Fdrf_Cuw5W4,6346
55
+ trilogy/core/processing/node_generators/window_node.py,sha256=GP3Hvkbb0TDA6ef7W7bmvQEHVH-NRIfBT_0W4fcH3g4,6529
55
56
  trilogy/core/processing/node_generators/select_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
57
  trilogy/core/processing/node_generators/select_helpers/datasource_injection.py,sha256=GMW07bb6hXurhF0hZLYoMAKSIS65tat5hwBjvqqPeSA,6516
57
- trilogy/core/processing/nodes/__init__.py,sha256=xPFF7x3TFs1Z4IcfthCykZgrksb-UhN-pc_oIigfFSo,6014
58
- trilogy/core/processing/nodes/base_node.py,sha256=z-aZEVjnLdFm6TpmneEm2bnRXj-tRFr7mN7DYG4zH9A,16967
58
+ trilogy/core/processing/nodes/__init__.py,sha256=oHDknqAZFm6KSDDbzJXj8HDGDh-dn-YjHDGqiXc8Psw,6054
59
+ trilogy/core/processing/nodes/base_node.py,sha256=IdKR2yaQGY1iRgKXgxF1UtlyuJEmPXWRh0rGFXv7Z_U,18111
59
60
  trilogy/core/processing/nodes/filter_node.py,sha256=5VtRfKbCORx0dV-vQfgy3gOEkmmscL9f31ExvlODwvY,2461
60
61
  trilogy/core/processing/nodes/group_node.py,sha256=MUvcOg9U5J6TnWBel8eht9PdI9BfAKjUxmfjP_ZXx9o,10484
61
62
  trilogy/core/processing/nodes/merge_node.py,sha256=02oWRca0ba41U6PSAB14jwnWWxoyrvxRPLwkli259SY,15865
@@ -69,9 +70,9 @@ trilogy/core/statements/build.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
69
70
  trilogy/core/statements/common.py,sha256=KxEmz2ySySyZ6CTPzn0fJl5NX2KOk1RPyuUSwWhnK1g,759
70
71
  trilogy/core/statements/execute.py,sha256=cSlvpHFOqpiZ89pPZ5GDp9Hu6j6uj-5_h21FWm_L-KM,1248
71
72
  trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
- trilogy/dialect/base.py,sha256=lZup3hq-DyYczpG260a0wBByHyOGkbw4-yrPJvXKOM4,42300
73
- trilogy/dialect/bigquery.py,sha256=mGnBl5A3rVi4f1gt74jnaxSOCheA07OcRi6ZD8KWOGg,3436
74
- trilogy/dialect/common.py,sha256=JQ8ONloalaWEXsTTWUhZcYyzMRaZ9HdUw7cN6QWtY5c,5295
73
+ trilogy/dialect/base.py,sha256=Y4m4RQdYI3usjeTLJKUM_SIIyuxXZfe2hboG5JSxDLU,42412
74
+ trilogy/dialect/bigquery.py,sha256=MyUumO8CeyPLh2JquoKPp6yQSZYEzUQ2mM07LCps-CA,3526
75
+ trilogy/dialect/common.py,sha256=EFf2Ye7XcwTti7IsFRwMo_4AW2CF8eaxSk8XA0mA5qw,5400
75
76
  trilogy/dialect/config.py,sha256=olnyeVU5W5T6b9-dMeNAnvxuPlyc2uefb7FRME094Ec,3834
76
77
  trilogy/dialect/dataframe.py,sha256=RUbNgReEa9g3pL6H7fP9lPTrAij5pkqedpZ99D8_5AE,1522
77
78
  trilogy/dialect/duckdb.py,sha256=IQzaRaCv5c6TUDERhbsLM4uTW0aGkO_DrAMR5k_j7TU,3861
@@ -90,9 +91,9 @@ trilogy/parsing/common.py,sha256=g1RmQF4fS_OgkcC6j4hnKIcn_ap0fFa_kzNUlH5D0nA,297
90
91
  trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
91
92
  trilogy/parsing/exceptions.py,sha256=Xwwsv2C9kSNv2q-HrrKC1f60JNHShXcCMzstTSEbiCw,154
92
93
  trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
93
- trilogy/parsing/parse_engine.py,sha256=p2YnE-YV-Dt0FlC6rP7Rq8phNxzk_O4ukNzVIDyHyu4,70054
94
+ trilogy/parsing/parse_engine.py,sha256=mQzUSWOX2QdUng2ozAkqLDfMLI_NoqPjAiauhe_mHz4,70606
94
95
  trilogy/parsing/render.py,sha256=hI4y-xjXrEXvHslY2l2TQ8ic0zAOpN41ADH37J2_FZY,19047
95
- trilogy/parsing/trilogy.lark,sha256=ijY6220e2hV21F1XFsvpYRimSrpNGIdjP7b0TVz7caI,13814
96
+ trilogy/parsing/trilogy.lark,sha256=ybs65Ckb89PCitK4hcwy6znqElcWvIeMDQzsI2p_3YI,14197
96
97
  trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
98
  trilogy/scripts/trilogy.py,sha256=1L0XrH4mVHRt1C9T1HnaDv2_kYEfbWTb5_-cBBke79w,3774
98
99
  trilogy/std/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -101,8 +102,8 @@ trilogy/std/display.preql,sha256=2BbhvqR4rcltyAbOXAUo7SZ_yGFYZgFnurglHMbjW2g,40
101
102
  trilogy/std/geography.preql,sha256=-fqAGnBL6tR-UtT8DbSek3iMFg66ECR_B_41pODxv-k,504
102
103
  trilogy/std/money.preql,sha256=ZHW-csTX-kYbOLmKSO-TcGGgQ-_DMrUXy0BjfuJSFxM,80
103
104
  trilogy/std/report.preql,sha256=LbV-XlHdfw0jgnQ8pV7acG95xrd1-p65fVpiIc-S7W4,202
104
- pytrilogy-0.0.3.52.dist-info/METADATA,sha256=TIB3nLBjPqlhtsB-ZgL61mCM8MPkXdJkhB-nAIuhdcA,9095
105
- pytrilogy-0.0.3.52.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
106
- pytrilogy-0.0.3.52.dist-info/entry_points.txt,sha256=ewBPU2vLnVexZVnB-NrVj-p3E-4vukg83Zk8A55Wp2w,56
107
- pytrilogy-0.0.3.52.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
108
- pytrilogy-0.0.3.52.dist-info/RECORD,,
105
+ pytrilogy-0.0.3.54.dist-info/METADATA,sha256=_Bv_Q8dZSrH750iNkW7KtpUP1vSAwZRnBzsG8KDXdXE,9095
106
+ pytrilogy-0.0.3.54.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
107
+ pytrilogy-0.0.3.54.dist-info/entry_points.txt,sha256=ewBPU2vLnVexZVnB-NrVj-p3E-4vukg83Zk8A55Wp2w,56
108
+ pytrilogy-0.0.3.54.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
109
+ pytrilogy-0.0.3.54.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.7.1)
2
+ Generator: setuptools (80.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
trilogy/__init__.py CHANGED
@@ -4,6 +4,6 @@ from trilogy.dialect.enums import Dialects
4
4
  from trilogy.executor import Executor
5
5
  from trilogy.parser import parse
6
6
 
7
- __version__ = "0.0.3.52"
7
+ __version__ = "0.0.3.54"
8
8
 
9
9
  __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
trilogy/core/enums.py CHANGED
@@ -181,6 +181,11 @@ class FunctionType(Enum):
181
181
  STRPOS = "strpos"
182
182
  CONTAINS = "contains"
183
183
 
184
+ # STRING REGEX
185
+ REGEXP_CONTAINS = "regexp_contains"
186
+ REGEXP_EXTRACT = "regexp_extract"
187
+ REGEXP_REPLACE = "regexp_replace"
188
+
184
189
  # Dates
185
190
  DATE = "date"
186
191
  DATETIME = "datetime"
trilogy/core/functions.py CHANGED
@@ -360,6 +360,24 @@ FUNCTION_REGISTRY: dict[FunctionType, FunctionConfig] = {
360
360
  output_type=DataType.STRING,
361
361
  arg_count=1,
362
362
  ),
363
+ FunctionType.REGEXP_CONTAINS: FunctionConfig(
364
+ valid_inputs={DataType.STRING},
365
+ output_purpose=Purpose.PROPERTY,
366
+ output_type=DataType.BOOL,
367
+ arg_count=2,
368
+ ),
369
+ FunctionType.REGEXP_EXTRACT: FunctionConfig(
370
+ valid_inputs={DataType.STRING},
371
+ output_purpose=Purpose.PROPERTY,
372
+ output_type=DataType.STRING,
373
+ arg_count=2,
374
+ ),
375
+ FunctionType.REGEXP_REPLACE: FunctionConfig(
376
+ valid_inputs={DataType.STRING},
377
+ output_purpose=Purpose.PROPERTY,
378
+ output_type=DataType.STRING,
379
+ arg_count=3,
380
+ ),
363
381
  FunctionType.DATE: FunctionConfig(
364
382
  valid_inputs={
365
383
  DataType.DATE,
trilogy/core/models/execute.py CHANGED
@@ -37,6 +37,7 @@ from trilogy.core.models.build import (
37
37
  LooseBuildConceptList,
38
38
  )
39
39
  from trilogy.core.models.datasource import Address
40
+ from trilogy.core.utility import safe_quote
40
41
  from trilogy.utility import unique
41
42
 
42
43
  LOGGER_PREFIX = "[MODELS_EXECUTE]"
@@ -201,6 +202,7 @@ class CTE(BaseModel):
201
202
  self.base_name_override = ds_being_inlined.safe_location
202
203
  self.base_alias_override = ds_being_inlined.safe_identifier
203
204
 
205
+ # if we have a join to the parent, we need to remove it
204
206
  for join in self.joins:
205
207
  if isinstance(join, InstantiatedUnnestJoin):
206
208
  continue
@@ -322,17 +324,20 @@ class CTE(BaseModel):
322
324
  return self.source.name
323
325
 
324
326
  @property
325
- def quote_address(self) -> dict[str, bool]:
326
-
327
- return {
328
- candidate.safe_identifier: (
329
- candidate.address.quoted and not candidate.address.is_query
330
- if isinstance(candidate, BuildDatasource)
331
- and isinstance(candidate.address, Address)
332
- else False
333
- )
334
- for candidate in self.source.datasources
335
- }
327
+ def quote_address(self) -> bool:
328
+ if self.is_root_datasource:
329
+ root = self.source.datasources[0]
330
+ if isinstance(root, BuildDatasource) and isinstance(root.address, Address):
331
+ return not root.address.is_query
332
+ return True
333
+ elif not self.source.datasources:
334
+ return False
335
+ base = self.source.datasources[0]
336
+ if isinstance(base, BuildDatasource):
337
+ if isinstance(base.address, Address):
338
+ return not base.address.is_query
339
+ return True
340
+ return True
336
341
 
337
342
  @property
338
343
  def base_alias(self) -> str:
@@ -926,8 +931,8 @@ class Join(BaseModel):
926
931
  def right_ref(self) -> str:
927
932
  if self.quote:
928
933
  if self.right_cte.identifier in self.inlined_ctes:
929
- return f"{self.quote}{self.right_cte.source.datasources[0].safe_location}{self.quote} as {self.right_cte.source.datasources[0].safe_identifier}"
930
- return self.right_cte.safe_identifier
934
+ return f"{safe_quote(self.right_cte.source.datasources[0].safe_location, self.quote)} as {self.quote}{self.right_cte.source.datasources[0].safe_identifier}{self.quote}"
935
+ return f"{self.quote}{self.right_cte.safe_identifier}{self.quote}"
931
936
  if self.right_cte.identifier in self.inlined_ctes:
932
937
  return f"{self.right_cte.source.datasources[0].safe_location} as {self.right_cte.source.datasources[0].safe_identifier}"
933
938
  return self.right_cte.safe_identifier
trilogy/core/optimization.py CHANGED
@@ -120,6 +120,13 @@ def gen_inverse_map(input: list[CTE | UnionCTE]) -> dict[str, list[CTE | UnionCT
120
120
  return inverse_map
121
121
 
122
122
 
123
+ SENSITIVE_DERIVATIONS = [
124
+ Derivation.UNNEST,
125
+ Derivation.WINDOW,
126
+ # Derivation.AGGREGATE,
127
+ ]
128
+
129
+
123
130
  def is_direct_return_eligible(cte: CTE | UnionCTE) -> CTE | UnionCTE | None:
124
131
  # if isinstance(select, (PersistStatement, MultiSelectStatement)):
125
132
  # return False
@@ -151,21 +158,31 @@ def is_direct_return_eligible(cte: CTE | UnionCTE) -> CTE | UnionCTE | None:
151
158
  ]
152
159
  condition_arguments = cte.condition.row_arguments if cte.condition else []
153
160
  for x in derived_concepts:
154
- if x.derivation == Derivation.WINDOW:
155
- return None
156
- if x.derivation == Derivation.UNNEST:
157
- return None
158
- if x.derivation == Derivation.AGGREGATE:
161
+ if x.derivation in SENSITIVE_DERIVATIONS:
159
162
  return None
160
163
  for x in parent_derived_concepts:
161
164
  if x.address not in condition_arguments:
162
165
  continue
163
- if x.derivation == Derivation.UNNEST:
164
- return None
165
- if x.derivation == Derivation.WINDOW:
166
+ if x.derivation in SENSITIVE_DERIVATIONS:
166
167
  return None
168
+ for x in condition_arguments:
169
+ # if it's derived in the parent
170
+ if x.address in parent_derived_concepts:
171
+ if x.derivation in SENSITIVE_DERIVATIONS:
172
+ return None
173
+ # this maybe needs to be recursive if we flatten a ton of derivation
174
+ # into one CTE
175
+ if not x.lineage:
176
+ continue
177
+ for z in x.lineage.concept_arguments:
178
+ # if it was preexisting in the parent, it's safe
179
+ if z.address in direct_parent.source.input_concepts:
180
+ continue
181
+ # otherwise if it's dangerous, play it safe.
182
+ if z.derivation in SENSITIVE_DERIVATIONS:
183
+ return None
167
184
  logger.info(
168
- f"[Optimization][EarlyReturn] Removing redundant output CTE with derived_concepts {[x.address for x in derived_concepts]}"
185
+ f"[Optimization][EarlyReturn] Removing redundant output CTE {cte.name} with derived_concepts {[x.address for x in derived_concepts]}"
169
186
  )
170
187
  return direct_parent
171
188
 
trilogy/core/processing/node_generators/rowset_node.py CHANGED
@@ -95,9 +95,7 @@ def gen_rowset_node(
95
95
  f"{padding(depth)}{LOGGER_PREFIX} hiding {final_hidden} local optional {local_optional}"
96
96
  )
97
97
  node.hide_output_concepts(final_hidden)
98
- assert node.resolution_cache
99
- # assume grain to be output of select
100
- # but don't include anything hidden(the non-rowset concepts)
98
+
101
99
  node.grain = BuildGrain.from_concepts(
102
100
  [
103
101
  x
trilogy/core/processing/node_generators/window_node.py CHANGED
@@ -11,7 +11,12 @@ from trilogy.core.models.build_environment import BuildEnvironment
11
11
  from trilogy.core.processing.node_generators.common import (
12
12
  gen_enrichment_node,
13
13
  )
14
- from trilogy.core.processing.nodes import History, StrategyNode, WindowNode
14
+ from trilogy.core.processing.nodes import (
15
+ History,
16
+ StrategyNode,
17
+ WhereSafetyNode,
18
+ WindowNode,
19
+ )
15
20
  from trilogy.core.processing.utility import create_log_lambda, padding
16
21
  from trilogy.utility import unique
17
22
 
@@ -71,10 +76,13 @@ def gen_window_node(
71
76
  if equivalent_optional:
72
77
  for x in equivalent_optional:
73
78
  assert isinstance(x.lineage, WINDOW_TYPES)
79
+ base, parents = resolve_window_parent_concepts(x, environment)
74
80
  logger.info(
75
- f"{padding(depth)}{LOGGER_PREFIX} found equivalent optional {x} with parents {resolve_window_parent_concepts(x, environment)[1]}"
81
+ f"{padding(depth)}{LOGGER_PREFIX} found equivalent optional {x} with parents {parents}"
76
82
  )
77
83
  additional_outputs.append(x)
84
+ # also append the base concept it's being grouped over
85
+ targets.append(base)
78
86
 
79
87
  grain_equivalents = [
80
88
  x
@@ -85,7 +93,8 @@ def gen_window_node(
85
93
  ]
86
94
 
87
95
  for x in grain_equivalents:
88
- logger.info("Appending grain equivalent %s", x)
96
+ if x.address in additional_outputs:
97
+ continue
89
98
  targets.append(x)
90
99
 
91
100
  # finally, the ones we'll need to enrich
@@ -134,7 +143,7 @@ def gen_window_node(
134
143
  _window_node.rebuild_cache()
135
144
  _window_node.resolve()
136
145
 
137
- window_node = StrategyNode(
146
+ window_node = WhereSafetyNode(
138
147
  input_concepts=[concept] + additional_outputs + parent_concepts + targets,
139
148
  output_concepts=[concept] + additional_outputs + parent_concepts + targets,
140
149
  environment=environment,
trilogy/core/processing/nodes/__init__.py CHANGED
@@ -6,7 +6,7 @@ from trilogy.core.models.build import BuildConcept, BuildWhereClause
6
6
  from trilogy.core.models.build_environment import BuildEnvironment
7
7
  from trilogy.core.models.environment import Environment
8
8
 
9
- from .base_node import NodeJoin, StrategyNode
9
+ from .base_node import NodeJoin, StrategyNode, WhereSafetyNode
10
10
  from .filter_node import FilterNode
11
11
  from .group_node import GroupNode
12
12
  from .merge_node import MergeNode
@@ -193,4 +193,5 @@ __all__ = [
193
193
  "UnnestNode",
194
194
  "UnionNode",
195
195
  "History",
196
+ "WhereSafetyNode",
196
197
  ]
trilogy/core/processing/nodes/base_node.py CHANGED
@@ -291,9 +291,14 @@ class StrategyNode:
291
291
  def add_output_concept(self, concept: BuildConcept, rebuild: bool = True):
292
292
  return self.add_output_concepts([concept], rebuild)
293
293
 
294
- def hide_output_concepts(self, concepts: List[BuildConcept], rebuild: bool = True):
294
+ def hide_output_concepts(
295
+ self, concepts: List[BuildConcept] | list[str] | set[str], rebuild: bool = True
296
+ ):
295
297
  for x in concepts:
296
- self.hidden_concepts.add(x.address)
298
+ if isinstance(x, BuildConcept):
299
+ self.hidden_concepts.add(x.address)
300
+ else:
301
+ self.hidden_concepts.add(x)
297
302
  if rebuild:
298
303
  self.rebuild_cache()
299
304
  return self
@@ -471,3 +476,28 @@ class NodeJoin:
471
476
  f" {self.right_node} on"
472
477
  f" {','.join([str(k) for k in self.concepts])}"
473
478
  )
479
+
480
+
481
+ class WhereSafetyNode(StrategyNode):
482
+ """Specialized node to be used to pad certain
483
+ select outputs that can't be immediately used in a where
484
+ clause; eg window functions. Will remove itself if not required."""
485
+
486
+ def resolve(self) -> QueryDatasource:
487
+ if not self.conditions and len(self.parents) == 1:
488
+ parent = self.parents[0]
489
+ parent = parent.copy()
490
+ # avoid performance hit by not rebuilding until end
491
+ parent.set_output_concepts(self.output_concepts, rebuild=False)
492
+ parent.hide_output_concepts(self.hidden_concepts, rebuild=False)
493
+
494
+ # these conditions
495
+ if self.preexisting_conditions:
496
+ parent.set_preexisting_conditions(self.preexisting_conditions)
497
+ # TODO: add a helper for this
498
+ parent.ordering = self.ordering
499
+
500
+ # actually build the node
501
+ parent.rebuild_cache()
502
+ return parent.resolve()
503
+ return super().resolve()
trilogy/core/utility.py ADDED
@@ -0,0 +1,8 @@
1
+ def safe_quote(string: str, quote_char: str):
2
+ # split dotted identifiers
3
+ # TODO: evaluate if we need smarter parsing for strings that could actually include .
4
+ if string.startswith("https://"):
5
+ # it's a url, no splitting
6
+ return f"{quote_char}{string}{quote_char}"
7
+ components = string.split(".")
8
+ return ".".join([f"{quote_char}{string}{quote_char}" for string in components])
trilogy/dialect/base.py CHANGED
@@ -74,6 +74,7 @@ from trilogy.core.statements.execute import (
74
74
  ProcessedRawSQLStatement,
75
75
  ProcessedShowStatement,
76
76
  )
77
+ from trilogy.core.utility import safe_quote
77
78
  from trilogy.dialect.common import render_join, render_unnest
78
79
  from trilogy.hooks.base_hook import BaseHook
79
80
 
@@ -204,6 +205,9 @@ FUNCTION_MAP = {
204
205
  FunctionType.SUBSTRING: lambda x: f"SUBSTRING({x[0]},{x[1]},{x[2]})",
205
206
  FunctionType.STRPOS: lambda x: f"STRPOS({x[0]},{x[1]})",
206
207
  FunctionType.CONTAINS: lambda x: f"CONTAINS({x[0]},{x[1]})",
208
+ FunctionType.REGEXP_CONTAINS: lambda x: f"REGEXP_CONTAINS({x[0]},{x[1]})",
209
+ FunctionType.REGEXP_EXTRACT: lambda x: f"REGEXP_EXTRACT({x[0]},{x[1]})",
210
+ FunctionType.REGEXP_REPLACE: lambda x: f"REGEXP_REPLACE({x[0]},{x[1]}, {x[2]})",
207
211
  # FunctionType.NOT_LIKE: lambda x: f" CASE WHEN {x[0]} like {x[1]} THEN 0 ELSE 1 END",
208
212
  # date types
209
213
  FunctionType.DATE_TRUNCATE: lambda x: f"date_trunc({x[0]},{x[1]})",
@@ -270,13 +274,6 @@ ORDER BY{% for order in order_by %}
270
274
  )
271
275
 
272
276
 
273
- def safe_quote(string: str, quote_char: str):
274
- # split dotted identifiers
275
- # TODO: evaluate if we need smarter parsing for strings that could actually include .
276
- components = string.split(".")
277
- return ".".join([f"{quote_char}{string}{quote_char}" for string in components])
278
-
279
-
280
277
  def safe_get_cte_value(coalesce, cte: CTE | UnionCTE, c: BuildConcept, quote_char: str):
281
278
  address = c.address
282
279
  raw = cte.source_map.get(address, None)
@@ -285,12 +282,17 @@ def safe_get_cte_value(coalesce, cte: CTE | UnionCTE, c: BuildConcept, quote_cha
285
282
  return None
286
283
  if isinstance(raw, str):
287
284
  rendered = cte.get_alias(c, raw)
288
- return f"{raw}.{safe_quote(rendered, quote_char)}"
285
+ return f"{quote_char}{raw}{quote_char}.{safe_quote(rendered, quote_char)}"
289
286
  if isinstance(raw, list) and len(raw) == 1:
290
287
  rendered = cte.get_alias(c, raw[0])
291
- return f"{raw[0]}.{safe_quote(rendered, quote_char)}"
288
+ return f"{quote_char}{raw[0]}{quote_char}.{safe_quote(rendered, quote_char)}"
292
289
  return coalesce(
293
- sorted([f"{x}.{safe_quote(cte.get_alias(c, x), quote_char)}" for x in raw])
290
+ sorted(
291
+ [
292
+ f"{quote_char}{x}{quote_char}.{safe_quote(cte.get_alias(c, x), quote_char)}"
293
+ for x in raw
294
+ ]
295
+ )
294
296
  )
295
297
 
296
298
 
@@ -783,12 +785,12 @@ class BaseDialect:
783
785
  else:
784
786
  source = None
785
787
  else:
786
- if cte.quote_address.get(cte.source.datasources[0].safe_identifier, False):
787
- source = f"{self.QUOTE_CHARACTER}{cte.base_name}{self.QUOTE_CHARACTER}"
788
+ if cte.quote_address:
789
+ source = safe_quote(cte.base_name, self.QUOTE_CHARACTER)
788
790
  else:
789
791
  source = cte.base_name
790
792
  if cte.base_name != cte.base_alias:
791
- source = f"{source} as {cte.base_alias}"
793
+ source = f"{source} as {self.QUOTE_CHARACTER}{cte.base_alias}{self.QUOTE_CHARACTER}"
792
794
  if not cte.render_from_clause:
793
795
  final_joins = []
794
796
  else:
trilogy/dialect/bigquery.py CHANGED
@@ -18,6 +18,7 @@ FUNCTION_MAP = {
18
18
  FunctionType.LIKE: lambda x: (
19
19
  f" CASE WHEN {x[0]} like {x[1]} THEN True ELSE False END"
20
20
  ),
21
+ FunctionType.IS_NULL: lambda x: f"CASE WHEN {x[0]} IS NULL THEN True ELSE False END",
21
22
  FunctionType.MINUTE: lambda x: f"EXTRACT(MINUTE from {x[0]})",
22
23
  FunctionType.SECOND: lambda x: f"EXTRACT(SECOND from {x[0]})",
23
24
  FunctionType.HOUR: lambda x: f"EXTRACT(HOUR from {x[0]})",
trilogy/dialect/common.py CHANGED
@@ -63,8 +63,8 @@ def render_join_concept(
63
63
  elif isinstance(raw_content, BuildFunction):
64
64
  rval = render_expr(raw_content, cte=cte)
65
65
  return rval
66
- return f"{name}.{quote_character}{raw_content}{quote_character}"
67
- return f"{name}.{quote_character}{concept.safe_address}{quote_character}"
66
+ return f"{quote_character}{name}{quote_character}.{quote_character}{raw_content}{quote_character}"
67
+ return f"{quote_character}{name}{quote_character}.{quote_character}{concept.safe_address}{quote_character}"
68
68
 
69
69
 
70
70
  def render_join(
@@ -91,8 +91,9 @@ def render_join(
91
91
  return f"FULL JOIN {render_unnest(unnest_mode, quote_character, join.object_to_unnest, render_expr_func, cte)}"
92
92
  # left_name = join.left_name
93
93
  right_name = join.right_name
94
- if cte.quote_address.get(join.right_name, False):
95
- join.quote = quote_character
94
+ join.quote = quote_character
95
+ # if cte.quote_address.get(join.right_name, False):
96
+ # join.quote = quote_character
96
97
  right_base = join.right_ref
97
98
  base_joinkeys = []
98
99
  if join.joinkey_pairs:
trilogy/parsing/parse_engine.py CHANGED
@@ -1744,9 +1744,27 @@ class ParseToObjects(Transformer):
1744
1744
  return self.function_factory.create_function(args, FunctionType.SUBSTRING, meta)
1745
1745
 
1746
1746
  @v_args(meta=True)
1747
- def lower(self, meta, args):
1747
+ def flower(self, meta, args):
1748
1748
  return self.function_factory.create_function(args, FunctionType.LOWER, meta)
1749
1749
 
1750
+ @v_args(meta=True)
1751
+ def fregexp_contains(self, meta, args):
1752
+ return self.function_factory.create_function(
1753
+ args, FunctionType.REGEXP_CONTAINS, meta
1754
+ )
1755
+
1756
+ @v_args(meta=True)
1757
+ def fregexp_extract(self, meta, args):
1758
+ return self.function_factory.create_function(
1759
+ args, FunctionType.REGEXP_EXTRACT, meta
1760
+ )
1761
+
1762
+ @v_args(meta=True)
1763
+ def fregexp_replace(self, meta, args):
1764
+ return self.function_factory.create_function(
1765
+ args, FunctionType.REGEXP_REPLACE, meta
1766
+ )
1767
+
1750
1768
  # date functions
1751
1769
  @v_args(meta=True)
1752
1770
  def fdate(self, meta, args):
trilogy/parsing/trilogy.lark CHANGED
@@ -267,7 +267,7 @@
267
267
  _UPPER.1: "upper("i
268
268
  upper: _UPPER expr ")"
269
269
  _LOWER.1: "lower("i
270
- lower: _LOWER expr ")"
270
+ flower: _LOWER expr ")"
271
271
  _SPLIT.1: "split("i
272
272
  fsplit: _SPLIT expr "," string_lit ")"
273
273
  _STRPOS.1: "strpos("i
@@ -276,8 +276,14 @@
276
276
  fcontains: _CONTAINS expr "," expr ")"
277
277
  _SUBSTRING.1: "substring("i
278
278
  fsubstring: _SUBSTRING expr "," expr "," expr ")"
279
-
280
- _string_functions: like | ilike | upper | lower | fsplit | fstrpos | fsubstring | fcontains
279
+ _REGEXP_EXTRACT.1: "regexp_extract("
280
+ fregexp_extract: _REGEXP_EXTRACT expr "," expr ")"
281
+ _REGEXP_CONTAINS.1: "regexp_contains("
282
+ fregexp_contains: _REGEXP_CONTAINS expr "," expr ")"
283
+ _REGEXP_REPLACE.1: "regexp_replace("
284
+ fregexp_replace: _REGEXP_REPLACE expr "," expr "," expr ")"
285
+
286
+ _string_functions: like | ilike | upper | flower | fsplit | fstrpos | fsubstring | fcontains | fregexp_extract | fregexp_contains | fregexp_replace
281
287
 
282
288
  // special aggregate
283
289
  _GROUP.1: "group("i
@@ -311,7 +317,8 @@
311
317
  _DATE.1: "date("i
312
318
  fdate: _DATE expr ")"
313
319
  fdatetime: "datetime"i "(" expr ")"
314
- ftimestamp: "timestamp"i "(" expr ")"
320
+ _TIMESTAMP.1: "timestamp("i
321
+ ftimestamp: _TIMESTAMP expr ")"
315
322
 
316
323
  _SECOND.1: "second("i
317
324
  fsecond: _SECOND expr ")"