pytrilogy-0.0.2.2-py3-none-any.whl → pytrilogy-0.0.2.3-py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

This release of pytrilogy has been flagged as potentially problematic.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.2.2
3
+ Version: 0.0.2.3
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -247,7 +247,7 @@ N/A, only supports default auth. In python you can pass in a custom client.
247
247
 
248
248
 
249
249
  > [!TIP]
250
- > The CLI can also be used for formatting. PreQL has a default formatting style that should always be adhered to. `trilogy fmt <path to trilogy file>`
250
+ > The CLI can also be used for formatting. Trilogy has a default formatting style that should always be adhered to. `trilogy fmt <path to trilogy file>`
251
251
 
252
252
 
253
253
  ## More Examples
@@ -284,7 +284,7 @@ but all are worth checking out. Please open PRs/comment for anything missed!
284
284
 
285
285
  #### CONCEPT
286
286
 
287
- Types: `string | int | float | bool | date | datetime | time | timestamp | interval`;
287
+ Types: `string | int | float | bool | date | datetime | time | numeric(scale, precision) | timestamp | interval`;
288
288
 
289
289
  Key:
290
290
  `key <name> <type>;`
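The Types line above now documents `numeric(scale, precision)`, and the grammar changes at the end of this diff also add a `map<key, value>` type. A minimal sketch of the Python-side models behind that syntax, assuming the `NumericType(precision, scale)` and `MapType(key_type, value_type)` constructors shown in the `trilogy/core/models.py` and `trilogy/parsing/parse_engine.py` hunks below:

```python
# Sketch only: field names are taken from the hunks in this diff and are not
# otherwise verified against the release.
from trilogy.core.models import DataType, MapType, NumericType

price_type = NumericType(precision=12, scale=2)
label_type = MapType(key_type=DataType.STRING, value_type=DataType.STRING)
```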
@@ -1,4 +1,4 @@
1
- trilogy/__init__.py,sha256=mSeU_j02wb5aKd5vDcHQJdZaoG1AB96bCjUn2tIVpwk,290
1
+ trilogy/__init__.py,sha256=P6eUqLnB3qFOCWULAL337Zc9648DuD0E0pGbTqzHChw,290
2
2
  trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  trilogy/constants.py,sha256=KIvi-cgU4R9urNgDdGiCsRkCrzjAfM4xGHhZb1SVy2w,881
4
4
  trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
@@ -8,34 +8,34 @@ trilogy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  trilogy/utility.py,sha256=zM__8r29EsyDW7K9VOHz8yvZC2bXFzh7xKy3cL7GKsk,707
9
9
  trilogy/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  trilogy/core/constants.py,sha256=LL8NLvxb3HRnAjvofyLRXqQJijLcYiXAQYQzGarVD-g,128
11
- trilogy/core/enums.py,sha256=DWKPuShM_DbyAenjNFOAxcIH2l0QC9WgIwqZKPQqk_o,5850
12
- trilogy/core/env_processor.py,sha256=Wpy-iiduBwHntTsQTYWBO1O0i3Ij9_VoL3d4IWDEoj4,2126
11
+ trilogy/core/enums.py,sha256=zf5VvxkXB9wz-cIMwGB4d00KXOTYfBhkbIp6Ff45YVs,5880
12
+ trilogy/core/env_processor.py,sha256=l7TAB0LalxjTYJdTlcmFIkLXuyxa9lrenWLeZfa9qw0,2276
13
13
  trilogy/core/environment_helpers.py,sha256=mzBDHhdF9ssZ_-LY8CcaM_ddfJavkpRYrFImUd3cjXI,5972
14
14
  trilogy/core/ergonomics.py,sha256=w3gwXdgrxNHCuaRdyKg73t6F36tj-wIjQf47WZkHmJk,1465
15
15
  trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,561
16
- trilogy/core/functions.py,sha256=xISGHMiUR9hFyoZe2l3VLTMO6UEtyGHQcMuovzlVRqw,9492
16
+ trilogy/core/functions.py,sha256=MSNe-OLAEvj72qn5mif3ce9ncRh3ZATpX3V300yAm98,10125
17
17
  trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
18
18
  trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
19
- trilogy/core/models.py,sha256=4iwlTfHzzmbDhm9vt7zgExMqY1xKyQL-fVG5Nrm-YTU,134722
20
- trilogy/core/optimization.py,sha256=iX8BCh2BIEPJ6cYlcn7piDKFfig47azphhyqkwkYJM4,4291
21
- trilogy/core/query_processor.py,sha256=PROehLEOMOuKy1EAUeIgdfGq1LrJsX0N7Z2qDED6S30,17580
19
+ trilogy/core/models.py,sha256=eZKTOD4mXGCH78YE0xNlHex15t1Ur4d6CGAZ1QNcBCQ,136597
20
+ trilogy/core/optimization.py,sha256=A8S9C9H5RcQcFSQLYtEEBnm-r1CW_e9GEWlLK7q3MqA,4930
21
+ trilogy/core/query_processor.py,sha256=BrP4x96Nbzfq6MfxYzxnOL6zYCLIoHLEii65MBT-Daw,17731
22
22
  trilogy/core/optimizations/__init__.py,sha256=pxRzNzd2g8oRMy4f_ub5va6bNS2pd4hnyp9JBzTKc1E,300
23
23
  trilogy/core/optimizations/base_optimization.py,sha256=tWWT-xnTbnEU-mNi_isMNbywm8B9WTRsNFwGpeh3rqE,468
24
24
  trilogy/core/optimizations/inline_constant.py,sha256=kHNyc2UoaPVdYfVAPAFwnWuk4sJ_IF5faRtVcDOrBtw,1110
25
25
  trilogy/core/optimizations/inline_datasource.py,sha256=KiwZ4fnRnSOVK8zzdwMAkafszo3fNn5LY8xgb0X1CbI,3194
26
- trilogy/core/optimizations/predicate_pushdown.py,sha256=mduJn4zzKmLVNUja60ZlY6LlNp389OwjEbq2Rj8K6co,5389
26
+ trilogy/core/optimizations/predicate_pushdown.py,sha256=HWHHG0hejBoIv985hVlsh3INPTrx8EGwCRWeeiPEHsc,7374
27
27
  trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
- trilogy/core/processing/concept_strategies_v3.py,sha256=hmbck3a23N61HfMwC2z3lxA6Yms9TP7Lm3Aw7GiDcMA,23765
28
+ trilogy/core/processing/concept_strategies_v3.py,sha256=Ssekd9sA7W-JgAif-Bp7ID1pZujaKJzlcP_KuoH2h4I,23938
29
29
  trilogy/core/processing/graph_utils.py,sha256=aq-kqk4Iado2HywDxWEejWc-7PGO6Oa-ZQLAM6XWPHw,1199
30
- trilogy/core/processing/utility.py,sha256=hgfPfz5FlghN8edIVGTQ21eDUof5EMvnl3Vu64hjfqY,13289
30
+ trilogy/core/processing/utility.py,sha256=L92nMZQqVSb_aOtYGi2rkvDD10e8WWg5-oQ7ApJBlI4,12374
31
31
  trilogy/core/processing/node_generators/__init__.py,sha256=-mzYkRsaRNa_dfTckYkKVFSR8h8a3ihEiPJDU_tAmDo,672
32
- trilogy/core/processing/node_generators/basic_node.py,sha256=uD6noJOgrtmhjb-1aa-vD8sQ97s9Tya6wbso7V_AdyE,2819
32
+ trilogy/core/processing/node_generators/basic_node.py,sha256=4242PNGTCm2tklqMIkqVu5Iv4m_IeTnOYXxDveuCDZM,2856
33
33
  trilogy/core/processing/node_generators/common.py,sha256=liZDth7mvhkF_sUFXK7JitJsiaKD132w3ySLbF7l-nE,8956
34
34
  trilogy/core/processing/node_generators/filter_node.py,sha256=5B7UCK84A9lGgJ7EjCiC7YGWWM1xPyQGHkBwZgYypFM,4585
35
35
  trilogy/core/processing/node_generators/group_node.py,sha256=xWI1xNIXEOj6jlRGD9hcv2_vVNvY6lpzJl6pQ8HuFBE,2988
36
36
  trilogy/core/processing/node_generators/group_to_node.py,sha256=BzPdYwzoo8gRMH7BDffTTXq4z-mjfCEzvfB5I-P0_nw,2941
37
37
  trilogy/core/processing/node_generators/multiselect_node.py,sha256=vP84dnLQy6dtypi6mUbt9sMAcmmrTgQ1Oz4GI6X1IEo,6421
38
- trilogy/core/processing/node_generators/node_merge_node.py,sha256=YivTVrTEUJuQbQAHFVB2lUD4mJYQ-kTVVpwY4oT9A-Y,14664
38
+ trilogy/core/processing/node_generators/node_merge_node.py,sha256=JDQEIs43BJCgz0yofztcjd7J20Ybk4W40IJG_6blQJI,12168
39
39
  trilogy/core/processing/node_generators/rowset_node.py,sha256=2ROd2c1_o4h5sbnDdaiL1o9UiiwyYw-B6OKyfHYNe0A,6188
40
40
  trilogy/core/processing/node_generators/select_node.py,sha256=XGcz74XsWv5POWhV2gGC_jbi7T9g7sY-VtP3sjlppFc,19057
41
41
  trilogy/core/processing/node_generators/unnest_node.py,sha256=6CH66eGwpadNX7TzUhWZ8aqIisOtQeHINbLV6X3QBUk,1779
@@ -49,14 +49,14 @@ trilogy/core/processing/nodes/select_node_v2.py,sha256=COu-WPuyabGAc3HTkJB-_7eNz
49
49
  trilogy/core/processing/nodes/unnest_node.py,sha256=JFtm90IVM-46aCYkTNIaJah6v9ApAfonjVhcVM1HmDE,1903
50
50
  trilogy/core/processing/nodes/window_node.py,sha256=X7qxLUKd3tekjUUsmH_4vz5b-U89gMnGd04VBxuu2Ns,1280
51
51
  trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
- trilogy/dialect/base.py,sha256=EZCP3xU8-B4xpU1pFFew4CGtkNWignbyPll6OTENFLg,29832
52
+ trilogy/dialect/base.py,sha256=bjoDwVAARpCVLjJvnfNpr1JtvoamkAAYZxw-G5QRV_A,27845
53
53
  trilogy/dialect/bigquery.py,sha256=BAN2o0SOuLvzLYLV7Sc11woapS5L7Cc36aMBeaphW5k,2905
54
54
  trilogy/dialect/common.py,sha256=5jdOHWIj3Xv8F8y5mnyWHLjxD_we2fncM-ZnNetJP7U,2781
55
55
  trilogy/dialect/config.py,sha256=tLVEMctaTDhUgARKXUNfHUcIolGaALkQ0RavUvXAY4w,2994
56
56
  trilogy/dialect/duckdb.py,sha256=JQPnR5F39iDQXfTccKhbq7xWYGzYc0dPpn5vo0Qd_Vk,3076
57
57
  trilogy/dialect/enums.py,sha256=4NdpsydBpDn6jnh0JzFz5VvQEtnShErWtWHVyT6TNpw,3948
58
58
  trilogy/dialect/postgres.py,sha256=r47xbCA7nfEYENofiVfLZ-SnReNfDmUmW4OSHVkkP4E,3206
59
- trilogy/dialect/presto.py,sha256=ouSVNbs7e3eEc20emLfxKyRjdINSibPGUFL4X9OwboA,3163
59
+ trilogy/dialect/presto.py,sha256=evb7Tq77l4iBrZnBeSPfEdmdObgrca-p5H72CIrEips,3221
60
60
  trilogy/dialect/snowflake.py,sha256=N3HknYgN-fjD7BLX1Ucj-ss_ku2Ox8DgLsF3BIHutHo,2941
61
61
  trilogy/dialect/sql_server.py,sha256=HX68vNTrcDaTnOxe6Zbx_PBgrO42e2VuThxO6CYQ2cY,3026
62
62
  trilogy/hooks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -65,18 +65,18 @@ trilogy/hooks/graph_hook.py,sha256=onHvMQPwj_KOS3HOTpRFiy7QLLKAiycq2MzJ_Q0Oh5Y,2
65
65
  trilogy/hooks/query_debugger.py,sha256=NDChfkPmmW-KINa4TaQmDe_adGiwsKFdGLDSYpbodeU,4282
66
66
  trilogy/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
67
  trilogy/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
- trilogy/parsing/common.py,sha256=8MDRPccWymtVkHVQMzESjL5tOH79flWU5jo6Ys-C5UQ,5963
68
+ trilogy/parsing/common.py,sha256=UZdG51pHNMGokvHjUxv7Zt5Xrh2snxfZuVOZVyg9yZ8,6091
69
69
  trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
70
70
  trilogy/parsing/exceptions.py,sha256=92E5i2frv5hj9wxObJZsZqj5T6bglvPzvdvco_vW1Zk,38
71
71
  trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
72
- trilogy/parsing/parse_engine.py,sha256=rjTDs8AhJ6NjPCugfZCTdu7d_bUOSC9M8MAl9KmURNw,57763
72
+ trilogy/parsing/parse_engine.py,sha256=-dXf-W6cOj-l2hDvgb7TWJKLZZEHAj0kwaw-DT0YKNU,58336
73
73
  trilogy/parsing/render.py,sha256=Gy_6wVYPwYLf35Iota08sbqveuWILtUhI8MYStcvtJM,12174
74
- trilogy/parsing/trilogy.lark,sha256=Zn5UHqiNoVKa76r1af9KND0TNdsLIeHpyG3hJjUOOTQ,11000
74
+ trilogy/parsing/trilogy.lark,sha256=DiJe_XQZ9lXVUW5AF9eZl531lPAw7mm9MMeHCGkw7kc,11148
75
75
  trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
76
  trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
77
- pytrilogy-0.0.2.2.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
78
- pytrilogy-0.0.2.2.dist-info/METADATA,sha256=rjwj8b_CWxQc82n5HJfPTBYlhtVKNUwIhAkWLusfA7g,7876
79
- pytrilogy-0.0.2.2.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
80
- pytrilogy-0.0.2.2.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
81
- pytrilogy-0.0.2.2.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
82
- pytrilogy-0.0.2.2.dist-info/RECORD,,
77
+ pytrilogy-0.0.2.3.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
78
+ pytrilogy-0.0.2.3.dist-info/METADATA,sha256=9Q2murSlwgsOWkr6A3FkEWWtlPKoI-VTmkONLbcnNPU,7906
79
+ pytrilogy-0.0.2.3.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
80
+ pytrilogy-0.0.2.3.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
81
+ pytrilogy-0.0.2.3.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
82
+ pytrilogy-0.0.2.3.dist-info/RECORD,,
trilogy/__init__.py CHANGED
@@ -4,6 +4,6 @@ from trilogy.executor import Executor
4
4
  from trilogy.parser import parse
5
5
  from trilogy.constants import CONFIG
6
6
 
7
- __version__ = "0.0.2.2"
7
+ __version__ = "0.0.2.3"
8
8
 
9
9
  __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
trilogy/core/enums.py CHANGED
@@ -119,6 +119,7 @@ class FunctionType(Enum):
119
119
 
120
120
  # COMPLEX
121
121
  INDEX_ACCESS = "index_access"
122
+ MAP_ACCESS = "map_access"
122
123
  ATTR_ACCESS = "attr_access"
123
124
 
124
125
  # TEXT AND MAYBE MORE
@@ -17,6 +17,11 @@ def add_concept(concept: Concept, g: ReferenceGraph):
17
17
  for _, pseudonym in concept.pseudonyms.items():
18
18
  pseudonym = pseudonym.with_default_grain()
19
19
  pseudonym_node = concept_to_node(pseudonym)
20
+ if (pseudonym_node, node_name) in g.edges and (
21
+ node_name,
22
+ pseudonym_node,
23
+ ) in g.edges:
24
+ continue
20
25
  if pseudonym_node.split("@")[0] == node_name.split("@")[0]:
21
26
  continue
22
27
  g.add_edge(pseudonym_node, node_name, pseudonym=True)
trilogy/core/functions.py CHANGED
@@ -202,15 +202,21 @@ def Split(args: list[Concept]) -> Function:
202
202
  )
203
203
 
204
204
 
205
+ def get_index_output_type(
206
+ arg: Concept,
207
+ ) -> DataType | StructType | MapType | ListType | NumericType:
208
+ if isinstance(arg.datatype, ListType):
209
+ return arg.datatype.value_data_type
210
+ elif isinstance(arg.datatype, MapType):
211
+ return arg.datatype.value_data_type
212
+ return arg.datatype
213
+
214
+
205
215
  def IndexAccess(args: list[Concept]):
206
216
  return Function(
207
217
  operator=FunctionType.INDEX_ACCESS,
208
218
  arguments=args,
209
- output_datatype=(
210
- args[0].datatype.value_data_type
211
- if isinstance(args[0].datatype, ListType)
212
- else args[0].datatype
213
- ),
219
+ output_datatype=get_index_output_type(args[0]),
214
220
  output_purpose=Purpose.PROPERTY,
215
221
  valid_inputs=[
216
222
  {
@@ -226,6 +232,25 @@ def IndexAccess(args: list[Concept]):
226
232
  )
227
233
 
228
234
 
235
+ def MapAccess(args: list[Concept]):
236
+ return Function(
237
+ operator=FunctionType.MAP_ACCESS,
238
+ arguments=args,
239
+ output_datatype=get_index_output_type(args[0]),
240
+ output_purpose=Purpose.PROPERTY,
241
+ valid_inputs=[
242
+ {
243
+ DataType.MAP,
244
+ },
245
+ {
246
+ DataType.INTEGER,
247
+ DataType.STRING,
248
+ },
249
+ ],
250
+ arg_count=2,
251
+ )
252
+
253
+
229
254
  def AttrAccess(args: list[Concept]):
230
255
  return Function(
231
256
  operator=FunctionType.ATTR_ACCESS,
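The new `MapAccess` function reuses the `get_index_output_type` helper introduced above, so indexing into a list or map concept yields the element or value type rather than the container type. A self-contained sketch of that typing rule, applied to the container models directly instead of full `Concept` objects:

```python
# Illustrative only: mirrors the rule get_index_output_type applies to a concept's
# datatype (ListType/MapType expose their element type via value_data_type; any
# other datatype passes through unchanged).
from trilogy.core.models import DataType, ListType, MapType

for dt in (
    ListType(type=DataType.INTEGER),                               # list<int>         -> int
    MapType(key_type=DataType.STRING, value_type=DataType.FLOAT),  # map<string,float> -> float
    DataType.STRING,                                               # scalar            -> unchanged
):
    element = dt.value_data_type if isinstance(dt, (ListType, MapType)) else dt
    print(dt, "->", element)
```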
trilogy/core/models.py CHANGED
@@ -67,7 +67,7 @@ from trilogy.core.enums import (
67
67
  )
68
68
  from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
69
69
  from trilogy.utility import unique
70
- from collections import UserList
70
+ from collections import UserList, UserDict
71
71
  from functools import cached_property
72
72
  from abc import ABC
73
73
 
@@ -267,7 +267,7 @@ class ListType(BaseModel):
267
267
 
268
268
  class MapType(BaseModel):
269
269
  key_type: DataType
270
- content_type: ALL_TYPES
270
+ value_type: ALL_TYPES
271
271
 
272
272
  @property
273
273
  def data_type(self):
@@ -277,6 +277,22 @@ class MapType(BaseModel):
277
277
  def value(self):
278
278
  return self.data_type.value
279
279
 
280
+ @property
281
+ def value_data_type(
282
+ self,
283
+ ) -> DataType | StructType | MapType | ListType | NumericType:
284
+ if isinstance(self.value_type, Concept):
285
+ return self.value_type.datatype
286
+ return self.value_type
287
+
288
+ @property
289
+ def key_data_type(
290
+ self,
291
+ ) -> DataType | StructType | MapType | ListType | NumericType:
292
+ if isinstance(self.key_type, Concept):
293
+ return self.key_type.datatype
294
+ return self.key_type
295
+
280
296
 
281
297
  class StructType(BaseModel):
282
298
  fields: List[ALL_TYPES]
@@ -314,6 +330,34 @@ class ListWrapper(Generic[VT], UserList):
314
330
  return cls(v, type=arg_to_datatype(v[0]))
315
331
 
316
332
 
333
+ class MapWrapper(Generic[KT, VT], UserDict):
334
+ """Used to distinguish parsed map objects from other dicts"""
335
+
336
+ def __init__(self, *args, key_type: DataType, value_type: DataType, **kwargs):
337
+ super().__init__(*args, **kwargs)
338
+ self.key_type = key_type
339
+ self.value_type = value_type
340
+
341
+ @classmethod
342
+ def __get_pydantic_core_schema__(
343
+ cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
344
+ ) -> core_schema.CoreSchema:
345
+ args = get_args(source_type)
346
+ if args:
347
+ schema = handler(Dict[args]) # type: ignore
348
+ else:
349
+ schema = handler(Dict)
350
+ return core_schema.no_info_after_validator_function(cls.validate, schema)
351
+
352
+ @classmethod
353
+ def validate(cls, v):
354
+ return cls(
355
+ v,
356
+ key_type=arg_to_datatype(list(v.keys()).pop()),
357
+ value_type=arg_to_datatype(list(v.values()).pop()),
358
+ )
359
+
360
+
317
361
  class Metadata(BaseModel):
318
362
  """Metadata container object.
319
363
  TODO: support arbitrary tags"""
@@ -949,8 +993,8 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
949
993
  output_purpose: Purpose
950
994
  valid_inputs: Optional[
951
995
  Union[
952
- Set[DataType | ListType | StructType | NumericType],
953
- List[Set[DataType | ListType | StructType] | NumericType],
996
+ Set[DataType | ListType | StructType | MapType | NumericType],
997
+ List[Set[DataType | ListType | StructType | MapType | NumericType]],
954
998
  ]
955
999
  ] = None
956
1000
  arguments: Sequence[
@@ -961,17 +1005,17 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
961
1005
  int,
962
1006
  float,
963
1007
  str,
1008
+ MapWrapper[Any, Any],
964
1009
  DataType,
965
1010
  ListType,
1011
+ MapType,
966
1012
  NumericType,
967
1013
  DatePart,
968
1014
  "Parenthetical",
969
1015
  CaseWhen,
970
1016
  "CaseElse",
971
1017
  list,
972
- ListWrapper[int],
973
- ListWrapper[str],
974
- ListWrapper[float],
1018
+ ListWrapper[Any],
975
1019
  ]
976
1020
  ]
977
1021
 
@@ -2342,7 +2386,6 @@ class CTE(BaseModel):
2342
2386
  hidden_concepts: List[Concept] = Field(default_factory=list)
2343
2387
  order_by: Optional[OrderBy] = None
2344
2388
  limit: Optional[int] = None
2345
- requires_nesting: bool = True
2346
2389
  base_name_override: Optional[str] = None
2347
2390
  base_alias_override: Optional[str] = None
2348
2391
 
@@ -3141,6 +3184,7 @@ class Environment(BaseModel):
3141
3184
  v.pseudonyms[source.address] = source
3142
3185
  if v.address == source.address:
3143
3186
  replacements[k] = target
3187
+ v.pseudonyms[target.address] = target
3144
3188
  self.concepts.update(replacements)
3145
3189
 
3146
3190
  for k, ds in self.datasources.items():
@@ -4115,6 +4159,15 @@ def list_to_wrapper(args):
4115
4159
  return ListWrapper(args, type=types[0])
4116
4160
 
4117
4161
 
4162
+ def dict_to_map_wrapper(arg):
4163
+ key_types = [arg_to_datatype(arg) for arg in arg.keys()]
4164
+
4165
+ value_types = [arg_to_datatype(arg) for arg in arg.values()]
4166
+ assert len(set(key_types)) == 1
4167
+ assert len(set(key_types)) == 1
4168
+ return MapWrapper(arg, key_type=key_types[0], value_type=value_types[0])
4169
+
4170
+
4118
4171
  def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType | NumericType:
4119
4172
  if isinstance(arg, Function):
4120
4173
  return arg.output_datatype
@@ -4143,5 +4196,7 @@ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType | Numeric
4143
4196
  elif isinstance(arg, list):
4144
4197
  wrapper = list_to_wrapper(arg)
4145
4198
  return ListType(type=wrapper.type)
4199
+ elif isinstance(arg, MapWrapper):
4200
+ return MapType(key_type=arg.key_type, value_type=arg.value_type)
4146
4201
  else:
4147
4202
  raise ValueError(f"Cannot parse arg datatype for arg of raw type {type(arg)}")
@@ -1,10 +1,10 @@
1
1
  from trilogy.core.models import (
2
2
  CTE,
3
3
  SelectStatement,
4
- PersistStatement,
5
4
  MultiSelectStatement,
5
+ Conditional,
6
6
  )
7
- from trilogy.core.enums import PurposeLineage
7
+ from trilogy.core.enums import PurposeLineage, BooleanOperator
8
8
  from trilogy.constants import logger, CONFIG
9
9
  from trilogy.core.optimizations import (
10
10
  OptimizationRule,
@@ -42,34 +42,45 @@ def gen_inverse_map(input: list[CTE]) -> dict[str, list[CTE]]:
42
42
  return inverse_map
43
43
 
44
44
 
45
- def is_direct_return_eligible(
46
- cte: CTE, select: SelectStatement | PersistStatement | MultiSelectStatement
47
- ) -> bool:
48
- if isinstance(select, (PersistStatement, MultiSelectStatement)):
49
- return False
45
+ def is_direct_return_eligible(cte: CTE) -> CTE | None:
46
+ # if isinstance(select, (PersistStatement, MultiSelectStatement)):
47
+ # return False
48
+ if len(cte.parent_ctes) != 1:
49
+ return None
50
+ direct_parent = cte.parent_ctes[0]
51
+
52
+ output_addresses = set([x.address for x in cte.output_columns])
53
+ parent_output_addresses = set([x.address for x in direct_parent.output_columns])
54
+ if not output_addresses.issubset(parent_output_addresses):
55
+ return None
56
+ if not direct_parent.grain == cte.grain:
57
+ return None
50
58
  derived_concepts = [
51
59
  c
52
60
  for c in cte.source.output_concepts + cte.source.hidden_concepts
53
61
  if c not in cte.source.input_concepts
54
62
  ]
55
- eligible = True
56
63
  conditions = (
57
- set(x.address for x in select.where_clause.concept_arguments)
58
- if select.where_clause
64
+ set(x.address for x in direct_parent.condition.concept_arguments)
65
+ if direct_parent.condition
59
66
  else set()
60
67
  )
61
68
  for x in derived_concepts:
62
69
  if x.derivation == PurposeLineage.WINDOW:
63
- return False
70
+ return None
64
71
  if x.derivation == PurposeLineage.UNNEST:
65
- return False
72
+ return None
66
73
  if x.derivation == PurposeLineage.AGGREGATE:
67
74
  if x.address in conditions:
68
- return False
75
+ return None
76
+ # handling top level nodes that require unpacking
77
+ for x in cte.output_columns:
78
+ if x.derivation == PurposeLineage.UNNEST:
79
+ return None
69
80
  logger.info(
70
- f"[Optimization][EarlyReturn] Upleveling output select to final CTE with derived_concepts {[x.address for x in derived_concepts]}"
81
+ f"[Optimization][EarlyReturn] Removing redundant output CTE with derived_concepts {[x.address for x in derived_concepts]}"
71
82
  )
72
- return eligible
83
+ return direct_parent
73
84
 
74
85
 
75
86
  def sort_select_output(cte: CTE, query: SelectStatement | MultiSelectStatement):
@@ -90,23 +101,27 @@ def optimize_ctes(
90
101
  input: list[CTE], root_cte: CTE, select: SelectStatement | MultiSelectStatement
91
102
  ) -> list[CTE]:
92
103
 
93
- if CONFIG.optimizations.direct_return and is_direct_return_eligible(
94
- root_cte, select
104
+ direct_parent: CTE | None = root_cte
105
+ while CONFIG.optimizations.direct_return and (
106
+ direct_parent := is_direct_return_eligible(root_cte)
95
107
  ):
96
- root_cte.order_by = select.order_by
97
- root_cte.limit = select.limit
98
- # if select.where_clause:
99
-
100
- # if root_cte.condition:
101
- # root_cte.condition = Conditional(
102
- # left=root_cte.condition,
103
- # operator=BooleanOperator.AND,
104
- # right=select.where_clause.conditional,
105
- # )
106
- # else:
107
- # root_cte.condition = select.where_clause.conditional
108
- root_cte.requires_nesting = False
109
- sort_select_output(root_cte, select)
108
+ direct_parent.order_by = root_cte.order_by
109
+ direct_parent.limit = root_cte.limit
110
+ direct_parent.hidden_concepts = (
111
+ root_cte.hidden_concepts + direct_parent.hidden_concepts
112
+ )
113
+ if root_cte.condition:
114
+ if direct_parent.condition:
115
+ direct_parent.condition = Conditional(
116
+ left=direct_parent.condition,
117
+ operator=BooleanOperator.AND,
118
+ right=root_cte.condition,
119
+ )
120
+ else:
121
+ direct_parent.condition = root_cte.condition
122
+ root_cte = direct_parent
123
+
124
+ sort_select_output(root_cte, select)
110
125
 
111
126
  REGISTERED_RULES: list["OptimizationRule"] = []
112
127
  if CONFIG.optimizations.constant_inlining:
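The rework above drops the `requires_nesting` flag in favour of repeatedly collapsing the final CTE into its single parent whenever the parent already exposes every output at the same grain, folding the outer ORDER BY, LIMIT, and filter into the parent as it goes. A simplified, self-contained sketch of that control flow, using a hypothetical stand-in node type rather than trilogy's `CTE` model (the grain and derivation checks are omitted):

```python
from dataclasses import dataclass, field
from typing import Optional

@dataclass
class Node:  # hypothetical stand-in for a CTE
    parents: list["Node"] = field(default_factory=list)
    outputs: set[str] = field(default_factory=set)
    condition: Optional[str] = None
    order_by: Optional[str] = None
    limit: Optional[int] = None

def eligible_parent(node: Node) -> Optional[Node]:
    # collapse only when there is exactly one parent that already exposes every output
    if len(node.parents) == 1 and node.outputs <= node.parents[0].outputs:
        return node.parents[0]
    return None

def collapse(root: Node) -> Node:
    # walk upward, folding ordering, limit and filters into the surviving parent
    while (parent := eligible_parent(root)) is not None:
        parent.order_by, parent.limit = root.order_by, root.limit
        if root.condition:
            parent.condition = (
                f"({parent.condition}) AND ({root.condition})"
                if parent.condition
                else root.condition
            )
        root = parent
    return root
```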
@@ -3,18 +3,43 @@ from trilogy.core.models import (
3
3
  Conditional,
4
4
  BooleanOperator,
5
5
  Datasource,
6
+ SubselectComparison,
7
+ Comparison,
8
+ Parenthetical,
9
+ Function,
10
+ FilterItem,
11
+ MagicConstants,
12
+ Concept,
13
+ WindowItem,
14
+ AggregateWrapper,
15
+ DataType,
6
16
  )
7
17
  from trilogy.core.optimizations.base_optimization import OptimizationRule
18
+ from trilogy.core.enums import FunctionClass
8
19
 
9
20
 
10
- def decompose_condition(conditional: Conditional):
11
- chunks = []
21
+ def decompose_condition(
22
+ conditional: Conditional,
23
+ ) -> list[SubselectComparison | Comparison | Conditional | Parenthetical]:
24
+ chunks: list[SubselectComparison | Comparison | Conditional | Parenthetical] = []
12
25
  if conditional.operator == BooleanOperator.AND:
13
- for val in [conditional.left, conditional.right]:
14
- if isinstance(val, Conditional):
15
- chunks.extend(decompose_condition(val))
16
- else:
17
- chunks.append(val)
26
+ if not (
27
+ isinstance(
28
+ conditional.left,
29
+ (SubselectComparison, Comparison, Conditional, Parenthetical),
30
+ )
31
+ and isinstance(
32
+ conditional.right,
33
+ (SubselectComparison, Comparison, Conditional, Parenthetical),
34
+ )
35
+ ):
36
+ chunks.append(conditional)
37
+ else:
38
+ for val in [conditional.left, conditional.right]:
39
+ if isinstance(val, Conditional):
40
+ chunks.extend(decompose_condition(val))
41
+ else:
42
+ chunks.append(val)
18
43
  else:
19
44
  chunks.append(conditional)
20
45
  return chunks
@@ -31,6 +56,40 @@ def is_child_of(a, comparison):
31
56
  return base
32
57
 
33
58
 
59
+ def is_basic(
60
+ element: (
61
+ int
62
+ | str
63
+ | float
64
+ | list
65
+ | WindowItem
66
+ | FilterItem
67
+ | Concept
68
+ | Comparison
69
+ | Conditional
70
+ | Parenthetical
71
+ | Function
72
+ | AggregateWrapper
73
+ | MagicConstants
74
+ | DataType
75
+ ),
76
+ ) -> bool:
77
+ if isinstance(element, Parenthetical):
78
+ return is_basic(element.content)
79
+ elif isinstance(element, SubselectComparison):
80
+ return True
81
+ elif isinstance(element, Comparison):
82
+ return is_basic(element.left) and is_basic(element.right)
83
+ elif isinstance(element, Function):
84
+ if element.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
85
+ return False
86
+ elif isinstance(element, AggregateWrapper):
87
+ return is_basic(element.function)
88
+ elif isinstance(element, Conditional):
89
+ return is_basic(element.left) and is_basic(element.right)
90
+ return True
91
+
92
+
34
93
  class PredicatePushdown(OptimizationRule):
35
94
 
36
95
  def __init__(self, *args, **kwargs) -> None:
@@ -128,7 +187,14 @@ class PredicatePushdown(OptimizationRule):
128
187
  )
129
188
  optimized = False
130
189
  for candidate in candidates:
131
- self.debug(f"Checking candidate {candidate}")
190
+ if not is_basic(candidate):
191
+ self.debug(
192
+ f"Skipping {candidate} as not a basic [no aggregate, etc] condition"
193
+ )
194
+ continue
195
+ self.log(
196
+ f"Checking candidate {candidate}, {type(candidate)}, {is_basic(candidate)}"
197
+ )
132
198
  for parent_cte in cte.parent_ctes:
133
199
  local_pushdown = self._check_parent(
134
200
  parent_cte=parent_cte, candidate=candidate, inverse_map=inverse_map
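The `decompose_condition` and `is_basic` changes above split AND chains into individual pushdown candidates and skip any candidate containing an aggregate, so filters are no longer pushed below the grouping that computes them. A toy illustration of the same two checks on a hypothetical mini-AST (plain tuples, not trilogy's condition classes):

```python
AGGREGATES = {"sum", "count", "avg", "min", "max"}

def decompose(cond):
    # ("and", left, right) chains flatten into a list of leaf predicates
    if isinstance(cond, tuple) and cond[0] == "and":
        return decompose(cond[1]) + decompose(cond[2])
    return [cond]

def is_basic(cond):
    # a candidate is pushdown-safe only if no aggregate appears anywhere inside it
    if isinstance(cond, tuple):
        return cond[0] not in AGGREGATES and all(is_basic(part) for part in cond[1:])
    return True

where = ("and", ("=", "x", 1), (">", ("sum", "y"), 10))
candidates = [c for c in decompose(where) if is_basic(c)]
print(candidates)  # only ('=', 'x', 1) survives; the aggregate comparison stays put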
@@ -336,8 +336,11 @@ def validate_concept(
336
336
  virtual_addresses: set[str],
337
337
  found_map: dict[str, set[Concept]],
338
338
  accept_partial: bool,
339
+ seen: set[str],
339
340
  ):
341
+
340
342
  found_map[str(node)].add(concept)
343
+ seen.add(concept.address)
341
344
  if concept not in node.partial_concepts:
342
345
 
343
346
  found_addresses.add(concept.address)
@@ -357,6 +360,8 @@ def validate_concept(
357
360
  for _, v in concept.pseudonyms.items():
358
361
  if v.address == concept.address:
359
362
  return
363
+ if v.address in seen:
364
+ return
360
365
  validate_concept(
361
366
  v,
362
367
  node,
@@ -366,6 +371,7 @@ def validate_concept(
366
371
  virtual_addresses,
367
372
  found_map,
368
373
  accept_partial,
374
+ seen=seen,
369
375
  )
370
376
 
371
377
 
@@ -379,8 +385,10 @@ def validate_stack(
379
385
  non_partial_addresses: set[str] = set()
380
386
  partial_addresses: set[str] = set()
381
387
  virtual_addresses: set[str] = set()
388
+ seen: set[str] = set()
382
389
  for node in stack:
383
390
  resolved = node.resolve()
391
+
384
392
  for concept in resolved.output_concepts:
385
393
  validate_concept(
386
394
  concept,
@@ -391,6 +399,7 @@ def validate_stack(
391
399
  virtual_addresses,
392
400
  found_map,
393
401
  accept_partial,
402
+ seen,
394
403
  )
395
404
  for concept in node.virtual_output_concepts:
396
405
  if concept.address in non_partial_addresses:
@@ -44,7 +44,7 @@ def gen_basic_node(
44
44
  list(optional_set) + [concept],
45
45
  )
46
46
  )
47
-
47
+ # check for the concept by itself
48
48
  for attempt, basic_output in reversed(attempts):
49
49
  partials = []
50
50
  attempt = unique(attempt, "address")
@@ -4,7 +4,6 @@ from trilogy.core.models import Concept, Environment, Conditional
4
4
  from trilogy.core.processing.nodes import MergeNode, History, StrategyNode
5
5
  import networkx as nx
6
6
  from trilogy.core.graph_models import concept_to_node
7
- from trilogy.core.processing.utility import PathInfo
8
7
  from trilogy.constants import logger
9
8
  from trilogy.utility import unique
10
9
  from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
@@ -63,7 +62,9 @@ def extract_ds_components(g: nx.DiGraph, nodelist: list[str]) -> list[list[str]]
63
62
  if not str(x).startswith("ds~")
64
63
  ]
65
64
  )
66
-
65
+ # if we had no ego graphs, return all concepts
66
+ if not graphs:
67
+ return [[extract_address(node) for node in nodelist]]
67
68
  graphs = filter_unique_graphs(graphs)
68
69
  for node in nodelist:
69
70
  parsed = extract_address(node)
@@ -82,6 +83,7 @@ def determine_induced_minimal_nodes(
82
83
  H: nx.Graph = nx.to_undirected(G).copy()
83
84
  nodes_to_remove = []
84
85
  concepts = nx.get_node_attributes(G, "concept")
86
+
85
87
  for node in G.nodes:
86
88
  if concepts.get(node):
87
89
  lookup = concepts[node]
@@ -107,9 +109,11 @@ def determine_induced_minimal_nodes(
107
109
  paths = nx.multi_source_dijkstra_path(H, nodelist)
108
110
  except nx.exception.NodeNotFound:
109
111
  return None
112
+
110
113
  H.remove_nodes_from(list(x for x in H.nodes if x not in paths))
111
114
  sG: nx.Graph = ax.steinertree.steiner_tree(H, nodelist).copy()
112
115
  final: nx.DiGraph = nx.subgraph(G, sG.nodes).copy()
116
+
113
117
  for edge in G.edges:
114
118
  if edge[1] in final.nodes and edge[0].startswith("ds~"):
115
119
  ds_name = extract_address(edge[0])
@@ -125,6 +129,7 @@ def determine_induced_minimal_nodes(
125
129
  [final.in_degree(node) > 0 for node in final.nodes if node.startswith("c~")]
126
130
  ):
127
131
  return None
132
+
128
133
  if not all([node in final.nodes for node in nodelist]):
129
134
  return None
130
135
  return final
@@ -308,111 +313,44 @@ def gen_merge_node(
308
313
  history: History | None = None,
309
314
  conditions: Conditional | None = None,
310
315
  ) -> Optional[MergeNode]:
311
- join_candidates: List[PathInfo] = []
312
-
313
- # inject new concepts into search, and identify if two dses can reach there
314
- if not join_candidates:
315
- for filter_downstream in [True, False]:
316
- weak_resolve = resolve_weak_components(
317
- all_concepts,
318
- environment,
319
- g,
320
- filter_downstream=filter_downstream,
321
- accept_partial=accept_partial,
322
- )
323
- if weak_resolve:
324
- log_graph = [[y.address for y in x] for x in weak_resolve]
325
- logger.info(
326
- f"{padding(depth)}{LOGGER_PREFIX} Was able to resolve graph through weak component resolution - final graph {log_graph}"
327
- )
328
- return subgraphs_to_merge_node(
329
- weak_resolve,
330
- depth=depth,
331
- all_concepts=all_concepts,
332
- environment=environment,
333
- g=g,
334
- source_concepts=source_concepts,
335
- history=history,
336
- conditions=conditions,
337
- )
338
- if not join_candidates:
339
- return None
340
- join_additions: list[set[str]] = []
341
- for candidate in join_candidates:
342
- join_additions.append(candidate.reduced_concepts)
343
-
344
- common: set[str] = set()
345
- final_candidates: list[set[str]] = []
346
- # find all values that show up in every join_additions
347
- for ja in join_additions:
348
- if not common:
349
- common = ja
350
- else:
351
- common = common.intersection(ja)
352
- if all(ja.issubset(y) for y in join_additions):
353
- final_candidates.append(ja)
354
316
 
355
- if not final_candidates:
356
- filtered_paths = [x.difference(common) for x in join_additions]
357
- raise AmbiguousRelationshipResolutionException(
358
- f"Ambiguous concept join resolution fetching {[x.address for x in all_concepts]} - unique values in possible paths = {filtered_paths}. Include an additional concept to disambiguate",
359
- join_additions,
360
- )
361
- if not join_candidates:
362
- logger.info(
363
- f"{padding(depth)}{LOGGER_PREFIX} No additional join candidates could be found"
317
+ for filter_downstream in [True, False]:
318
+ weak_resolve = resolve_weak_components(
319
+ all_concepts,
320
+ environment,
321
+ g,
322
+ filter_downstream=filter_downstream,
323
+ accept_partial=accept_partial,
364
324
  )
365
- return None
366
- shortest: PathInfo = sorted(
367
- [x for x in join_candidates if x.reduced_concepts in final_candidates],
368
- key=lambda x: len(x.reduced_concepts),
369
- )[0]
370
- logger.info(f"{padding(depth)}{LOGGER_PREFIX} final path is {shortest.paths}")
371
-
372
- return subgraphs_to_merge_node(
373
- shortest.concept_subgraphs,
374
- depth=depth,
375
- all_concepts=all_concepts,
376
- environment=environment,
377
- g=g,
378
- source_concepts=source_concepts,
379
- history=history,
380
- conditions=conditions,
381
- )
382
- # parents = []
383
- # for graph in shortest.concept_subgraphs:
384
- # logger.info(
385
- # f"{padding(depth)}{LOGGER_PREFIX} fetching subgraph {[c.address for c in graph]}"
386
- # )
387
- # parent = source_concepts(
388
- # mandatory_list=graph,
389
- # environment=environment,
390
- # g=g,
391
- # depth=depth + 1,
392
- # history=history,
393
- # )
394
- # if not parent:
395
- # logger.info(
396
- # f"{padding(depth)}{LOGGER_PREFIX} Unable to instantiate target subgraph"
397
- # )
398
- # return None
399
- # logger.info(
400
- # f"{padding(depth)}{LOGGER_PREFIX} finished subgraph fetch for {[c.address for c in graph]}, have parent {type(parent)}"
401
- # )
402
- # parents.append(parent)
403
-
404
- # return MergeNode(
405
- # input_concepts=[
406
- # environment.concepts[x]
407
- # for x in shortest.reduced_concepts
408
- # if environment.concepts[x].derivation != PurposeLineage.MERGE
409
- # ],
410
- # output_concepts=[
411
- # x for x in all_concepts if x.derivation != PurposeLineage.MERGE
412
- # ],
413
- # environment=environment,
414
- # g=g,
415
- # parents=parents,
416
- # depth=depth,
417
- # conditions=conditions,
418
- # )
325
+ if weak_resolve:
326
+ log_graph = [[y.address for y in x] for x in weak_resolve]
327
+ logger.info(
328
+ f"{padding(depth)}{LOGGER_PREFIX} Was able to resolve graph through weak component resolution - final graph {log_graph}"
329
+ )
330
+ return subgraphs_to_merge_node(
331
+ weak_resolve,
332
+ depth=depth,
333
+ all_concepts=all_concepts,
334
+ environment=environment,
335
+ g=g,
336
+ source_concepts=source_concepts,
337
+ history=history,
338
+ conditions=conditions,
339
+ )
340
+ # one concept handling may need to be kicked to alias
341
+ if len(all_concepts) == 1:
342
+ concept = all_concepts[0]
343
+ for k, v in concept.pseudonyms.items():
344
+ test = subgraphs_to_merge_node(
345
+ [[concept, v]],
346
+ g=g,
347
+ all_concepts=[concept],
348
+ environment=environment,
349
+ depth=depth,
350
+ source_concepts=source_concepts,
351
+ history=history,
352
+ conditions=conditions,
353
+ )
354
+ if test:
355
+ return test
356
+ return None
@@ -124,15 +124,20 @@ def resolve_join_order(joins: List[BaseJoin]) -> List[BaseJoin]:
124
124
  return final_joins
125
125
 
126
126
 
127
- def add_node_join_concept(graph, concept, datasource, concepts):
128
- # we don't need to join on a concept if all of the keys exist in the grain
129
- # if concept.keys and all([x in grain for x in concept.keys]):
130
- # continue
127
+ def add_node_join_concept(
128
+ graph: nx.DiGraph,
129
+ concept: Concept,
130
+ datasource: Datasource | QueryDatasource,
131
+ concepts: List[Concept],
132
+ ):
133
+
131
134
  concepts.append(concept)
132
135
 
133
136
  graph.add_node(concept.address, type=NodeType.CONCEPT)
134
137
  graph.add_edge(datasource.identifier, concept.address)
135
- for k, v in concept.pseudonyms.items():
138
+ for _, v in concept.pseudonyms.items():
139
+ if v in concepts:
140
+ continue
136
141
  if v.address != concept.address:
137
142
  add_node_join_concept(graph, v, datasource, concepts)
138
143
 
@@ -149,13 +154,6 @@ def get_node_joins(
149
154
  graph.add_node(datasource.identifier, type=NodeType.NODE)
150
155
  for concept in datasource.output_concepts:
151
156
  add_node_join_concept(graph, concept, datasource, concepts)
152
- # we don't need to join on a concept if all of the keys exist in the grain
153
- # if concept.keys and all([x in grain for x in concept.keys]):
154
- # continue
155
- # concepts.append(concept)
156
-
157
- # graph.add_node(concept.address, type=NodeType.CONCEPT)
158
- # graph.add_edge(datasource.identifier, concept.address)
159
157
 
160
158
  # add edges for every constant to every datasource
161
159
  for datasource in datasources:
@@ -195,26 +193,6 @@ def get_node_joins(
195
193
  ),
196
194
  )
197
195
 
198
- node_map = {
199
- x[0:20]: len(
200
- [
201
- partial
202
- for partial in identifier_map[x].partial_concepts
203
- if partial in grain
204
- ]
205
- + [
206
- output
207
- for output in identifier_map[x].output_concepts
208
- if output.address in grain_pseudonyms
209
- ]
210
- )
211
- for x in node_list
212
- }
213
- print("NODE MAP")
214
- print(node_map)
215
- print([x.address for x in grain])
216
- print(grain_pseudonyms)
217
-
218
196
  for left in node_list:
219
197
  # the constant dataset is a special case
220
198
  # and can never be on the left of a join
@@ -467,9 +467,11 @@ def process_query(
467
467
  for cte in raw_ctes:
468
468
  cte.parent_ctes = [seen[x.name] for x in cte.parent_ctes]
469
469
  deduped_ctes: List[CTE] = list(seen.values())
470
+ root_cte.order_by = statement.order_by
471
+ root_cte.limit = statement.limit
472
+ root_cte.hidden_concepts = [x for x in statement.hidden_components]
470
473
 
471
474
  final_ctes = optimize_ctes(deduped_ctes, root_cte, statement)
472
-
473
475
  return ProcessedQuery(
474
476
  order_by=statement.order_by,
475
477
  grain=statement.grain,
trilogy/dialect/base.py CHANGED
@@ -8,7 +8,6 @@ from trilogy.core.enums import (
8
8
  FunctionType,
9
9
  WindowType,
10
10
  DatePart,
11
- PurposeLineage,
12
11
  ComparisonOperator,
13
12
  )
14
13
  from trilogy.core.models import (
@@ -36,6 +35,7 @@ from trilogy.core.models import (
36
35
  Environment,
37
36
  RawColumnExpr,
38
37
  ListWrapper,
38
+ MapWrapper,
39
39
  ShowStatement,
40
40
  RowsetItem,
41
41
  MultiSelectStatement,
@@ -45,6 +45,7 @@ from trilogy.core.models import (
45
45
  RawSQLStatement,
46
46
  ProcessedRawSQLStatement,
47
47
  NumericType,
48
+ MapType,
48
49
  MergeStatementV2,
49
50
  )
50
51
  from trilogy.core.query_processor import process_query, process_persist
@@ -97,6 +98,7 @@ DATATYPE_MAP = {
97
98
  DataType.FLOAT: "float",
98
99
  DataType.BOOL: "bool",
99
100
  DataType.NUMERIC: "numeric",
101
+ DataType.MAP: "map",
100
102
  }
101
103
 
102
104
 
@@ -116,6 +118,7 @@ FUNCTION_MAP = {
116
118
  FunctionType.IS_NULL: lambda x: f"isnull({x[0]})",
117
119
  # complex
118
120
  FunctionType.INDEX_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
121
+ FunctionType.MAP_ACCESS: lambda x: f"{x[0]}[{x[1]}][1]",
119
122
  FunctionType.UNNEST: lambda x: f"unnest({x[0]})",
120
123
  # math
121
124
  FunctionType.ADD: lambda x: f"{x[0]} + {x[1]}",
@@ -189,14 +192,13 @@ TOP {{ limit }}{% endif %}
189
192
  \t{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
190
193
  {% if base %}FROM
191
194
  \t{{ base }}{% endif %}{% if joins %}{% for join in joins %}
192
- \t{{ join }}{% endfor %}{% endif %}
193
- {% if where %}WHERE
194
- \t{{ where }}
195
- {% endif %}{%- if group_by %}GROUP BY {% for group in group_by %}
196
- \t{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
197
- {%- if order_by %}
198
- ORDER BY {% for order in order_by %}
199
- {{ order }}{% if not loop.last %},{% endif %}{% endfor %}
195
+ \t{{ join }}{% endfor %}{% endif %}{% if where %}
196
+ WHERE
197
+ \t{{ where }}{% endif %}{%- if group_by %}
198
+ GROUP BY {% for group in group_by %}
199
+ \t{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{%- if order_by %}
200
+ ORDER BY{% for order in order_by %}
201
+ \t{{ order }}{% if not loop.last %},{% endif %}{% endfor %}
200
202
  {% endif %}{% endif %}
201
203
  """
202
204
  )
@@ -214,7 +216,13 @@ def safe_get_cte_value(coalesce, cte: CTE, c: Concept, quote_char: str):
214
216
  raw = cte.source_map.get(address, None)
215
217
 
216
218
  if not raw:
217
- return INVALID_REFERENCE_STRING("Missing source reference")
219
+ for k, v in c.pseudonyms.items():
220
+ if cte.source_map.get(k):
221
+ c = v
222
+ raw = cte.source_map[k]
223
+ break
224
+ if not raw:
225
+ return INVALID_REFERENCE_STRING("Missing source reference")
218
226
  if isinstance(raw, str):
219
227
  rendered = cte.get_alias(c, raw)
220
228
  return f"{raw}.{safe_quote(rendered, quote_char)}"
@@ -291,6 +299,7 @@ class BaseDialect:
291
299
  self.render_expr(v, cte) # , alias=False)
292
300
  for v in c.lineage.arguments
293
301
  ]
302
+
294
303
  if cte.group_to_grain:
295
304
  rval = f"{self.FUNCTION_MAP[c.lineage.operator](args)}"
296
305
  else:
@@ -335,11 +344,11 @@ class BaseDialect:
335
344
  Parenthetical,
336
345
  AggregateWrapper,
337
346
  MagicConstants,
347
+ MapWrapper[Any, Any],
348
+ MapType,
338
349
  NumericType,
339
350
  ListType,
340
- ListWrapper[int],
341
- ListWrapper[str],
342
- ListWrapper[float],
351
+ ListWrapper[Any],
343
352
  DatePart,
344
353
  CaseWhen,
345
354
  CaseElse,
@@ -435,6 +444,8 @@ class BaseDialect:
435
444
  return str(e)
436
445
  elif isinstance(e, ListWrapper):
437
446
  return f"[{','.join([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e])}]"
447
+ elif isinstance(e, MapWrapper):
448
+ return f"MAP {{{','.join([f'{self.render_expr(k, cte=cte, cte_map=cte_map)}:{self.render_expr(v, cte=cte, cte_map=cte_map)}' for k, v in e.items()])}}}"
438
449
  elif isinstance(e, list):
439
450
  return f"[{','.join([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e])}]"
440
451
  elif isinstance(e, DataType):
@@ -643,82 +654,20 @@ class BaseDialect:
643
654
  f" {selected}"
644
655
  )
645
656
 
646
- # where assignment
647
- output_where = False
648
- if query.where_clause:
649
- # found = False
650
- filter = set(
651
- [
652
- str(x.address)
653
- for x in query.where_clause.row_arguments
654
- if not x.derivation == PurposeLineage.CONSTANT
655
- ]
656
- )
657
- query_output = set([str(z.address) for z in query.output_columns])
658
- # if it wasn't an output
659
- # we would have forced it up earlier and we don't need to render at this point
660
- if filter.issubset(query_output):
661
- output_where = True
662
- for ex_set in query.where_clause.existence_arguments:
663
- for c in ex_set:
664
- if c.address not in cte_output_map:
665
- cts = [
666
- ct
667
- for ct in query.ctes
668
- if ct.name in query.base.existence_source_map[c.address]
669
- ]
670
- if not cts:
671
- raise ValueError(query.base.existence_source_map[c.address])
672
- cte_output_map[c.address] = cts[0]
673
-
674
657
  compiled_ctes = self.generate_ctes(query)
675
658
 
676
659
  # restort selections by the order they were written in
677
660
  sorted_select: List[str] = []
678
661
  for output_c in output_addresses:
679
662
  sorted_select.append(select_columns[output_c])
680
- if not query.base.requires_nesting:
681
- final = self.SQL_TEMPLATE.render(
682
- output=(
683
- query.output_to
684
- if isinstance(query, ProcessedQueryPersist)
685
- else None
686
- ),
687
- full_select=compiled_ctes[-1].statement,
688
- ctes=compiled_ctes[:-1],
689
- )
690
- else:
691
- final = self.SQL_TEMPLATE.render(
692
- output=(
693
- query.output_to
694
- if isinstance(query, ProcessedQueryPersist)
695
- else None
696
- ),
697
- select_columns=sorted_select,
698
- base=query.base.name,
699
- joins=[
700
- render_join(join, self.QUOTE_CHARACTER, None)
701
- for join in query.joins
702
- ],
703
- ctes=compiled_ctes,
704
- limit=query.limit,
705
- # move up to CTEs
706
- where=(
707
- self.render_expr(
708
- query.where_clause.conditional, cte_map=cte_output_map
709
- )
710
- if query.where_clause and output_where
711
- else None
712
- ),
713
- order_by=(
714
- [
715
- self.render_order_item(i, query.base, final=True)
716
- for i in query.order_by.items
717
- ]
718
- if query.order_by
719
- else None
720
- ),
721
- )
663
+
664
+ final = self.SQL_TEMPLATE.render(
665
+ output=(
666
+ query.output_to if isinstance(query, ProcessedQueryPersist) else None
667
+ ),
668
+ full_select=compiled_ctes[-1].statement,
669
+ ctes=compiled_ctes[:-1],
670
+ )
722
671
 
723
672
  if CONFIG.strict_mode and INVALID_REFERENCE_STRING(1) in final:
724
673
  raise ValueError(
trilogy/dialect/presto.py CHANGED
@@ -15,6 +15,7 @@ FUNCTION_MAP = {
15
15
  FunctionType.LENGTH: lambda x: f"length({x[0]})",
16
16
  FunctionType.AVG: lambda x: f"avg({x[0]})",
17
17
  FunctionType.INDEX_ACCESS: lambda x: f"element_at({x[0]},{x[1]})",
18
+ FunctionType.MAP_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
18
19
  FunctionType.LIKE: lambda x: (
19
20
  f" CASE WHEN {x[0]} like {x[1]} THEN True ELSE False END"
20
21
  ),
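Taken together, the two new `MAP_ACCESS` entries render the same logical lookup differently per dialect: the base dialect appends `[1]`, which suggests a DuckDB-style `MAP` subscript that returns a single-element list, while the Presto dialect uses a plain subscript. The lambdas below are copied from the hunks above:

```python
# Copied from the FUNCTION_MAP additions in trilogy/dialect/base.py and presto.py above.
base_map_access = lambda x: f"{x[0]}[{x[1]}][1]"    # base dialect
presto_map_access = lambda x: f"{x[0]}[{x[1]}]"     # presto dialect

args = ["my_map", "'a'"]
print(base_map_access(args))    # my_map['a'][1]
print(presto_map_access(args))  # my_map['a']
```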
trilogy/parsing/common.py CHANGED
@@ -7,6 +7,7 @@ from trilogy.core.models import (
7
7
  Metadata,
8
8
  FilterItem,
9
9
  ListWrapper,
10
+ MapWrapper,
10
11
  WindowItem,
11
12
  )
12
13
  from typing import List, Tuple
@@ -41,7 +42,7 @@ def concept_list_to_keys(concepts: Tuple[Concept, ...]) -> Tuple[Concept, ...]:
41
42
 
42
43
 
43
44
  def constant_to_concept(
44
- parent: ListWrapper | list | int | float | str,
45
+ parent: ListWrapper | MapWrapper | list | int | float | str,
45
46
  name: str,
46
47
  namespace: str,
47
48
  purpose: Purpose | None = None,
@@ -53,6 +54,7 @@ def constant_to_concept(
53
54
  output_purpose=Purpose.CONSTANT,
54
55
  arguments=[parent],
55
56
  )
57
+ assert const_function.arguments[0] == parent, const_function.arguments[0]
56
58
  fmetadata = metadata or Metadata()
57
59
  return Concept(
58
60
  name=name,
@@ -186,6 +188,7 @@ def arbitrary_to_concept(
186
188
  | FilterItem
187
189
  | Function
188
190
  | ListWrapper
191
+ | MapWrapper
189
192
  | int
190
193
  | float
191
194
  | str
@@ -43,6 +43,7 @@ from trilogy.core.functions import (
43
43
  Min,
44
44
  Split,
45
45
  IndexAccess,
46
+ MapAccess,
46
47
  AttrAccess,
47
48
  Abs,
48
49
  Unnest,
@@ -94,6 +95,7 @@ from trilogy.core.models import (
94
95
  RawColumnExpr,
95
96
  arg_to_datatype,
96
97
  ListWrapper,
98
+ MapWrapper,
97
99
  MapType,
98
100
  ShowStatement,
99
101
  DataType,
@@ -104,6 +106,7 @@ from trilogy.core.models import (
104
106
  RowsetDerivationStatement,
105
107
  LooseConceptList,
106
108
  list_to_wrapper,
109
+ dict_to_map_wrapper,
107
110
  NumericType,
108
111
  )
109
112
  from trilogy.parsing.exceptions import ParseError
@@ -117,7 +120,7 @@ from trilogy.parsing.common import (
117
120
  arbitrary_to_concept,
118
121
  )
119
122
 
120
- CONSTANT_TYPES = (int, float, str, bool, list, ListWrapper)
123
+ CONSTANT_TYPES = (int, float, str, bool, list, ListWrapper, MapWrapper)
121
124
 
122
125
  with open(join(dirname(__file__), "trilogy.lark"), "r") as f:
123
126
  PARSER = Lark(
@@ -253,7 +256,7 @@ class ParseToObjects(Transformer):
253
256
  self.environment.add_concept(concept, meta=meta)
254
257
  final.append(concept)
255
258
  elif isinstance(
256
- arg, (FilterItem, WindowItem, AggregateWrapper, ListWrapper)
259
+ arg, (FilterItem, WindowItem, AggregateWrapper, ListWrapper, MapWrapper)
257
260
  ):
258
261
  id_hash = string_to_hash(str(arg))
259
262
  concept = arbitrary_to_concept(
@@ -330,7 +333,12 @@ class ParseToObjects(Transformer):
330
333
  def numeric_type(self, args) -> NumericType:
331
334
  return NumericType(precision=args[0], scale=args[1])
332
335
 
333
- def data_type(self, args) -> DataType | ListType | StructType | NumericType:
336
+ def map_type(self, args) -> MapType:
337
+ return MapType(key_type=args[0], value_type=args[1])
338
+
339
+ def data_type(
340
+ self, args
341
+ ) -> DataType | ListType | StructType | MapType | NumericType:
334
342
  resolved = args[0]
335
343
  if isinstance(resolved, StructType):
336
344
  return resolved
@@ -338,6 +346,8 @@ class ParseToObjects(Transformer):
338
346
  return resolved
339
347
  elif isinstance(resolved, NumericType):
340
348
  return resolved
349
+ elif isinstance(resolved, MapType):
350
+ return resolved
341
351
  return DataType(args[0].lower())
342
352
 
343
353
  def array_comparison(self, args) -> ComparisonOperator:
@@ -490,7 +500,6 @@ class ParseToObjects(Transformer):
490
500
  # we need to strip off every parenthetical to see what is being assigned.
491
501
  while isinstance(source_value, Parenthetical):
492
502
  source_value = source_value.content
493
-
494
503
  if isinstance(
495
504
  source_value, (FilterItem, WindowItem, AggregateWrapper, Function)
496
505
  ):
@@ -549,7 +558,7 @@ class ParseToObjects(Transformer):
549
558
  else:
550
559
  metadata = None
551
560
  name = args[1]
552
- constant: Union[str, float, int, bool] = args[2]
561
+ constant: Union[str, float, int, bool, MapWrapper, ListWrapper] = args[2]
553
562
  lookup, namespace, name, parent = parse_concept_reference(
554
563
  name, self.environment
555
564
  )
@@ -1007,6 +1016,11 @@ class ParseToObjects(Transformer):
1007
1016
  def array_lit(self, args):
1008
1017
  return list_to_wrapper(args)
1009
1018
 
1019
+ def map_lit(self, args):
1020
+ parsed = dict(zip(args[::2], args[1::2]))
1021
+ wrapped = dict_to_map_wrapper(parsed)
1022
+ return wrapped
1023
+
1010
1024
  def literal(self, args):
1011
1025
  return args[0]
1012
1026
 
@@ -1144,6 +1158,8 @@ class ParseToObjects(Transformer):
1144
1158
  @v_args(meta=True)
1145
1159
  def index_access(self, meta, args):
1146
1160
  args = self.process_function_args(args, meta=meta)
1161
+ if args[0].datatype == DataType.MAP or isinstance(args[0].datatype, MapType):
1162
+ return MapAccess(args)
1147
1163
  return IndexAccess(args)
1148
1164
 
1149
1165
  @v_args(meta=True)
@@ -274,12 +274,14 @@
274
274
  float_lit: /[0-9]*\.[0-9]+/
275
275
 
276
276
  array_lit: "[" (literal ",")* literal ","? "]"()
277
+
278
+ map_lit: "{" (literal ":" literal ",")* literal ":" literal ","? "}"
277
279
 
278
280
  !bool_lit: "True"i | "False"i
279
281
 
280
282
  !null_lit.1: "null"i
281
283
 
282
- literal: null_lit | _string_lit | int_lit | float_lit | bool_lit | array_lit
284
+ literal: null_lit | _string_lit | int_lit | float_lit | bool_lit | array_lit | map_lit
283
285
 
284
286
  MODIFIER: "Optional"i | "Partial"i | "Nullable"i
285
287
 
@@ -291,8 +293,9 @@
291
293
 
292
294
  numeric_type: "numeric"i "(" int_lit "," int_lit ")"
293
295
 
296
+ map_type: "map"i "<" data_type "," data_type ">"
294
297
 
295
- !data_type: "string"i | "number"i | "numeric"i | "map"i | "list"i | "array"i | "any"i | "int"i | "bigint"i | "date"i | "datetime"i | "timestamp"i | "float"i | "bool"i | numeric_type | struct_type | list_type
298
+ !data_type: "string"i | "number"i | "numeric"i | "map"i | "list"i | "array"i | "any"i | "int"i | "bigint"i | "date"i | "datetime"i | "timestamp"i | "float"i | "bool"i | numeric_type | map_type | struct_type | list_type
296
299
 
297
300
  PURPOSE: "key"i | "metric"i | CONST
298
301
  PROPERTY: "property"i
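With these grammar changes, the parser accepts `map<key, value>` as a type and `{k: v, ...}` map literals. Per the `map_lit` transformer added in `trilogy/parsing/parse_engine.py` above, the alternating key/value literals are zipped into a dict before being wrapped; a minimal reconstruction of that step:

```python
# Reconstruction of the map_lit handling shown above: the rule yields alternating
# key/value literals, which become a plain dict before dict_to_map_wrapper is applied.
literals = ["a", 1, "b", 2]                        # e.g. from a Trilogy literal like {'a': 1, 'b': 2}
parsed = dict(zip(literals[::2], literals[1::2]))  # {'a': 1, 'b': 2}
print(parsed)
```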