pytrilogy 0.0.1.110__py3-none-any.whl → 0.0.1.112__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (33)
  1. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/RECORD +33 -33
  3. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +1 -1
  6. trilogy/core/models.py +93 -67
  7. trilogy/core/optimization.py +33 -19
  8. trilogy/core/processing/concept_strategies_v3.py +44 -19
  9. trilogy/core/processing/node_generators/basic_node.py +2 -0
  10. trilogy/core/processing/node_generators/common.py +3 -1
  11. trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
  12. trilogy/core/processing/node_generators/filter_node.py +36 -6
  13. trilogy/core/processing/node_generators/node_merge_node.py +34 -23
  14. trilogy/core/processing/node_generators/rowset_node.py +30 -6
  15. trilogy/core/processing/node_generators/select_node.py +23 -9
  16. trilogy/core/processing/node_generators/unnest_node.py +24 -3
  17. trilogy/core/processing/node_generators/window_node.py +4 -2
  18. trilogy/core/processing/nodes/__init__.py +7 -6
  19. trilogy/core/processing/nodes/base_node.py +40 -6
  20. trilogy/core/processing/nodes/filter_node.py +15 -1
  21. trilogy/core/processing/nodes/group_node.py +20 -1
  22. trilogy/core/processing/nodes/merge_node.py +36 -7
  23. trilogy/core/processing/nodes/select_node_v2.py +34 -39
  24. trilogy/core/processing/nodes/unnest_node.py +12 -0
  25. trilogy/core/processing/nodes/window_node.py +11 -0
  26. trilogy/core/processing/utility.py +0 -14
  27. trilogy/core/query_processor.py +125 -29
  28. trilogy/dialect/base.py +45 -40
  29. trilogy/executor.py +31 -3
  30. trilogy/parsing/parse_engine.py +65 -18
  31. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/LICENSE.md +0 -0
  32. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/entry_points.txt +0 -0
  33. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/top_level.txt +0 -0
trilogy/core/query_processor.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import List, Optional, Set, Union, Dict
1
+ from typing import List, Optional, Set, Union, Dict, Tuple
2
2
 
3
3
  from trilogy.core.env_processor import generate_graph
4
4
  from trilogy.core.graph_models import ReferenceGraph
@@ -84,19 +84,31 @@ def base_join_to_join(
84
84
 
85
85
  def generate_source_map(
86
86
  query_datasource: QueryDatasource, all_new_ctes: List[CTE]
87
- ) -> Dict[str, str | list[str]]:
87
+ ) -> Tuple[Dict[str, list[str]], Dict[str, list[str]]]:
88
88
  source_map: Dict[str, list[str]] = defaultdict(list)
89
89
  # now populate anything derived in this level
90
90
  for qdk, qdv in query_datasource.source_map.items():
91
+ unnest = [x for x in qdv if isinstance(x, UnnestJoin)]
92
+ for x in unnest:
93
+ source_map[qdk] = []
91
94
  if (
92
95
  qdk not in source_map
93
96
  and len(qdv) == 1
94
97
  and isinstance(list(qdv)[0], UnnestJoin)
95
98
  ):
96
99
  source_map[qdk] = []
100
+ basic = [x for x in qdv if isinstance(x, Datasource)]
101
+ for base in basic:
102
+ source_map[qdk].append(base.name)
97
103
 
98
- else:
99
- for cte in all_new_ctes:
104
+ ctes = [x for x in qdv if isinstance(x, QueryDatasource)]
105
+ if ctes:
106
+ names = set([x.name for x in ctes])
107
+ matches = [cte for cte in all_new_ctes if cte.source.name in names]
108
+
109
+ if not matches and names:
110
+ raise SyntaxError(query_datasource.source_map)
111
+ for cte in matches:
100
112
  output_address = [
101
113
  x.address
102
114
  for x in cte.output_columns
@@ -105,21 +117,27 @@ def generate_source_map(
105
117
  if qdk in output_address:
106
118
  source_map[qdk].append(cte.name)
107
119
  # now do a pass that accepts partials
108
- # TODO: move this into a second loop by first creationg all sub sourcdes
120
+ # TODO: move this into a second loop by first creationg all sub sources
109
121
  # then loop through this
110
- for cte in all_new_ctes:
111
- output_address = [x.address for x in cte.output_columns]
112
- if qdk in output_address:
113
- if qdk not in source_map:
114
- source_map[qdk] = [cte.name]
115
- if qdk not in source_map and not qdv:
116
- # set source to empty, as it must be derived in this element
117
- source_map[qdk] = []
122
+ for cte in matches:
123
+ if qdk not in source_map:
124
+ source_map[qdk] = [cte.name]
118
125
  if qdk not in source_map:
119
- raise ValueError(
120
- f"Missing {qdk} in {source_map}, source map {query_datasource.source_map.keys()} "
121
- )
122
- return {k: "" if not v else v for k, v in source_map.items()}
126
+ if not qdv:
127
+ source_map[qdk] = []
128
+ elif CONFIG.validate_missing:
129
+ raise ValueError(
130
+ f"Missing {qdk} in {source_map}, source map {query_datasource.source_map} "
131
+ )
132
+
133
+ # existence lookups use a separate map
134
+ # as they cannot be referenced in row resolution
135
+ existence_source_map: Dict[str, list[str]] = defaultdict(list)
136
+ for ek, ev in query_datasource.existence_source_map.items():
137
+ names = set([x.name for x in ev])
138
+ ematches = [cte.name for cte in all_new_ctes if cte.source.name in names]
139
+ existence_source_map[ek] = ematches
140
+ return {k: [] if not v else v for k, v in source_map.items()}, existence_source_map
123
141
 
124
142
 
125
143
  def datasource_to_query_datasource(datasource: Datasource) -> QueryDatasource:
@@ -158,6 +176,52 @@ def generate_cte_name(full_name: str, name_map: dict[str, str]) -> str:
158
176
  return full_name.replace("<", "").replace(">", "").replace(",", "_")
159
177
 
160
178
 
179
+ def resolve_cte_base_name_and_alias(
180
+ name: str,
181
+ source: QueryDatasource,
182
+ parents: List[CTE],
183
+ joins: List[Join | InstantiatedUnnestJoin],
184
+ ) -> Tuple[str | None, str | None]:
185
+
186
+ valid_joins: List[Join] = [join for join in joins if isinstance(join, Join)]
187
+ relevant_parent_sources = set()
188
+ for k, v in source.source_map.items():
189
+ if v:
190
+ relevant_parent_sources.update(v)
191
+ eligible = [x for x in source.datasources if x in relevant_parent_sources]
192
+ if (
193
+ len(eligible) == 1
194
+ and isinstance(eligible[0], Datasource)
195
+ and not eligible[0].name == CONSTANT_DATASET
196
+ ):
197
+ ds = eligible[0]
198
+ return ds.safe_location, ds.identifier
199
+
200
+ # if we have multiple joined CTEs, pick the base
201
+ # as the root
202
+ elif len(eligible) == 1 and len(parents) == 1:
203
+ return parents[0].name, parents[0].name
204
+ elif valid_joins and len(valid_joins) > 0:
205
+ candidates = [x.left_cte.name for x in valid_joins]
206
+ disallowed = [x.right_cte.name for x in valid_joins]
207
+ try:
208
+ cte = [y for y in candidates if y not in disallowed][0]
209
+ return cte, cte
210
+ except IndexError:
211
+ raise SyntaxError(
212
+ f"Invalid join configuration {candidates} {disallowed} with all parents {[x.base_name for x in parents]}"
213
+ )
214
+ elif eligible:
215
+ matched = [x for x in parents if x.source.name == eligible[0].name]
216
+ if matched:
217
+ return matched[0].name, matched[0].name
218
+
219
+ logger.info(
220
+ f"Could not determine CTE base name for {name} with relevant sources {relevant_parent_sources}"
221
+ )
222
+ return None, None
223
+
224
+
161
225
  def datasource_to_ctes(
162
226
  query_datasource: QueryDatasource, name_map: dict[str, str]
163
227
  ) -> List[CTE]:
@@ -176,7 +240,8 @@ def datasource_to_ctes(
176
240
  sub_cte = datasource_to_ctes(sub_datasource, name_map)
177
241
  parents += sub_cte
178
242
  all_new_ctes += sub_cte
179
- source_map = generate_source_map(query_datasource, all_new_ctes)
243
+ source_map, existence_map = generate_source_map(query_datasource, all_new_ctes)
244
+
180
245
  else:
181
246
  # source is the first datasource of the query datasource
182
247
  source = query_datasource.datasources[0]
@@ -184,13 +249,27 @@ def datasource_to_ctes(
184
249
  # render properly on initial access; since they have
185
250
  # no actual source
186
251
  if source.full_name == DEFAULT_NAMESPACE + "_" + CONSTANT_DATASET:
187
- source_map = {k: "" for k in query_datasource.source_map}
252
+ source_map = {k: [] for k in query_datasource.source_map}
253
+ existence_map = source_map
188
254
  else:
189
255
  source_map = {
190
- k: "" if not v else source.identifier
256
+ k: [] if not v else [source.identifier]
191
257
  for k, v in query_datasource.source_map.items()
192
258
  }
259
+ existence_map = source_map
260
+
193
261
  human_id = generate_cte_name(query_datasource.full_name, name_map)
262
+ logger.info(
263
+ f"Finished building source map for {human_id} with {len(parents)} parents, have {source_map}, parent had non-empty keys {[k for k, v in query_datasource.source_map.items() if v]} "
264
+ )
265
+ final_joins = [
266
+ x
267
+ for x in [base_join_to_join(join, parents) for join in query_datasource.joins]
268
+ if x
269
+ ]
270
+ base_name, base_alias = resolve_cte_base_name_and_alias(
271
+ human_id, query_datasource, parents, final_joins
272
+ )
194
273
  cte = CTE(
195
274
  name=human_id,
196
275
  source=query_datasource,
@@ -200,14 +279,9 @@ def datasource_to_ctes(
200
279
  for c in query_datasource.output_concepts
201
280
  ],
202
281
  source_map=source_map,
282
+ existence_source_map=existence_map,
203
283
  # related columns include all referenced columns, such as filtering
204
- joins=[
205
- x
206
- for x in [
207
- base_join_to_join(join, parents) for join in query_datasource.joins
208
- ]
209
- if x
210
- ],
284
+ joins=final_joins,
211
285
  grain=query_datasource.grain,
212
286
  group_to_grain=query_datasource.group_required,
213
287
  # we restrict parent_ctes to one level
@@ -217,11 +291,13 @@ def datasource_to_ctes(
217
291
  partial_concepts=query_datasource.partial_concepts,
218
292
  join_derived_concepts=query_datasource.join_derived_concepts,
219
293
  hidden_concepts=query_datasource.hidden_concepts,
294
+ base_name_override=base_name,
295
+ base_alias_override=base_alias,
220
296
  )
221
297
  if cte.grain != query_datasource.grain:
222
298
  raise ValueError("Grain was corrupted in CTE generation")
223
299
  for x in cte.output_columns:
224
- if x.address not in cte.source_map:
300
+ if x.address not in cte.source_map and CONFIG.validate_missing:
225
301
  raise ValueError(
226
302
  f"Missing {x.address} in {cte.source_map}, source map {cte.source.source_map.keys()} "
227
303
  )
@@ -243,12 +319,32 @@ def get_query_datasources(
243
319
  if not statement.output_components:
244
320
  raise ValueError(f"Statement has no output components {statement}")
245
321
  ds = source_query_concepts(
246
- statement.output_components, environment=environment, g=graph
322
+ statement.output_components,
323
+ environment=environment,
324
+ g=graph,
247
325
  )
248
326
  if hooks:
249
327
  for hook in hooks:
250
328
  hook.process_root_strategy_node(ds)
251
329
  final_qds = ds.resolve()
330
+
331
+ # we if we have a where clause doing an existence check
332
+ # treat that as separate subquery
333
+ if (where := statement.where_clause) and where.existence_arguments:
334
+ for subselect in where.existence_arguments:
335
+ if not subselect:
336
+ continue
337
+ logger.info(
338
+ f"{LOGGER_PREFIX} fetching existance clause inputs {[str(c) for c in subselect]}"
339
+ )
340
+ eds = source_query_concepts([*subselect], environment=environment, g=graph)
341
+
342
+ final_eds = eds.resolve()
343
+ first_parent = final_qds
344
+ first_parent.datasources.append(final_eds)
345
+ for x in final_eds.output_concepts:
346
+ if x.address not in first_parent.existence_source_map:
347
+ first_parent.existence_source_map[x.address] = {final_eds}
252
348
  return final_qds
253
349
 
254
350
 
trilogy/dialect/base.py CHANGED
@@ -193,27 +193,6 @@ ORDER BY {% for order in order_by %}
193
193
  )
194
194
 
195
195
 
196
- def check_lineage(c: Concept, cte: CTE) -> bool:
197
- checks = []
198
- if not c.lineage:
199
- return True
200
- for sub_c in c.lineage.concept_arguments:
201
- if not isinstance(sub_c, Concept):
202
- continue
203
- if sub_c.address in cte.source_map or (
204
- sub_c.lineage and check_lineage(sub_c, cte)
205
- ):
206
- checks.append(True)
207
- else:
208
- logger.debug(
209
- f"{LOGGER_PREFIX} [{sub_c.address}] not found in source map for"
210
- f" {cte.name}, have cte keys {[c for c in cte.source_map.keys()]} and"
211
- f" datasource keys {[c for c in cte.source.source_map.keys()]}"
212
- )
213
- checks.append(False)
214
- return all(checks)
215
-
216
-
217
196
  def safe_quote(string: str, quote_char: str):
218
197
  # split dotted identifiers
219
198
  # TODO: evaluate if we need smarter parsing for strings that could actually include .
@@ -259,7 +238,7 @@ class BaseDialect:
259
238
  f"{LOGGER_PREFIX} [{c.address}] Starting rendering loop on cte: {cte.name}"
260
239
  )
261
240
 
262
- if c.lineage and cte.source_map.get(c.address, "") == "":
241
+ if c.lineage and cte.source_map.get(c.address, []) == []:
263
242
  logger.debug(
264
243
  f"{LOGGER_PREFIX} [{c.address}] rendering concept with lineage that is not already existing"
265
244
  )
@@ -273,7 +252,11 @@ class BaseDialect:
273
252
  ]
274
253
  rval = f"{self.WINDOW_FUNCTION_MAP[c.lineage.type](concept = self.render_concept_sql(c.lineage.content, cte=cte, alias=False), window=','.join(rendered_over_components), sort=','.join(rendered_order_components))}" # noqa: E501
275
254
  elif isinstance(c.lineage, FilterItem):
276
- rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional, cte=cte)} THEN {self.render_concept_sql(c.lineage.content, cte=cte, alias=False)} ELSE NULL END"
255
+ # for cases when we've optimized this
256
+ if len(cte.output_columns) == 1:
257
+ rval = self.render_expr(c.lineage.content, cte=cte)
258
+ else:
259
+ rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional, cte=cte)} THEN {self.render_concept_sql(c.lineage.content, cte=cte, alias=False)} ELSE NULL END"
277
260
  elif isinstance(c.lineage, RowsetItem):
278
261
  rval = f"{self.render_concept_sql(c.lineage.content, cte=cte, alias=False)}"
279
262
  elif isinstance(c.lineage, MultiSelectStatement):
@@ -356,17 +339,28 @@ class BaseDialect:
356
339
  cte: Optional[CTE] = None,
357
340
  cte_map: Optional[Dict[str, CTE]] = None,
358
341
  ) -> str:
359
- # if isinstance(e, Concept):
360
- # cte = cte or cte_map.get(e.address, None)
361
342
 
362
343
  if isinstance(e, SubselectComparison):
363
- assert cte, "Subselects must be rendered with a CTE in context"
344
+
364
345
  if isinstance(e.right, Concept):
365
- return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {self.render_expr(e.right, cte=cte, cte_map=cte_map)} from {cte.source_map[e.right.address][0]})"
346
+ # we won't always have an existnce map
347
+ # so fall back to the normal map
348
+ lookup_cte = cte
349
+ if cte_map and not lookup_cte:
350
+ lookup_cte = cte_map.get(e.right.address)
351
+ assert lookup_cte, "Subselects must be rendered with a CTE in context"
352
+ if e.right.address not in lookup_cte.existence_source_map:
353
+ lookup = lookup_cte.source_map[e.right.address]
354
+ else:
355
+ lookup = lookup_cte.existence_source_map[e.right.address]
356
+
357
+ return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {lookup[0]}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {lookup[0]})"
358
+ elif isinstance(e.right, (ListWrapper, Parenthetical)):
359
+ return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
360
+ elif isinstance(e.right, (str, int, bool, float, list)):
361
+ return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} ({self.render_expr(e.right, cte=cte, cte_map=cte_map)})"
366
362
  else:
367
- raise NotImplementedError(
368
- f"Subselects must be a concept, got {e.right}"
369
- )
363
+ return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} ({self.render_expr(e.right, cte=cte, cte_map=cte_map)})"
370
364
  elif isinstance(e, Comparison):
371
365
  return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
372
366
  elif isinstance(e, Conditional):
@@ -449,15 +443,15 @@ class BaseDialect:
449
443
  for c in cte.output_columns
450
444
  if c.address not in [y.address for y in cte.hidden_concepts]
451
445
  ]
446
+ if cte.base_name == cte.base_alias:
447
+ source = cte.base_name
448
+ else:
449
+ source = f"{cte.base_name} as {cte.base_alias}"
452
450
  return CompiledCTE(
453
451
  name=cte.name,
454
452
  statement=self.SQL_TEMPLATE.render(
455
453
  select_columns=select_columns,
456
- base=(
457
- f"{cte.base_name} as {cte.base_alias}"
458
- if cte.render_from_clause
459
- else None
460
- ),
454
+ base=(f"{source}" if cte.render_from_clause else None),
461
455
  grain=cte.grain,
462
456
  limit=cte.limit,
463
457
  # some joins may not need to be rendered
@@ -513,7 +507,7 @@ class BaseDialect:
513
507
  c
514
508
  for c in cte.output_columns
515
509
  if c.purpose == Purpose.CONSTANT
516
- and cte.source_map[c.address] != ""
510
+ and cte.source_map[c.address] != []
517
511
  ],
518
512
  "address",
519
513
  )
@@ -639,7 +633,7 @@ class BaseDialect:
639
633
  filter = set(
640
634
  [
641
635
  str(x.address)
642
- for x in query.where_clause.concept_arguments
636
+ for x in query.where_clause.row_arguments
643
637
  if not x.derivation == PurposeLineage.CONSTANT
644
638
  ]
645
639
  )
@@ -650,10 +644,21 @@ class BaseDialect:
650
644
 
651
645
  if not found:
652
646
  raise NotImplementedError(
653
- f"Cannot generate query with filtering on grain {filter} that is"
654
- f" not a subset of the query output grain {query_output}. Use a"
655
- " filtered concept instead."
647
+ f"Cannot generate query with filtering on row arguments {filter} that is"
648
+ f" not a subset of the query output grain {query_output}. Try a"
649
+ " filtered concept instead, or include it in the select clause"
656
650
  )
651
+ for ex_set in query.where_clause.existence_arguments:
652
+ for c in ex_set:
653
+ if c.address not in cte_output_map:
654
+ cts = [
655
+ ct
656
+ for ct in query.ctes
657
+ if ct.name in query.base.existence_source_map[c.address]
658
+ ]
659
+ if not cts:
660
+ raise ValueError(query.base.existence_source_map[c.address])
661
+ cte_output_map[c.address] = cts[0]
657
662
 
658
663
  compiled_ctes = self.generate_ctes(query)
659
664
 
trilogy/executor.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import List, Optional, Any
1
+ from typing import List, Optional, Any, Generator
2
2
  from functools import singledispatchmethod
3
3
  from sqlalchemy import text
4
4
  from sqlalchemy.engine import Engine, CursorResult
@@ -222,6 +222,35 @@ class Executor(object):
222
222
  sql.append(x)
223
223
  return sql
224
224
 
225
+ def parse_text_generator(
226
+ self, command: str, persist: bool = False
227
+ ) -> Generator[
228
+ ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement, None, None
229
+ ]:
230
+ """Process a preql text command"""
231
+ _, parsed = parse_text(command, self.environment)
232
+ generatable = [
233
+ x
234
+ for x in parsed
235
+ if isinstance(
236
+ x,
237
+ (
238
+ SelectStatement,
239
+ PersistStatement,
240
+ MultiSelectStatement,
241
+ ShowStatement,
242
+ ),
243
+ )
244
+ ]
245
+ while generatable:
246
+ t = generatable.pop(0)
247
+ x = self.generator.generate_queries(
248
+ self.environment, [t], hooks=self.hooks
249
+ )[0]
250
+ if persist and isinstance(x, ProcessedQueryPersist):
251
+ self.environment.add_datasource(x.datasource)
252
+ yield x
253
+
225
254
  def execute_raw_sql(self, command: str) -> CursorResult:
226
255
  """Run a command against the raw underlying
227
256
  execution engine"""
@@ -229,10 +258,9 @@ class Executor(object):
229
258
 
230
259
  def execute_text(self, command: str) -> List[CursorResult]:
231
260
  """Run a preql text command"""
232
- sql = self.parse_text(command)
233
261
  output = []
234
262
  # connection = self.engine.connect()
235
- for statement in sql:
263
+ for statement in self.parse_text_generator(command):
236
264
  if isinstance(statement, ProcessedShowStatement):
237
265
  output.append(
238
266
  generate_result_set(
trilogy/parsing/parse_engine.py CHANGED
@@ -101,6 +101,7 @@ from trilogy.core.models import (
101
101
  ConceptDerivation,
102
102
  RowsetDerivationStatement,
103
103
  LooseConceptList,
104
+ list_to_wrapper,
104
105
  )
105
106
  from trilogy.parsing.exceptions import ParseError
106
107
  from trilogy.utility import string_to_hash
@@ -113,6 +114,7 @@ from trilogy.parsing.common import (
113
114
  arbitrary_to_concept,
114
115
  )
115
116
 
117
+
116
118
  CONSTANT_TYPES = (int, float, str, bool, ListWrapper)
117
119
 
118
120
  grammar = r"""
@@ -138,7 +140,7 @@ grammar = r"""
138
140
  //<customer_id,country>.property local_alias STRING
139
141
  concept_property_declaration: PROPERTY (prop_ident | IDENTIFIER) data_type concept_nullable_modifier? metadata?
140
142
  //metric post_length <- len(post_text);
141
- concept_derivation: (PURPOSE | AUTO | PROPERTY ) IDENTIFIER "<" "-" expr
143
+ concept_derivation: (PURPOSE | AUTO | PROPERTY ) (prop_ident | IDENTIFIER) "<" "-" expr
142
144
 
143
145
  rowset_derivation_statement: ("rowset"i IDENTIFIER "<" "-" (multi_select_statement | select_statement)) | ("with"i IDENTIFIER "as"i (multi_select_statement | select_statement))
144
146
 
@@ -179,13 +181,11 @@ grammar = r"""
179
181
  // multiple_selects
180
182
  multi_select_statement: select_statement ("merge" select_statement)+ "align"i align_clause where? comment* order_by? comment* limit? comment*
181
183
 
182
-
183
184
  align_item: IDENTIFIER ":" IDENTIFIER ("," IDENTIFIER)* ","?
184
185
 
185
186
  align_clause: align_item ("," align_item)* ","?
186
187
 
187
188
  // merge statemment
188
-
189
189
  merge_statement: "merge" IDENTIFIER ("," IDENTIFIER)* ","? comment*
190
190
 
191
191
  // FUNCTION blocks
@@ -193,7 +193,6 @@ grammar = r"""
193
193
  function_binding_item: IDENTIFIER ":" data_type
194
194
  function_binding_list: (function_binding_item ",")* function_binding_item ","?
195
195
  raw_function: "bind" "sql" IDENTIFIER "(" function_binding_list ")" "-" ">" data_type "as"i MULTILINE_STRING
196
-
197
196
 
198
197
  // user_id where state = Mexico
199
198
  filter_item: "filter"i IDENTIFIER where
@@ -249,9 +248,11 @@ grammar = r"""
249
248
 
250
249
  COMPARISON_OPERATOR: (/is[\s]+not/ | "is" |"=" | ">" | "<" | ">=" | "<=" | "!=" )
251
250
 
252
- comparison: (expr COMPARISON_OPERATOR expr) | (expr array_comparison expr_tuple)
251
+ comparison: (expr COMPARISON_OPERATOR expr)
252
+
253
+ between_comparison: expr "BETWEEN"i expr "AND"i expr
253
254
 
254
- subselect_comparison: expr array_comparison expr
255
+ subselect_comparison: expr array_comparison expr | (expr array_comparison expr_tuple)
255
256
 
256
257
  expr_tuple: "(" (expr ",")* expr ","? ")"
257
258
 
@@ -263,11 +264,10 @@ grammar = r"""
263
264
 
264
265
  parenthetical: "(" (conditional | expr) ")"
265
266
 
266
- expr: window_item | filter_item | comparison | subselect_comparison | fgroup | aggregate_functions | unnest | _string_functions | _math_functions | _generic_functions | _constant_functions| _date_functions | literal | expr_reference | index_access | attr_access | parenthetical
267
+ expr: window_item | filter_item | between_comparison | comparison | subselect_comparison | fgroup | aggregate_functions | unnest | _string_functions | _math_functions | _generic_functions | _constant_functions| _date_functions | literal | expr_reference | index_access | attr_access | parenthetical
267
268
 
268
269
  // functions
269
270
 
270
- //math TODO: add syntactic sugar
271
271
  fadd: ("add"i "(" expr "," expr ")" ) | ( expr "+" expr )
272
272
  fsub: ("subtract"i "(" expr "," expr ")" ) | ( expr "-" expr )
273
273
  fmul: ("multiply"i "(" expr "," expr ")" ) | ( expr "*" expr )
@@ -738,10 +738,17 @@ class ParseToObjects(Transformer):
738
738
  purpose = args[0]
739
739
  if purpose == Purpose.AUTO:
740
740
  purpose = None
741
- name = args[1]
742
- lookup, namespace, name, parent_concept = parse_concept_reference(
743
- name, self.environment, purpose
744
- )
741
+ raw_name = args[1]
742
+ if isinstance(raw_name, str):
743
+ lookup, namespace, name, parent_concept = parse_concept_reference(
744
+ raw_name, self.environment, purpose
745
+ )
746
+ else:
747
+ keys, name = raw_name
748
+ if "." in name:
749
+ namespace, name = name.rsplit(".", 1)
750
+ else:
751
+ namespace = self.environment.namespace or DEFAULT_NAMESPACE
745
752
  source_value = args[2]
746
753
  # we need to strip off every parenthetical to see what is being assigned.
747
754
  while isinstance(source_value, Parenthetical):
@@ -971,7 +978,26 @@ class ParseToObjects(Transformer):
971
978
  return Ordering(args.lower())
972
979
 
973
980
  def order_list(self, args):
974
- return [OrderItem(expr=x, order=y) for x, y in zip(args[::2], args[1::2])]
981
+
982
+ def handle_order_item(x, namespace: str):
983
+ if not isinstance(x, Concept):
984
+ x = arbitrary_to_concept(
985
+ x,
986
+ namespace=namespace,
987
+ name=f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(x))}",
988
+ )
989
+ return x
990
+
991
+ return [
992
+ OrderItem(
993
+ expr=handle_order_item(
994
+ x,
995
+ self.environment.namespace,
996
+ ),
997
+ order=y,
998
+ )
999
+ for x, y in zip(args[::2], args[1::2])
1000
+ ]
975
1001
 
976
1002
  def order_by(self, args):
977
1003
  return OrderBy(items=args[0])
@@ -1207,26 +1233,47 @@ class ParseToObjects(Transformer):
1207
1233
  return float(args[0])
1208
1234
 
1209
1235
  def array_lit(self, args):
1210
- types = [arg_to_datatype(arg) for arg in args]
1211
- assert len(set(types)) == 1
1212
- return ListWrapper(args, type=types[0])
1236
+ return list_to_wrapper(args)
1213
1237
 
1214
1238
  def literal(self, args):
1215
1239
  return args[0]
1216
1240
 
1217
1241
  def comparison(self, args) -> Comparison:
1242
+ if args[1] == ComparisonOperator.IN:
1243
+ raise SyntaxError
1218
1244
  return Comparison(left=args[0], right=args[2], operator=args[1])
1219
1245
 
1246
+ def between_comparison(self, args) -> Conditional:
1247
+ left_bound = args[1]
1248
+ right_bound = args[2]
1249
+ return Conditional(
1250
+ left=Comparison(
1251
+ left=args[0], right=left_bound, operator=ComparisonOperator.GTE
1252
+ ),
1253
+ right=Comparison(
1254
+ left=args[0], right=right_bound, operator=ComparisonOperator.LTE
1255
+ ),
1256
+ operator=BooleanOperator.AND,
1257
+ )
1258
+
1220
1259
  @v_args(meta=True)
1221
1260
  def subselect_comparison(self, meta: Meta, args) -> SubselectComparison:
1222
1261
  right = args[2]
1223
- if not isinstance(right, Concept):
1262
+
1263
+ while isinstance(right, Parenthetical) and isinstance(
1264
+ right.content,
1265
+ (Concept, Function, FilterItem, WindowItem, AggregateWrapper, ListWrapper),
1266
+ ):
1267
+ right = right.content
1268
+ if isinstance(
1269
+ right, (Function, FilterItem, WindowItem, AggregateWrapper, ListWrapper)
1270
+ ):
1224
1271
  right = arbitrary_to_concept(
1225
1272
  right,
1226
1273
  namespace=self.environment.namespace,
1227
1274
  name=f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(right))}",
1228
1275
  )
1229
- self.environment.add_concept(right)
1276
+ self.environment.add_concept(right, meta=meta)
1230
1277
  return SubselectComparison(
1231
1278
  left=args[0],
1232
1279
  right=right,