pytrilogy 0.0.2.12__py3-none-any.whl → 0.0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (31)
  1. {pytrilogy-0.0.2.12.dist-info → pytrilogy-0.0.2.14.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.2.12.dist-info → pytrilogy-0.0.2.14.dist-info}/RECORD +31 -31
  3. {pytrilogy-0.0.2.12.dist-info → pytrilogy-0.0.2.14.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +16 -1
  6. trilogy/core/enums.py +3 -0
  7. trilogy/core/models.py +150 -17
  8. trilogy/core/optimizations/predicate_pushdown.py +1 -1
  9. trilogy/core/processing/node_generators/basic_node.py +8 -1
  10. trilogy/core/processing/node_generators/common.py +13 -36
  11. trilogy/core/processing/node_generators/filter_node.py +1 -15
  12. trilogy/core/processing/node_generators/group_node.py +19 -1
  13. trilogy/core/processing/node_generators/group_to_node.py +0 -12
  14. trilogy/core/processing/node_generators/multiselect_node.py +1 -10
  15. trilogy/core/processing/node_generators/rowset_node.py +3 -14
  16. trilogy/core/processing/node_generators/select_node.py +26 -0
  17. trilogy/core/processing/node_generators/window_node.py +1 -1
  18. trilogy/core/processing/nodes/base_node.py +40 -11
  19. trilogy/core/processing/nodes/group_node.py +31 -18
  20. trilogy/core/processing/nodes/merge_node.py +14 -5
  21. trilogy/core/processing/nodes/select_node_v2.py +4 -0
  22. trilogy/core/processing/utility.py +91 -3
  23. trilogy/core/query_processor.py +6 -12
  24. trilogy/dialect/common.py +10 -8
  25. trilogy/executor.py +8 -2
  26. trilogy/parsing/common.py +34 -4
  27. trilogy/parsing/parse_engine.py +31 -19
  28. trilogy/parsing/trilogy.lark +5 -5
  29. {pytrilogy-0.0.2.12.dist-info → pytrilogy-0.0.2.14.dist-info}/LICENSE.md +0 -0
  30. {pytrilogy-0.0.2.12.dist-info → pytrilogy-0.0.2.14.dist-info}/entry_points.txt +0 -0
  31. {pytrilogy-0.0.2.12.dist-info → pytrilogy-0.0.2.14.dist-info}/top_level.txt +0 -0
trilogy/core/processing/nodes/select_node_v2.py CHANGED
@@ -43,6 +43,7 @@ class SelectNode(StrategyNode):
         parents: List["StrategyNode"] | None = None,
         depth: int = 0,
         partial_concepts: List[Concept] | None = None,
+        nullable_concepts: List[Concept] | None = None,
         accept_partial: bool = False,
         grain: Optional[Grain] = None,
         force_group: bool | None = False,
@@ -58,6 +59,7 @@ class SelectNode(StrategyNode):
             parents=parents,
             depth=depth,
             partial_concepts=partial_concepts,
+            nullable_concepts=nullable_concepts,
             force_group=force_group,
             grain=grain,
             conditions=conditions,
@@ -115,6 +117,7 @@ class SelectNode(StrategyNode):
             partial_concepts=[
                 c.concept for c in datasource.columns if not c.is_complete
             ],
+            nullable_concepts=[c.concept for c in datasource.columns if c.is_nullable],
             source_type=SourceType.DIRECT_SELECT,
             condition=self.conditions,
             # select nodes should never group
@@ -183,6 +186,7 @@ class SelectNode(StrategyNode):
             parents=self.parents,
             whole_grain=self.whole_grain,
             partial_concepts=list(self.partial_concepts),
+            nullable_concepts=list(self.nullable_concepts),
             accept_partial=self.accept_partial,
             grain=self.grain,
             force_group=self.force_group,
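
Note: the four hunks above thread a new nullable_concepts field through SelectNode, mirroring the existing partial_concepts plumbing: columns flagged nullable on a datasource are carried onto the resolved query datasource and onto copies of the node. A minimal standalone sketch of the pattern (Column, Node, and resolve are illustrative stand-ins, not the real trilogy classes):

    from dataclasses import dataclass, field
    from typing import List

    @dataclass
    class Column:
        concept: str
        is_complete: bool = True
        is_nullable: bool = False

    @dataclass
    class Node:
        partial_concepts: List[str] = field(default_factory=list)
        nullable_concepts: List[str] = field(default_factory=list)

    def resolve(columns: List[Column]) -> Node:
        # mirrors the DIRECT_SELECT hunk: partial and nullable concepts
        # are derived side by side from per-column flags
        return Node(
            partial_concepts=[c.concept for c in columns if not c.is_complete],
            nullable_concepts=[c.concept for c in columns if c.is_nullable],
        )

    print(resolve([Column("order.id"), Column("order.discount", is_nullable=True)]))
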
trilogy/core/processing/utility.py CHANGED
@@ -18,9 +18,11 @@ from trilogy.core.models import (
     WindowItem,
     AggregateWrapper,
     DataType,
+    ConceptPair,
+    UnnestJoin,
 )

-from trilogy.core.enums import Purpose, Granularity, BooleanOperator
+from trilogy.core.enums import Purpose, Granularity, BooleanOperator, Modifier
 from trilogy.core.constants import CONSTANT_DATASET
 from enum import Enum
 from trilogy.utility import unique
@@ -243,8 +245,23 @@ def get_node_joins(
        local_concepts = [
            c for c in local_concepts if c.granularity != Granularity.SINGLE_ROW
        ]
-    else:
+    elif any(
+        [
+            c.address in [x.address for x in identifier_map[right].partial_concepts]
+            for c in local_concepts
+        ]
+    ) or any(
+        [
+            c.address in [x.address for x in identifier_map[left].nullable_concepts]
+            for c in local_concepts
+        ]
+    ):
         join_type = JoinType.LEFT_OUTER
+        local_concepts = [
+            c for c in local_concepts if c.granularity != Granularity.SINGLE_ROW
+        ]
+    else:
+        join_type = JoinType.INNER
     # remove any constants if other join keys exist
     local_concepts = [
         c for c in local_concepts if c.granularity != Granularity.SINGLE_ROW
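
Note: this hunk replaces the previous unconditional LEFT_OUTER fallback. A join now degrades to LEFT OUTER only when a join concept is partial on the right-hand datasource or nullable on the left-hand one, and defaults to INNER otherwise. A simplified paraphrase of the decision (choose_join_type and its arguments are invented names, not the real get_node_joins signature):

    from enum import Enum

    class JoinType(Enum):
        INNER = "inner"
        LEFT_OUTER = "left outer"

    def choose_join_type(join_keys: set, right_partials: set, left_nullables: set) -> JoinType:
        # a partial right side or a nullable left key can drop rows
        # under an INNER join, so fall back to LEFT OUTER
        if join_keys & right_partials or join_keys & left_nullables:
            return JoinType.LEFT_OUTER
        return JoinType.INNER

    assert choose_join_type({"a"}, {"a"}, set()) is JoinType.LEFT_OUTER
    assert choose_join_type({"a"}, set(), set()) is JoinType.INNER
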
@@ -287,7 +304,18 @@ def get_node_joins(
            )
            narg = (left_arg, right_arg)
            if narg not in join_tuples:
-                join_tuples.append((left_arg, right_arg))
+                modifiers = set()
+                if left_arg.address in [
+                    x.address for x in left_datasource.nullable_concepts
+                ] and right_arg.address in [
+                    x.address for x in right_datasource.nullable_concepts
+                ]:
+                    modifiers.add(Modifier.NULLABLE)
+                join_tuples.append(
+                    ConceptPair(
+                        left=left_arg, right=right_arg, modifiers=list(modifiers)
+                    )
+                )
        final_joins_pre.append(
            BaseJoin(
                left_datasource=identifier_map[left],
@@ -412,3 +440,63 @@ def decompose_condition(
     else:
         chunks.append(conditional)
     return chunks
+
+
+def find_nullable_concepts(
+    source_map: Dict[str, set[Datasource | QueryDatasource | UnnestJoin]],
+    datasources: List[Datasource | QueryDatasource],
+    joins: List[BaseJoin | UnnestJoin],
+) -> List[str]:
+    """give a set of datasources and joins, find the concepts
+    that may contain nulls in the output set
+    """
+    nullable_datasources = set()
+    datasource_map = {
+        x.identifier: x
+        for x in datasources
+        if isinstance(x, (Datasource, QueryDatasource))
+    }
+    for join in joins:
+        is_on_nullable_condition = False
+        if not isinstance(join, BaseJoin):
+            continue
+        if not join.concept_pairs:
+            continue
+        for pair in join.concept_pairs:
+            if pair.right.address in [
+                y.address
+                for y in datasource_map[
+                    join.right_datasource.identifier
+                ].nullable_concepts
+            ]:
+                is_on_nullable_condition = True
+                break
+            if pair.left.address in [
+                y.address
+                for y in datasource_map[
+                    join.left_datasource.identifier
+                ].nullable_concepts
+            ]:
+                is_on_nullable_condition = True
+                break
+        if is_on_nullable_condition:
+            nullable_datasources.add(datasource_map[join.right_datasource.identifier])
+    final_nullable = set()
+
+    for k, v in source_map.items():
+        local_nullable = [
+            x for x in datasources if k in [v.address for v in x.nullable_concepts]
+        ]
+        if all(
+            [
+                k in [v.address for v in x.nullable_concepts]
+                for x in datasources
+                if k in [z.address for z in x.output_concepts]
+            ]
+        ):
+            final_nullable.add(k)
+        all_ds = set([ds for ds in local_nullable]).union(nullable_datasources)
+        if nullable_datasources:
+            if set(v).issubset(all_ds):
+                final_nullable.add(k)
+    return list(sorted(final_nullable))
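
Note: find_nullable_concepts implements two rules: a concept is nullable in the output when every datasource that outputs it marks it nullable, or when all of its sources belong to datasources sitting on the null-introducing side of a join over nullable keys. A toy illustration of the first rule only (the sources dict and helper are hypothetical, not the real API):

    sources = {
        "ds_orders": {"outputs": {"order.id", "order.discount"},
                      "nullable": {"order.discount"}},
        "ds_archive": {"outputs": {"order.discount"},
                       "nullable": {"order.discount"}},
    }

    def output_nullable(concept: str) -> bool:
        providers = [s for s in sources.values() if concept in s["outputs"]]
        return bool(providers) and all(concept in s["nullable"] for s in providers)

    assert output_nullable("order.discount")  # nullable in every provider
    assert not output_nullable("order.id")    # never nullable
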
trilogy/core/query_processor.py CHANGED
@@ -35,7 +35,6 @@ from trilogy.core.ergonomics import CTE_NAMES
 from trilogy.core.optimization import optimize_ctes
 from math import ceil
 from collections import defaultdict
-from random import shuffle

 LOGGER_PREFIX = "[QUERY BUILD]"

@@ -128,8 +127,6 @@ def generate_source_map(
        if qdk in output_address:
            source_map[qdk].append(cte.name)
    # now do a pass that accepts partials
-    # TODO: move this into a second loop by first creationg all sub sources
-    # then loop through this
    for cte in matches:
        if qdk not in source_map:
            source_map[qdk] = [cte.name]
@@ -180,7 +177,6 @@ def generate_cte_name(full_name: str, name_map: dict[str, str]) -> str:
        int = ceil(idx / len(CTE_NAMES))
        suffix = f"_{int}"
        valid = [x for x in CTE_NAMES if x + suffix not in name_map.values()]
-        shuffle(valid)
        lookup = valid[0]
        new_name = f"{lookup}{suffix}"
        name_map[full_name] = new_name
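
Note: dropping shuffle here (and the import above) makes CTE naming deterministic: the first unused entry in CTE_NAMES is always chosen, so recompiling the same query yields identical SQL. A sketch of the resulting behavior (the three names come from the log excerpt removed in the next hunk; the real list lives in trilogy.core.ergonomics.CTE_NAMES):

    CTE_NAMES = ["fowl", "mandrill", "civet"]  # illustrative subset

    def pick_name(used: set) -> str:
        # first free name wins, stable across runs
        return next(n for n in CTE_NAMES if n not in used)

    assert pick_name(set()) == "fowl"
    assert pick_name({"fowl"}) == "mandrill"
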
@@ -196,8 +192,6 @@ def resolve_cte_base_name_and_alias_v2(
    raw_joins: List[Join | InstantiatedUnnestJoin],
) -> Tuple[str | None, str | None]:
    joins: List[Join] = [join for join in raw_joins if isinstance(join, Join)]
-    # INFO trilogy:query_processor.py:263 Finished building source map for civet with 3 parents, have {'local.relevant_customers': ['fowl', 'fowl'],
-    # 'customer.demographics.gender': ['mandrill'], 'customer.id': ['mandrill'], 'customer.demographics.id': ['mandrill'], 'customer.id_9268029262289908': [], 'customer.demographics.gender_1513806568509111': []}, query_datasource had non-empty keys ['local.relevant_customers', 'customer.demographics.gender', 'customer.id', 'customer.demographics.id'] and existence had non-empty keys []
    if (
        len(source.datasources) == 1
        and isinstance(source.datasources[0], Datasource)
@@ -301,6 +295,7 @@ def datasource_to_ctes(
            parent_ctes=parents,
            condition=query_datasource.condition,
            partial_concepts=query_datasource.partial_concepts,
+            nullable_concepts=query_datasource.nullable_concepts,
            join_derived_concepts=query_datasource.join_derived_concepts,
            hidden_concepts=query_datasource.hidden_concepts,
            base_name_override=base_name,
@@ -334,12 +329,13 @@ def append_existence_check(
    for subselect in where.existence_arguments:
        if not subselect:
            continue
-        logger.info(
-            f"{LOGGER_PREFIX} fetching existance clause inputs {[str(c) for c in subselect]}"
-        )
+
        eds = source_query_concepts(
            [*subselect], environment=environment, g=graph, history=history
        )
+        logger.info(
+            f"{LOGGER_PREFIX} fetching existence clause inputs {[str(c) for c in subselect]}"
+        )
        node.add_parents([eds])
        node.add_existence_concepts([*subselect])

 
@@ -384,9 +380,7 @@ def get_query_node(
384
380
  if nest_where and statement.where_clause:
385
381
  if not all_aggregate:
386
382
  ods.conditions = statement.where_clause.conditional
387
- ods.output_concepts = statement.output_components
388
- # ods.hidden_concepts = where_delta
389
- ods.rebuild_cache()
383
+ ods.set_output_concepts(statement.output_components)
390
384
  append_existence_check(ods, environment, graph, history)
391
385
  ds = GroupNode(
392
386
  output_concepts=statement.output_components,
trilogy/dialect/common.py CHANGED
@@ -3,9 +3,9 @@ from trilogy.core.enums import UnnestMode, Modifier
 from typing import Optional, Callable


-def null_wrapper(lval: str, rval: str, concept: Concept) -> str:
-    if concept.modifiers and Modifier.NULLABLE in concept.modifiers:
-        return f"(({lval} is null and {rval} is null) or ({lval} = {rval}))"
+def null_wrapper(lval: str, rval: str, modifiers: list[Modifier]) -> str:
+    if Modifier.NULLABLE in modifiers:
+        return f"({lval} = {rval} or ({lval} is null and {rval} is null))"
     return f"{lval} = {rval}"

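
Note: null_wrapper now receives the modifier list directly instead of a Concept, and puts the plain equality first so the common non-null case leads the predicate. A runnable reproduction (the Modifier enum is stubbed so the snippet stands alone):

    from enum import Enum

    class Modifier(Enum):  # stand-in for trilogy.core.enums.Modifier
        NULLABLE = "nullable"

    def null_wrapper(lval: str, rval: str, modifiers: list) -> str:
        if Modifier.NULLABLE in modifiers:
            return f"({lval} = {rval} or ({lval} is null and {rval} is null))"
        return f"{lval} = {rval}"

    print(null_wrapper("a.x", "b.x", [Modifier.NULLABLE]))
    # (a.x = b.x or (a.x is null and b.x is null))
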
@@ -48,7 +48,7 @@ def render_join(
        null_wrapper(
            f"{left_name}.{quote_character}{join.left_cte.get_alias(key.concept) if isinstance(join.left_cte, Datasource) else key.concept.safe_address}{quote_character}",
            f"{right_name}.{quote_character}{join.right_cte.get_alias(key.concept) if isinstance(join.right_cte, Datasource) else key.concept.safe_address}{quote_character}",
-            key.concept,
+            modifiers=key.concept.modifiers or [],
        )
        for key in join.joinkeys
    ]
@@ -56,11 +56,13 @@ def render_join(
    base_joinkeys.extend(
        [
            null_wrapper(
-                f"{left_name}.{quote_character}{join.left_cte.get_alias(left_concept) if isinstance(join.left_cte, Datasource) else left_concept.safe_address}{quote_character}",
-                f"{right_name}.{quote_character}{join.right_cte.get_alias(right_concept) if isinstance(join.right_cte, Datasource) else right_concept.safe_address}{quote_character}",
-                left_concept,
+                f"{left_name}.{quote_character}{join.left_cte.get_alias(pair.left) if isinstance(join.left_cte, Datasource) else pair.left.safe_address}{quote_character}",
+                f"{right_name}.{quote_character}{join.right_cte.get_alias(pair.right) if isinstance(join.right_cte, Datasource) else pair.right.safe_address}{quote_character}",
+                modifiers=pair.modifiers
+                + (pair.left.modifiers or [])
+                + (pair.right.modifiers or []),
            )
-            for left_concept, right_concept in join.joinkey_pairs
+            for pair in join.joinkey_pairs
        ]
    )
    if not base_joinkeys:
trilogy/executor.py CHANGED
@@ -300,10 +300,16 @@ class Executor(object):
             self.environment.add_datasource(x.datasource)
             yield x

-    def execute_raw_sql(self, command: str) -> CursorResult:
+    def execute_raw_sql(
+        self, command: str, variables: dict | None = None
+    ) -> CursorResult:
         """Run a command against the raw underlying
         execution engine"""
-        return self.connection.execute(text(command))
+        if variables:
+            return self.connection.execute(text(command), variables)
+        return self.connection.execute(
+            text(command),
+        )

     def execute_text(self, command: str) -> List[CursorResult]:
         """Run a preql text command"""
trilogy/parsing/common.py CHANGED
@@ -25,6 +25,15 @@ from trilogy.core.enums import PurposeLineage
 from trilogy.constants import (
     VIRTUAL_CONCEPT_PREFIX,
 )
+from trilogy.core.enums import Modifier
+
+
+def get_upstream_modifiers(keys: List[Concept]) -> list[Modifier]:
+    modifiers = set()
+    for pkey in keys:
+        if pkey.modifiers:
+            modifiers.update(pkey.modifiers)
+    return list(modifiers)


 def process_function_args(
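
Note: get_upstream_modifiers unions the modifiers of all parent concepts, and the hunks below apply it so that derived concepts (functions, filters, windows, aggregates) inherit markers such as Nullable from their inputs. A behavioral sketch with stand-in objects:

    from dataclasses import dataclass

    @dataclass
    class FakeConcept:  # stand-in for trilogy.core.models.Concept
        modifiers: list | None = None

    parents = [FakeConcept(["nullable"]), FakeConcept(None), FakeConcept(["partial"])]
    merged = set()
    for p in parents:
        if p.modifiers:
            merged.update(p.modifiers)  # union across all parents
    assert merged == {"nullable", "partial"}
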
@@ -50,7 +59,7 @@ def process_function_args(
            id_hash = string_to_hash(str(arg))
            concept = function_to_concept(
                arg,
-                name=f"{VIRTUAL_CONCEPT_PREFIX}_{id_hash}",
+                name=f"{VIRTUAL_CONCEPT_PREFIX}_{arg.operator.value}_{id_hash}",
                namespace=environment.namespace,
            )
            # to satisfy mypy, concept will always have metadata
@@ -125,7 +134,7 @@ def constant_to_concept(


 def function_to_concept(parent: Function, name: str, namespace: str) -> Concept:
-    pkeys = []
+    pkeys: List[Concept] = []
     for x in parent.arguments:
         pkeys += [
             x
@@ -135,7 +144,7 @@ def function_to_concept(parent: Function, name: str, namespace: str) -> Concept:
     grain = Grain()
     for x in pkeys:
         grain += x.grain
-
+    modifiers = get_upstream_modifiers(pkeys)
     key_grain = []
     for x in pkeys:
         if x.keys:
@@ -155,6 +164,7 @@ def function_to_concept(parent: Function, name: str, namespace: str) -> Concept:
         namespace=namespace,
         grain=grain,
         keys=keys,
+        modifiers=modifiers,
     )

@@ -166,6 +176,7 @@ def filter_item_to_concept(
     metadata: Metadata | None = None,
 ) -> Concept:
     fmetadata = metadata or Metadata()
+    modifiers = get_upstream_modifiers(parent.content.concept_arguments)
     return Concept(
         name=name,
         datatype=parent.content.datatype,
@@ -184,6 +195,7 @@ def filter_item_to_concept(
            if parent.content.purpose == Purpose.PROPERTY
            else Grain()
        ),
+        modifiers=modifiers,
    )

@@ -202,6 +214,7 @@ def window_item_to_concept(
            grain += [item.expr.output]
    else:
        grain = parent.over + [parent.content.output]
+    modifiers = get_upstream_modifiers(parent.content.concept_arguments)
    return Concept(
        name=name,
        datatype=parent.content.datatype,
@@ -212,6 +225,7 @@ def window_item_to_concept(
        grain=Grain(components=grain),
        namespace=namespace,
        keys=keys,
+        modifiers=modifiers,
    )

@@ -229,6 +243,7 @@ def agg_wrapper_to_concept(
     # at that grain
     fmetadata = metadata or Metadata()
     aggfunction = parent.function
+    modifiers = get_upstream_modifiers(parent.concept_arguments)
     out = Concept(
         name=name,
         datatype=aggfunction.output_datatype,
@@ -238,6 +253,7 @@ def agg_wrapper_to_concept(
        grain=Grain(components=parent.by) if parent.by else Grain(),
        namespace=namespace,
        keys=tuple(parent.by) if parent.by else keys,
+        modifiers=modifiers,
    )
    return out

@@ -255,20 +271,34 @@ def arbitrary_to_concept(
        | str
    ),
    namespace: str,
-    name: str,
+    name: str | None = None,
    metadata: Metadata | None = None,
    purpose: Purpose | None = None,
) -> Concept:

    if isinstance(parent, AggregateWrapper):
+        if not name:
+            name = (
+                f"_agg_{parent.function.operator.value}_{string_to_hash(str(parent))}"
+            )
        return agg_wrapper_to_concept(parent, namespace, name, metadata, purpose)
    elif isinstance(parent, WindowItem):
+        if not name:
+            name = f"_window_{parent.type.value}_{string_to_hash(str(parent))}"
        return window_item_to_concept(parent, name, namespace, purpose, metadata)
    elif isinstance(parent, FilterItem):
+        if not name:
+            name = f"_filter_{parent.content.name}_{string_to_hash(str(parent))}"
        return filter_item_to_concept(parent, name, namespace, purpose, metadata)
    elif isinstance(parent, Function):
+        if not name:
+            name = f"_func_{parent.operator.value}_{string_to_hash(str(parent))}"
        return function_to_concept(parent, name, namespace)
    elif isinstance(parent, ListWrapper):
+        if not name:
+            name = f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(parent))}"
        return constant_to_concept(parent, name, namespace, purpose, metadata)
    else:
+        if not name:
+            name = f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(parent))}"
        return constant_to_concept(parent, name, namespace, purpose, metadata)
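
Note: arbitrary_to_concept now owns default naming for anonymous concepts, prefixing by kind (_agg_, _window_, _filter_, _func_, or the virtual-concept prefix) plus a content hash, so the parse_engine.py call sites below can drop their hand-built name= arguments. A sketch of the scheme (string_to_hash here is a deterministic stand-in for trilogy.utility.string_to_hash, and the inputs are invented):

    import hashlib

    def string_to_hash(s: str) -> int:
        # deterministic stand-in; the real helper lives in trilogy.utility
        return int(hashlib.md5(s.encode()).hexdigest()[:12], 16)

    def auto_name(kind: str, operator: str, source: str) -> str:
        return f"_{kind}_{operator}_{string_to_hash(source)}"

    print(auto_name("agg", "sum", "sum(order.revenue) by order.id"))
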
trilogy/parsing/parse_engine.py CHANGED
@@ -16,7 +16,6 @@ from trilogy.core.internal import INTERNAL_NAMESPACE, ALL_ROWS_CONCEPT
 from trilogy.constants import (
     DEFAULT_NAMESPACE,
     NULL_VALUE,
-    VIRTUAL_CONCEPT_PREFIX,
     MagicConstants,
 )
 from trilogy.core.enums import (
@@ -93,6 +92,7 @@ from trilogy.core.models import (
     WindowItemOver,
     RawColumnExpr,
     arg_to_datatype,
+    merge_datatypes,
     ListWrapper,
     MapWrapper,
     MapType,
@@ -109,7 +109,6 @@ from trilogy.core.models import (
     HavingClause,
 )
 from trilogy.parsing.exceptions import ParseError
-from trilogy.utility import string_to_hash
 from trilogy.parsing.common import (
     agg_wrapper_to_concept,
     window_item_to_concept,
@@ -739,8 +738,8 @@ class ParseToObjects(Transformer):
                x = arbitrary_to_concept(
                    x,
                    namespace=namespace,
-                    name=f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(x))}",
                )
+                self.environment.add_concept(x)
                return x

        return [
@@ -781,6 +780,11 @@ class ParseToObjects(Transformer):
     def rawsql_statement(self, meta: Meta, args) -> RawSQLStatement:
         return RawSQLStatement(meta=Metadata(line_number=meta.line), text=args[0])

+    def resolve_import_address(self, address) -> str:
+        with open(address, "r", encoding="utf-8") as f:
+            text = f.read()
+        return text
+
     def import_statement(self, args: list[str]) -> ImportStatement:
         alias = args[-1]
         path = args[0].split(".")
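
Note: factoring file reading into resolve_import_address turns import resolution into an overridable hook; the call-site change follows in the next hunk. A hypothetical subclass that resolves imports from memory instead of disk (IN_MEMORY_MODELS and InMemoryParser are invented for illustration; ParseToObjects is the real parser class):

    from trilogy.parsing.parse_engine import ParseToObjects

    IN_MEMORY_MODELS = {"customer": "key id int;"}

    class InMemoryParser(ParseToObjects):
        def resolve_import_address(self, address) -> str:
            # serve known models from memory, else fall back to the filesystem
            if address in IN_MEMORY_MODELS:
                return IN_MEMORY_MODELS[address]
            return super().resolve_import_address(address)
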
@@ -790,8 +794,7 @@ class ParseToObjects(Transformer):
            nparser = self.parsed[target]
        else:
            try:
-                with open(target, "r", encoding="utf-8") as f:
-                    text = f.read()
+                text = self.resolve_import_address(target)
                nparser = ParseToObjects(
                    visit_tokens=True,
                    text=text,
@@ -1093,7 +1096,6 @@ class ParseToObjects(Transformer):
            left = arbitrary_to_concept(
                args[0],
                namespace=self.environment.namespace,
-                name=f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(args[0]))}",
            )
            self.environment.add_concept(left)
        else:
@@ -1102,7 +1104,6 @@ class ParseToObjects(Transformer):
            right = arbitrary_to_concept(
                args[2],
                namespace=self.environment.namespace,
-                name=f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(args[2]))}",
            )
            self.environment.add_concept(right)
        else:
@@ -1137,7 +1138,6 @@ class ParseToObjects(Transformer):
            right = arbitrary_to_concept(
                right,
                namespace=self.environment.namespace,
-                name=f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(right))}",
            )
            self.environment.add_concept(right, meta=meta)
        return SubselectComparison(
@@ -1186,8 +1186,9 @@ class ParseToObjects(Transformer):
     def window_item_order(self, args):
         return WindowItemOrder(contents=args[0])

-    def window_item(self, args) -> WindowItem:
-        type = args[0]
+    @v_args(meta=True)
+    def window_item(self, meta, args) -> WindowItem:
+        type: WindowType = args[0]
         order_by = []
         over = []
         index = None
@@ -1203,6 +1204,14 @@ class ParseToObjects(Transformer):
                concept = self.environment.concepts[item]
            elif isinstance(item, Concept):
                concept = item
+            elif isinstance(item, WindowType):
+                type = item
+            else:
+                concept = arbitrary_to_concept(
+                    item,
+                    namespace=self.environment.namespace,
+                )
+                self.environment.add_concept(concept, meta=meta)
        assert concept
        return WindowItem(
            type=type, content=concept, over=over, order_by=order_by, index=index
@@ -1697,8 +1706,7 @@ class ParseToObjects(Transformer):
     @v_args(meta=True)
     def fadd(self, meta, args) -> Function:
         args = process_function_args(args, meta=meta, environment=self.environment)
-        output_datatype = arg_to_datatype(args[0])
-        # TODO: check for valid transforms?
+        output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
         return Function(
             operator=FunctionType.ADD,
             arguments=args,
@@ -1711,7 +1719,7 @@ class ParseToObjects(Transformer):
     @v_args(meta=True)
     def fsub(self, meta, args) -> Function:
         args = process_function_args(args, meta=meta, environment=self.environment)
-        output_datatype = arg_to_datatype(args[0])
+        output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
         return Function(
             operator=FunctionType.SUBTRACT,
             arguments=args,
@@ -1724,7 +1732,7 @@ class ParseToObjects(Transformer):
     @v_args(meta=True)
     def fmul(self, meta, args) -> Function:
         args = process_function_args(args, meta=meta, environment=self.environment)
-        output_datatype = arg_to_datatype(args[0])
+        output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
         return Function(
             operator=FunctionType.MULTIPLY,
             arguments=args,
@@ -1736,8 +1744,8 @@ class ParseToObjects(Transformer):

     @v_args(meta=True)
     def fdiv(self, meta: Meta, args):
-        output_datatype = arg_to_datatype(args[0])
         args = process_function_args(args, meta=meta, environment=self.environment)
+        output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
         return Function(
             operator=FunctionType.DIVIDE,
             arguments=args,
@@ -1749,12 +1757,11 @@ class ParseToObjects(Transformer):

     @v_args(meta=True)
     def fmod(self, meta: Meta, args):
-        output_datatype = arg_to_datatype(args[0])
         args = process_function_args(args, meta=meta, environment=self.environment)
         return Function(
             operator=FunctionType.MOD,
             arguments=args,
-            output_datatype=output_datatype,
+            output_datatype=DataType.INTEGER,
             output_purpose=function_args_to_output_purpose(args),
             valid_inputs=[
                 {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
@@ -1781,12 +1788,15 @@ class ParseToObjects(Transformer):

     def fcase(self, args: List[Union[CaseWhen, CaseElse]]):
         datatypes = set()
+        mapz = dict()
         for arg in args:
             output_datatype = arg_to_datatype(arg.expr)
-            datatypes.add(output_datatype)
+            if output_datatype != DataType.NULL:
+                datatypes.add(output_datatype)
+            mapz[str(arg.expr)] = output_datatype
         if not len(datatypes) == 1:
             raise SyntaxError(
-                f"All case expressions must have the same output datatype, got {datatypes}"
+                f"All case expressions must have the same output datatype, got {datatypes} from {mapz}"
             )
         return Function(
             operator=FunctionType.CASE,
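
Note: fcase now excludes DataType.NULL branches from the type-consistency check, so a case with an "else null" arm no longer fails, and the error message reports which expression produced which type. A standalone sketch of the relaxed check (branch_types is hypothetical data, with types as plain strings):

    branch_types = {"when x > 1 then revenue": "float", "else null": "null"}

    non_null = {t for t in branch_types.values() if t != "null"}
    if len(non_null) != 1:
        raise SyntaxError(
            f"All case expressions must have the same output datatype, "
            f"got {non_null} from {branch_types}"
        )
    # passes: the single non-null type is "float"
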
@@ -1830,6 +1840,8 @@ def unpack_visit_error(e: VisitError):
         unpack_visit_error(e.orig_exc)
     elif isinstance(e.orig_exc, (UndefinedConceptException, ImportError)):
         raise e.orig_exc
+    elif isinstance(e.orig_exc, SyntaxError):
+        raise InvalidSyntaxException(str(e.orig_exc) + str(e.rule) + str(e.obj))
     elif isinstance(e.orig_exc, (ValidationError, TypeError)):
         raise InvalidSyntaxException(str(e.orig_exc) + str(e.rule) + str(e.obj))
     raise e
trilogy/parsing/trilogy.lark CHANGED
@@ -43,7 +43,7 @@

 query: "query" MULTILINE_STRING

-concept_assignment: SHORTHAND_MODIFIER? IDENTIFIER
+concept_assignment: SHORTHAND_MODIFIER* IDENTIFIER

 //column_assignment
 //figure out if we want static
@@ -90,9 +90,9 @@


 // rank/lag/lead
-WINDOW_TYPE: ("row_number"i|"rank"i|"lag"i|"lead"i | "sum"i) /[\s]+/
+WINDOW_TYPE: ("row_number"i|"rank"i|"lag"i|"lead"i | "sum"i | "avg"i | "max"i | "min"i ) /[\s]+/

-window_item: WINDOW_TYPE int_lit? concept_lit window_item_over? window_item_order?
+window_item: WINDOW_TYPE int_lit? expr window_item_over? window_item_order?

 window_item_over: ("OVER"i over_list)

@@ -150,7 +150,7 @@

 subselect_comparison: expr array_comparison (literal | _constant_functions | _string_functions | concept_lit | filter_item | window_item | unnest | fgroup | expr_tuple | parenthetical )

-expr_tuple: "(" expr ("," expr)+ ","? ")"
+expr_tuple: ("(" expr ("," expr)+ ","? ")") | ("(" expr "," ")")

 parenthetical: "(" expr ")"

@@ -296,7 +296,7 @@

 MODIFIER: "Optional"i | "Partial"i | "Nullable"i

-SHORTHAND_MODIFIER: "~"
+SHORTHAND_MODIFIER: "~" | "?"

 struct_type: "struct"i "<" ((data_type | IDENTIFIER) ",")* (data_type | IDENTIFIER) ","? ">"

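
Note: the grammar changes allow stacked shorthand modifiers on column assignments ("~" plus the new "?"), window functions over arbitrary expressions with new avg/max/min variants, and single-element tuples written with a trailing comma. A minimal check of the SHORTHAND_MODIFIER* rule using the lark package (this fragment mirrors only the two rules shown above, with an invented IDENTIFIER terminal, not the full trilogy grammar):

    from lark import Lark

    parser = Lark(
        r'''
        concept_assignment: SHORTHAND_MODIFIER* IDENTIFIER
        SHORTHAND_MODIFIER: "~" | "?"
        IDENTIFIER: /[a-z_.]+/
        ''',
        start="concept_assignment",
    )

    print(parser.parse("~?customer_id").pretty())  # both modifiers accepted
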