pytrilogy 0.0.2.17__py3-none-any.whl → 0.0.2.18__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of pytrilogy might be problematic.

Files changed (40)
  1. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.18.dist-info}/METADATA +12 -8
  2. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.18.dist-info}/RECORD +40 -39
  3. trilogy/__init__.py +1 -1
  4. trilogy/constants.py +1 -1
  5. trilogy/core/enums.py +1 -0
  6. trilogy/core/functions.py +11 -0
  7. trilogy/core/models.py +89 -47
  8. trilogy/core/optimization.py +15 -9
  9. trilogy/core/processing/concept_strategies_v3.py +372 -145
  10. trilogy/core/processing/node_generators/basic_node.py +27 -55
  11. trilogy/core/processing/node_generators/common.py +6 -7
  12. trilogy/core/processing/node_generators/filter_node.py +28 -31
  13. trilogy/core/processing/node_generators/group_node.py +14 -2
  14. trilogy/core/processing/node_generators/group_to_node.py +3 -1
  15. trilogy/core/processing/node_generators/multiselect_node.py +3 -0
  16. trilogy/core/processing/node_generators/node_merge_node.py +14 -9
  17. trilogy/core/processing/node_generators/rowset_node.py +12 -12
  18. trilogy/core/processing/node_generators/select_merge_node.py +302 -0
  19. trilogy/core/processing/node_generators/select_node.py +7 -511
  20. trilogy/core/processing/node_generators/unnest_node.py +4 -3
  21. trilogy/core/processing/node_generators/window_node.py +12 -37
  22. trilogy/core/processing/nodes/__init__.py +0 -2
  23. trilogy/core/processing/nodes/base_node.py +69 -20
  24. trilogy/core/processing/nodes/filter_node.py +3 -0
  25. trilogy/core/processing/nodes/group_node.py +18 -17
  26. trilogy/core/processing/nodes/merge_node.py +4 -10
  27. trilogy/core/processing/nodes/select_node_v2.py +28 -14
  28. trilogy/core/processing/nodes/window_node.py +1 -2
  29. trilogy/core/processing/utility.py +51 -3
  30. trilogy/core/query_processor.py +17 -73
  31. trilogy/dialect/base.py +7 -3
  32. trilogy/dialect/duckdb.py +4 -1
  33. trilogy/dialect/sql_server.py +3 -3
  34. trilogy/hooks/query_debugger.py +5 -3
  35. trilogy/parsing/parse_engine.py +66 -38
  36. trilogy/parsing/trilogy.lark +2 -1
  37. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.18.dist-info}/LICENSE.md +0 -0
  38. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.18.dist-info}/WHEEL +0 -0
  39. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.18.dist-info}/entry_points.txt +0 -0
  40. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.18.dist-info}/top_level.txt +0 -0
trilogy/core/query_processor.py CHANGED
@@ -4,9 +4,9 @@ from trilogy.core.env_processor import generate_graph
  from trilogy.core.graph_models import ReferenceGraph
  from trilogy.core.constants import CONSTANT_DATASET
  from trilogy.core.processing.concept_strategies_v3 import source_query_concepts
- from trilogy.core.enums import SelectFiltering, BooleanOperator
+ from trilogy.core.enums import BooleanOperator
  from trilogy.constants import CONFIG, DEFAULT_NAMESPACE
- from trilogy.core.processing.nodes import GroupNode, SelectNode, StrategyNode, History
+ from trilogy.core.processing.nodes import SelectNode, StrategyNode, History
  from trilogy.core.models import (
      Concept,
      Environment,
@@ -193,8 +193,7 @@ def resolve_cte_base_name_and_alias_v2(
  ) -> Tuple[str | None, str | None]:
      joins: List[Join] = [join for join in raw_joins if isinstance(join, Join)]
      if (
-         len(source.datasources) == 1
-         and isinstance(source.datasources[0], Datasource)
+         isinstance(source.datasources[0], Datasource)
          and not source.datasources[0].name == CONSTANT_DATASET
      ):
          ds = source.datasources[0]
@@ -265,9 +264,7 @@ def datasource_to_ctes(
          existence_map = source_map

      human_id = generate_cte_name(query_datasource.full_name, name_map)
-     logger.info(
-         f"Finished building source map for {human_id} with {len(parents)} parents, have {source_map}, query_datasource had non-empty keys {[k for k, v in query_datasource.source_map.items() if v]} and existence had non-empty keys {[k for k, v in query_datasource.existence_source_map.items() if v]} "
-     )
+
      final_joins = [
          x
          for x in [base_join_to_join(join, parents) for join in query_datasource.joins]
@@ -317,29 +314,6 @@ def datasource_to_ctes(
      return output


- def append_existence_check(
-     node: StrategyNode,
-     environment: Environment,
-     graph: ReferenceGraph,
-     history: History | None = None,
- ):
-     # we if we have a where clause doing an existence check
-     # treat that as separate subquery
-     if (where := node.conditions) and where.existence_arguments:
-         for subselect in where.existence_arguments:
-             if not subselect:
-                 continue
-
-             eds = source_query_concepts(
-                 [*subselect], environment=environment, g=graph, history=history
-             )
-             logger.info(
-                 f"{LOGGER_PREFIX} fetching existence clause inputs {[str(c) for c in subselect]}"
-             )
-             node.add_parents([eds])
-             node.add_existence_concepts([*subselect])
-
-
  def get_query_node(
      environment: Environment,
      statement: SelectStatement | MultiSelectStatement,
@@ -354,16 +328,6 @@ def get_query_node(
          raise ValueError(f"Statement has no output components {statement}")

      search_concepts: list[Concept] = statement.output_components
-     nest_where = statement.where_clause_category == SelectFiltering.IMPLICIT
-
-     # if all are aggregates, we've pushed the filtering inside the aggregates anyway
-     all_aggregate = all([x.is_aggregate for x in search_concepts])
-
-     if nest_where and statement.where_clause and not all_aggregate:
-         search_concepts = unique(
-             statement.where_clause.row_arguments + search_concepts, "address"
-         )
-         nest_where = True

      ods: StrategyNode = source_query_concepts(
          search_concepts,
@@ -376,45 +340,24 @@
          raise ValueError(
              f"Could not find source query concepts for {[x.address for x in search_concepts]}"
          )
-     ds: StrategyNode
-     if nest_where and statement.where_clause:
-         if not all_aggregate:
-             ods.conditions = statement.where_clause.conditional
-         ods.set_output_concepts(statement.output_components)
-         append_existence_check(ods, environment, graph, history)
-         ds = GroupNode(
-             output_concepts=statement.output_components,
-             input_concepts=statement.output_components,
-             parents=[ods],
-             environment=ods.environment,
-             g=ods.g,
-             partial_concepts=ods.partial_concepts,
-         )
-         # we can still check existence here.
-
-     elif statement.where_clause:
-         ds = SelectNode(
-             output_concepts=statement.output_components,
-             input_concepts=ods.input_concepts,
-             parents=[ods],
-             environment=ods.environment,
-             g=ods.g,
-             partial_concepts=ods.partial_concepts,
-             conditions=statement.where_clause.conditional,
-         )
-         append_existence_check(ds, environment, graph)
-
-     else:
-         ds = ods
+     ds: StrategyNode = ods
      if statement.having_clause:
+         final = statement.having_clause.conditional
          if ds.conditions:
-             ds.conditions = Conditional(
+             final = Conditional(
                  left=ds.conditions,
                  right=statement.having_clause.conditional,
                  operator=BooleanOperator.AND,
              )
-         else:
-             ds.conditions = statement.having_clause.conditional
+         ds = SelectNode(
+             output_concepts=statement.output_components,
+             input_concepts=ds.output_concepts,
+             parents=[ds],
+             environment=ds.environment,
+             g=ds.g,
+             partial_concepts=ds.partial_concepts,
+             conditions=final,
+         )
      return ds

@@ -477,6 +420,7 @@ def process_query(
      hooks: List[BaseHook] | None = None,
  ) -> ProcessedQuery:
      hooks = hooks or []
+     statement.refresh_bindings(environment)
      graph = generate_graph(environment)
      root_datasource = get_query_datasources(
          environment=environment, graph=graph, statement=statement, hooks=hooks
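
The reworked get_query_node above now starts from the sourced strategy node and, when a HAVING clause is present, AND-combines it with any condition already on the node before wrapping everything in a final SelectNode. A minimal standalone sketch of that merge pattern, using plain strings rather than pytrilogy's Conditional class:

# Hypothetical sketch: fold an optional existing condition together with a
# HAVING condition, mirroring Conditional(left=..., right=..., operator=AND).
def merge_having(existing: str | None, having: str) -> str:
    if existing is None:
        return having
    return f"({existing}) AND ({having})"

print(merge_having(None, "total_orders > 10"))              # total_orders > 10
print(merge_having("region = 'EU'", "total_orders > 10"))   # (region = 'EU') AND (total_orders > 10)
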
trilogy/dialect/base.py CHANGED
@@ -121,6 +121,7 @@ FUNCTION_MAP = {
      FunctionType.CASE: lambda x: render_case(x),
      FunctionType.SPLIT: lambda x: f"split({x[0]}, {x[1]})",
      FunctionType.IS_NULL: lambda x: f"isnull({x[0]})",
+     FunctionType.BOOL: lambda x: f"CASE WHEN {x[0]} THEN TRUE ELSE FALSE END",
      # complex
      FunctionType.INDEX_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
      FunctionType.MAP_ACCESS: lambda x: f"{x[0]}[{x[1]}][1]",
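
The new FunctionType.BOOL entry renders a boolean coercion as a CASE expression. A standalone sketch of what that lambda produces, with a made-up column expression for illustration:

# Mirrors the new BOOL entry: the args are already-rendered SQL fragments.
render_bool = lambda args: f"CASE WHEN {args[0]} THEN TRUE ELSE FALSE END"

print(render_bool(["orders.cancelled_at IS NOT NULL"]))
# CASE WHEN orders.cancelled_at IS NOT NULL THEN TRUE ELSE FALSE END
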
@@ -176,8 +177,8 @@ FUNCTION_MAP = {

  FUNCTION_GRAIN_MATCH_MAP = {
      **FUNCTION_MAP,
-     FunctionType.COUNT_DISTINCT: lambda args: f"{args[0]}",
-     FunctionType.COUNT: lambda args: f"{args[0]}",
+     FunctionType.COUNT_DISTINCT: lambda args: f"CASE WHEN{args[0]} IS NOT NULL THEN 1 ELSE 0 END",
+     FunctionType.COUNT: lambda args: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
      FunctionType.SUM: lambda args: f"{args[0]}",
      FunctionType.AVG: lambda args: f"{args[0]}",
      FunctionType.MAX: lambda args: f"{args[0]}",
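
FUNCTION_GRAIN_MATCH_MAP appears to be the rendering table used when the data is already at the aggregate's grain; COUNT and COUNT_DISTINCT now emit a null-aware 0/1 flag instead of passing the column through unchanged. A small before/after sketch, using a hypothetical column name:

# Standalone illustration of the change in rendered output.
old_count = lambda args: f"{args[0]}"
new_count = lambda args: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END"

print(old_count(["orders.id"]))  # orders.id
print(new_count(["orders.id"]))  # CASE WHEN orders.id IS NOT NULL THEN 1 ELSE 0 END
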
@@ -582,8 +583,11 @@ class BaseDialect:
          having: Conditional | Parenthetical | Comparison | None = None
          materialized = {x for x, v in cte.source_map.items() if v}
          if cte.condition:
-             if is_scalar_condition(cte.condition, materialized=materialized):
+             if not cte.group_to_grain or is_scalar_condition(
+                 cte.condition, materialized=materialized
+             ):
                  where = cte.condition
+
              else:
                  components = decompose_condition(cte.condition)
                  for x in components:
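
The condition-placement change appears to mean a CTE's filter is rendered directly in WHERE whenever the CTE does not group to a new grain, and only falls back to scalar-condition analysis (and decomposition into HAVING components) when it does. A minimal sketch of that decision, with hypothetical flags standing in for cte.group_to_grain and is_scalar_condition:

# Hypothetical stand-in for the new branch: WHERE unless the CTE groups to
# grain and the condition is not a scalar (row-level) predicate.
def condition_target(group_to_grain: bool, scalar_condition: bool) -> str:
    if not group_to_grain or scalar_condition:
        return "WHERE"
    return "decompose into HAVING"

print(condition_target(group_to_grain=False, scalar_condition=False))  # WHERE
print(condition_target(group_to_grain=True, scalar_condition=False))   # decompose into HAVING
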
trilogy/dialect/duckdb.py CHANGED
@@ -36,9 +36,12 @@ FUNCTION_MAP = {
  # we may return a static value
  FUNCTION_GRAIN_MATCH_MAP = {
      **FUNCTION_MAP,
-     FunctionType.COUNT: lambda args: f"{args[0]}",
+     FunctionType.COUNT_DISTINCT: lambda args: f"CASE WHEN{args[0]} IS NOT NULL THEN 1 ELSE 0 END",
+     FunctionType.COUNT: lambda args: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
      FunctionType.SUM: lambda args: f"{args[0]}",
      FunctionType.AVG: lambda args: f"{args[0]}",
+     FunctionType.MAX: lambda args: f"{args[0]}",
+     FunctionType.MIN: lambda args: f"{args[0]}",
  }

  DUCKDB_TEMPLATE = Template(
trilogy/dialect/sql_server.py CHANGED
@@ -41,9 +41,9 @@ TSQL_TEMPLATE = Template(
      """{%- if ctes %}
  WITH {% for cte in ctes %}
  {{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
- {%- if full_select -%}
- {{full_select}}
- {%- else -%}
+ {%- if full_select -%}{{full_select}}
+ {%- else -%}{%- if comment %}
+ -- {{ comment }}{% endif %}
  SELECT
  {%- if limit is not none %}
  TOP {{ limit }}{% endif %}
trilogy/hooks/query_debugger.py CHANGED
@@ -31,6 +31,9 @@ def print_recursive_resolved(
          extra.append("filter")
      if input.group_required:
          extra.append("group")
+     output = [c.address for c in input.output_concepts[:3]]
+     if len(input.output_concepts) > 3:
+         output.append("...")
      display = [
          (
              " " * depth,
@@ -40,7 +43,7 @@
              ">",
              # [c.address for c in input.input_concepts],
              "->",
-             [c.address for c in input.output_concepts],
+             output,
          )
      ]
      if isinstance(input, QueryDatasource):
@@ -86,7 +89,6 @@ def print_recursive_ctes(input: CTE, depth: int = 0, max_depth: int | None = Non
      sql = renderer.render_cte(input).statement
      for line in sql.split("\n"):
          logger.debug(" " * (depth) + line)
-     print("-----")
      if isinstance(input, CTE):
          for child in input.parent_ctes:
              print_recursive_ctes(child, depth + 1)
@@ -130,5 +132,5 @@ class DebuggingHook(BaseHook):
          if self.process_nodes != PrintMode.OFF:
              printed = print_recursive_nodes(node, mode=self.process_nodes)
              for row in printed:
-                 logger.info("".join([str(v) for v in row]))
+                 # logger.info("".join([str(v) for v in row]))
                  print("".join([str(v) for v in row]))
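
The debugging hook now truncates the printed output-concept list to the first three addresses. A standalone sketch of that truncation, separate from the hook itself:

# Mirrors the new truncation logic in print_recursive_resolved.
def summarize_outputs(addresses: list[str], limit: int = 3) -> list[str]:
    shown = addresses[:limit]
    if len(addresses) > limit:
        shown.append("...")
    return shown

print(summarize_outputs(["local.a", "local.b", "local.c", "local.d"]))
# ['local.a', 'local.b', 'local.c', '...']
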
trilogy/parsing/parse_engine.py CHANGED
@@ -30,7 +30,7 @@ from trilogy.core.enums import (
      WindowType,
      DatePart,
      ShowCategory,
-     SelectFiltering,
+     FunctionClass,
  )
  from trilogy.core.exceptions import InvalidSyntaxException, UndefinedConceptException
  from trilogy.core.functions import (
@@ -50,6 +50,7 @@ from trilogy.core.functions import (
      CurrentDate,
      CurrentDatetime,
      IsNull,
+     Bool,
      SubString,
      StrPos,
  )
@@ -840,6 +841,7 @@ class ParseToObjects(Transformer):
              grain: Grain | None = args[3]
          else:
              grain = None
+
          new_datasource = select.to_datasource(
              namespace=(
                  self.environment.namespace
@@ -930,7 +932,8 @@ class ParseToObjects(Transformer):
              order_by=order_by,
              meta=Metadata(line_number=meta.line),
          )
-
+         locally_derived: set[str] = set()
+         all_in_output: set[str] = set()
          for item in select_items:
              # we don't know the grain of an aggregate at assignment time
              # so rebuild at this point in the tree
@@ -938,59 +941,79 @@
              if isinstance(item.content, ConceptTransform):
                  new_concept = item.content.output.with_select_context(
                      output.grain,
-                     conditional=(
-                         output.where_clause.conditional
-                         if output.where_clause
-                         and output.where_clause_category == SelectFiltering.IMPLICIT
-                         else None
-                     ),
+                     conditional=None,
+                     # conditional=(
+                     #     output.where_clause.conditional
+                     #     if output.where_clause
+                     #     and output.where_clause_category == SelectFiltering.IMPLICIT
+                     #     else None
+                     # ),
                      environment=self.environment,
                  )
                  self.environment.add_concept(new_concept, meta=meta)
                  item.content.output = new_concept
+                 locally_derived.add(new_concept.address)
+                 all_in_output.add(new_concept.address)
              elif isinstance(item.content, Concept):
                  # Sometimes cached values here don't have the latest info
-                 # bug we can't just use environment, as it might not have the right grain.
+                 # but we can't just use environment, as it might not have the right grain.
                  item.content = self.environment.concepts[
                      item.content.address
                  ].with_grain(item.content.grain)
-                 # TODO: revisit if we can push down every filter
-                 # else:
-                 #     item.content = (
-                 #         item.content.with_filter(
-                 #             output.where_clause.conditional, environment=self.environment
-                 #         )
-                 #         if output.where_clause
-                 #         and output.where_clause_category == SelectFiltering.IMPLICIT
-                 #         else item.content
-                 #     )
-
+                 all_in_output.add(item.content.address)
          if order_by:
              for orderitem in order_by.items:
                  if isinstance(orderitem.expr, Concept):
                      if orderitem.expr.purpose == Purpose.METRIC:
                          orderitem.expr = orderitem.expr.with_select_context(
                              output.grain,
-                             conditional=(
-                                 output.where_clause.conditional
-                                 if output.where_clause
-                                 and output.where_clause_category
-                                 == SelectFiltering.IMPLICIT
-                                 else None
-                             ),
+                             conditional=None,
+                             # conditional=(
+                             #     output.where_clause.conditional
+                             #     if output.where_clause
+                             #     and output.where_clause_category
+                             #     == SelectFiltering.IMPLICIT
+                             #     else None
+                             # ),
                              environment=self.environment,
                          )
-                     # TODO :push down every filter
-                     # else:
-                     #     orderitem.expr = (
-                     #         orderitem.expr.with_filter(
-                     #             output.where_clause.conditional,
-                     #             environment=self.environment,
-                     #         )
-                     #         if output.where_clause
-                     #         and output.where_clause_category == SelectFiltering.IMPLICIT
-                     #         else orderitem.expr
-                     #     )
+         if output.where_clause:
+             for concept in output.where_clause.concept_arguments:
+
+                 if (
+                     concept.lineage
+                     and isinstance(concept.lineage, Function)
+                     and concept.lineage.operator
+                     in FunctionClass.AGGREGATE_FUNCTIONS.value
+                 ):
+                     if concept.address in locally_derived:
+                         raise SyntaxError(
+                             f"Cannot reference an aggregate derived in the select ({concept.address}) in the same statement where clause; move to the HAVING clause instead; Line: {meta.line}"
+                         )
+
+                 if (
+                     concept.lineage
+                     and isinstance(concept.lineage, AggregateWrapper)
+                     and concept.lineage.function.operator
+                     in FunctionClass.AGGREGATE_FUNCTIONS.value
+                 ):
+                     if concept.address in locally_derived:
+                         raise SyntaxError(
+                             f"Cannot reference an aggregate derived in the select ({concept.address}) in the same statement where clause; move to the HAVING clause instead; Line: {meta.line}"
+                         )
+         if output.having_clause:
+             for concept in output.having_clause.concept_arguments:
+                 if concept.address not in all_in_output:
+                     raise SyntaxError(
+                         f"Cannot reference a column ({concept.address}) that is not in the select projection in the HAVING clause, move to WHERE; Line: {meta.line}"
+                     )
+         if output.order_by:
+             for concept in output.order_by.concept_arguments:
+                 if concept.address not in all_in_output:
+                     raise SyntaxError(
+                         f"Cannot order by a column that is not in the output projection; {meta.line}"
+                     )
+
          return output

      @v_args(meta=True)
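
The new validation above rejects two patterns at parse time: a WHERE clause that references an aggregate derived in the same select (it must move to HAVING), and a HAVING or ORDER BY clause that references a concept missing from the projection. A simplified standalone sketch of the same checks, using plain sets of addresses rather than pytrilogy's concept objects:

# Hypothetical simplification of the new parse-time checks.
def validate_select(
    where_aggregates: set[str],   # aggregate concepts referenced in WHERE
    locally_derived: set[str],    # concepts first defined by this select
    having_refs: set[str],        # concepts referenced in HAVING
    projection: set[str],         # concepts in the select projection
) -> None:
    for address in where_aggregates & locally_derived:
        raise SyntaxError(
            f"Cannot reference an aggregate derived in the select ({address}) "
            "in the same statement where clause; move to the HAVING clause instead"
        )
    for address in having_refs - projection:
        raise SyntaxError(
            f"Cannot reference a column ({address}) that is not in the select "
            "projection in the HAVING clause, move to WHERE"
        )

validate_select(set(), {"local.total"}, {"local.total"}, {"local.total"})  # passes
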
@@ -1832,6 +1855,11 @@ class ParseToObjects(Transformer):
          args = process_function_args(args, meta=meta, environment=self.environment)
          return IsNull(args)

+     @v_args(meta=True)
+     def fbool(self, meta, args):
+         args = process_function_args(args, meta=meta, environment=self.environment)
+         return Bool(args)
+

  def unpack_visit_error(e: VisitError):
      """This is required to get exceptions from imports, which would
trilogy/parsing/trilogy.lark CHANGED
@@ -187,8 +187,9 @@
  fcase: "CASE"i (fcase_when)* (fcase_else)? "END"i
  len: "len"i "(" expr ")"
  fnot: "NOT"i expr
+ fbool: "bool"i "(" expr ")"

- _generic_functions: fcast | concat | fcoalesce | fcase | len | fnot
+ _generic_functions: fcast | concat | fcoalesce | fcase | len | fnot | fbool

  //constant
  CURRENT_DATE.1: /current_date\(\)/
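
The grammar change adds bool(...) as one more alternative of the shared _generic_functions rule. A self-contained lark sketch (a toy grammar, not the trilogy grammar) showing the same pattern of a case-insensitive function keyword wired into a rule of alternatives:

from lark import Lark

# Toy grammar: a "bool" function alternative alongside an existing one,
# mirroring how fbool was added to _generic_functions.
grammar = r"""
    start: func
    func: fbool | flen
    fbool: "bool"i "(" NAME ")"
    flen: "len"i "(" NAME ")"
    %import common.CNAME -> NAME
    %import common.WS
    %ignore WS
"""

print(Lark(grammar).parse("BOOL(is_active)").pretty())
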