pytrilogy 0.0.2.2__py3-none-any.whl → 0.0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (31)
  1. {pytrilogy-0.0.2.2.dist-info → pytrilogy-0.0.2.4.dist-info}/METADATA +3 -3
  2. {pytrilogy-0.0.2.2.dist-info → pytrilogy-0.0.2.4.dist-info}/RECORD +31 -31
  3. trilogy/__init__.py +1 -1
  4. trilogy/core/enums.py +2 -0
  5. trilogy/core/env_processor.py +5 -0
  6. trilogy/core/functions.py +39 -6
  7. trilogy/core/models.py +81 -9
  8. trilogy/core/optimization.py +46 -31
  9. trilogy/core/optimizations/predicate_pushdown.py +33 -8
  10. trilogy/core/processing/concept_strategies_v3.py +10 -0
  11. trilogy/core/processing/node_generators/basic_node.py +1 -1
  12. trilogy/core/processing/node_generators/common.py +3 -3
  13. trilogy/core/processing/node_generators/filter_node.py +20 -16
  14. trilogy/core/processing/node_generators/node_merge_node.py +46 -108
  15. trilogy/core/processing/nodes/group_node.py +28 -2
  16. trilogy/core/processing/utility.py +56 -32
  17. trilogy/core/query_processor.py +10 -3
  18. trilogy/dialect/base.py +62 -88
  19. trilogy/dialect/bigquery.py +3 -1
  20. trilogy/dialect/duckdb.py +5 -4
  21. trilogy/dialect/postgres.py +3 -1
  22. trilogy/dialect/presto.py +4 -1
  23. trilogy/dialect/snowflake.py +3 -1
  24. trilogy/dialect/sql_server.py +3 -1
  25. trilogy/parsing/common.py +5 -1
  26. trilogy/parsing/parse_engine.py +94 -6
  27. trilogy/parsing/trilogy.lark +11 -4
  28. {pytrilogy-0.0.2.2.dist-info → pytrilogy-0.0.2.4.dist-info}/LICENSE.md +0 -0
  29. {pytrilogy-0.0.2.2.dist-info → pytrilogy-0.0.2.4.dist-info}/WHEEL +0 -0
  30. {pytrilogy-0.0.2.2.dist-info → pytrilogy-0.0.2.4.dist-info}/entry_points.txt +0 -0
  31. {pytrilogy-0.0.2.2.dist-info → pytrilogy-0.0.2.4.dist-info}/top_level.txt +0 -0
trilogy/core/functions.py
@@ -47,7 +47,7 @@ def resolve_function_parent_concepts(concept: Concept) -> List[Concept]:
 
 def resolve_filter_parent_concepts(
     concept: Concept,
-) -> Tuple[Concept, List[Concept], List[Concept]]:
+) -> Tuple[Concept, List[Concept], List[Tuple[Concept, ...]]]:
     if not isinstance(concept.lineage, FilterItem):
         raise ValueError(
             f"Concept {concept} lineage is not filter item, is {type(concept.lineage)}"
@@ -58,7 +58,7 @@ def resolve_filter_parent_concepts(
         base_rows += concept.lineage.where.row_arguments
         # TODO: pass tuple groups through
         for ctuple in concept.lineage.where.existence_arguments:
-            base_existence += list(ctuple)
+            base_existence.append(ctuple)
     if direct_parent.grain:
         base_rows += direct_parent.grain.components_copy
     if (
@@ -71,7 +71,7 @@ def resolve_filter_parent_concepts(
         return (
             concept.lineage.content,
             unique(base_rows, "address"),
-            unique(base_existence, "address"),
+            base_existence,
         )
     return concept.lineage.content, unique(base_rows, "address"), []
 
trilogy/core/processing/node_generators/filter_node.py
@@ -74,27 +74,31 @@ def gen_filter_node(
             return parent
 
         core_parents.append(parent)
+
     if parent_existence_concepts:
-        logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} fetching filter node existence parents {[x.address for x in parent_existence_concepts]}"
-        )
-        parent_existence = source_concepts(
-            mandatory_list=parent_existence_concepts,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            history=history,
-        )
-        if not parent_existence:
+        for existence_tuple in parent_existence_concepts:
+            if not existence_tuple:
+                continue
             logger.info(
-                f"{padding(depth)}{LOGGER_PREFIX} filter existence node parents could not be found"
+                f"{padding(depth)}{LOGGER_PREFIX} fetching filter node existence parents {[x.address for x in existence_tuple]}"
             )
-            return None
-        core_parents.append(parent_existence)
-
+            parent_existence = source_concepts(
+                mandatory_list=list(existence_tuple),
+                environment=environment,
+                g=g,
+                depth=depth + 1,
+                history=history,
+            )
+            if not parent_existence:
+                logger.info(
+                    f"{padding(depth)}{LOGGER_PREFIX} filter existence node parents could not be found"
+                )
+                return None
+            core_parents.append(parent_existence)
+    flattened_existence = [x for y in parent_existence_concepts for x in y]
     filter_node = FilterNode(
         input_concepts=unique(
-            [immediate_parent] + parent_row_concepts + parent_existence_concepts,
+            [immediate_parent] + parent_row_concepts + flattened_existence,
            "address",
        ),
        output_concepts=[concept, immediate_parent] + parent_row_concepts,
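The signature change in functions.py and the loop above work together: existence arguments now stay grouped per tuple so each group can be sourced as its own parent node, and the groups are only flattened when building the FilterNode's input list. A minimal sketch of the two shapes, using plain strings as stand-ins for Concept objects and list-building in place of the source_concepts call:

    # toy sketch: strings stand in for Concept objects
    existence_groups: list[tuple[str, ...]] = [
        ("orders.id",),
        ("customers.id", "customers.region"),
    ]

    resolved_parents = []
    for group in existence_groups:
        if not group:
            continue
        # each tuple is resolved independently, preserving its grouping
        resolved_parents.append(list(group))

    # flattening happens only for the final input concept list
    flattened = [concept for group in existence_groups for concept in group]
    assert flattened == ["orders.id", "customers.id", "customers.region"]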
trilogy/core/processing/node_generators/node_merge_node.py
@@ -4,7 +4,6 @@ from trilogy.core.models import Concept, Environment, Conditional
 from trilogy.core.processing.nodes import MergeNode, History, StrategyNode
 import networkx as nx
 from trilogy.core.graph_models import concept_to_node
-from trilogy.core.processing.utility import PathInfo
 from trilogy.constants import logger
 from trilogy.utility import unique
 from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
@@ -63,7 +62,9 @@ def extract_ds_components(g: nx.DiGraph, nodelist: list[str]) -> list[list[str]]
             if not str(x).startswith("ds~")
         ]
     )
-
+    # if we had no ego graphs, return all concepts
+    if not graphs:
+        return [[extract_address(node) for node in nodelist]]
    graphs = filter_unique_graphs(graphs)
    for node in nodelist:
        parsed = extract_address(node)
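The new guard covers the case where no datasource ("ds~") nodes were present to build ego graphs from: instead of partitioning, the whole node list is returned as one component. A toy illustration, with a simplified stand-in for extract_address and invented node names:

    # simplified stand-in for trilogy's extract_address: strips the node-type
    # prefix and any grain suffix from a graph node name
    def extract_address(node: str) -> str:
        return node.split("~", 1)[1].split("@")[0]

    nodelist = ["c~orders.id@Grain<>", "c~orders.amount@Grain<>"]
    graphs: list[list[str]] = []  # no ds~ nodes, so no ego graphs were built

    if not graphs:
        components = [[extract_address(node) for node in nodelist]]

    assert components == [["orders.id", "orders.amount"]]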
@@ -82,6 +83,7 @@ def determine_induced_minimal_nodes(
     H: nx.Graph = nx.to_undirected(G).copy()
     nodes_to_remove = []
     concepts = nx.get_node_attributes(G, "concept")
+
     for node in G.nodes:
         if concepts.get(node):
             lookup = concepts[node]
@@ -107,9 +109,11 @@ def determine_induced_minimal_nodes(
         paths = nx.multi_source_dijkstra_path(H, nodelist)
     except nx.exception.NodeNotFound:
         return None
+
     H.remove_nodes_from(list(x for x in H.nodes if x not in paths))
     sG: nx.Graph = ax.steinertree.steiner_tree(H, nodelist).copy()
     final: nx.DiGraph = nx.subgraph(G, sG.nodes).copy()
+
     for edge in G.edges:
         if edge[1] in final.nodes and edge[0].startswith("ds~"):
             ds_name = extract_address(edge[0])
@@ -125,6 +129,7 @@ def determine_induced_minimal_nodes(
         [final.in_degree(node) > 0 for node in final.nodes if node.startswith("c~")]
     ):
         return None
+
    if not all([node in final.nodes for node in nodelist]):
        return None
    return final
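For reference, the Dijkstra-prune plus Steiner-tree sequence in determine_induced_minimal_nodes can be reproduced on a toy graph. Node names below are invented, but the networkx calls are the same ones the function uses:

    import networkx as nx
    from networkx.algorithms import approximation as ax

    G = nx.Graph()
    G.add_edges_from(
        [("c~a", "ds~orders"), ("ds~orders", "c~b"), ("c~b", "c~c"), ("c~d", "c~c")]
    )
    terminals = ["c~a", "c~c"]

    # prune anything unreachable from the terminal concepts
    paths = nx.multi_source_dijkstra_path(G, terminals)
    G.remove_nodes_from([n for n in G.nodes if n not in paths])

    # approximately minimal subtree connecting all terminals
    tree = ax.steinertree.steiner_tree(G, terminals)
    assert all(t in tree.nodes for t in terminals)
    assert "c~d" not in tree.nodes  # not needed to connect the terminals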
@@ -308,111 +313,44 @@ def gen_merge_node(
     history: History | None = None,
     conditions: Conditional | None = None,
 ) -> Optional[MergeNode]:
-    join_candidates: List[PathInfo] = []
-
-    # inject new concepts into search, and identify if two dses can reach there
-    if not join_candidates:
-        for filter_downstream in [True, False]:
-            weak_resolve = resolve_weak_components(
-                all_concepts,
-                environment,
-                g,
-                filter_downstream=filter_downstream,
-                accept_partial=accept_partial,
-            )
-            if weak_resolve:
-                log_graph = [[y.address for y in x] for x in weak_resolve]
-                logger.info(
-                    f"{padding(depth)}{LOGGER_PREFIX} Was able to resolve graph through weak component resolution - final graph {log_graph}"
-                )
-                return subgraphs_to_merge_node(
-                    weak_resolve,
-                    depth=depth,
-                    all_concepts=all_concepts,
-                    environment=environment,
-                    g=g,
-                    source_concepts=source_concepts,
-                    history=history,
-                    conditions=conditions,
-                )
-    if not join_candidates:
-        return None
-    join_additions: list[set[str]] = []
-    for candidate in join_candidates:
-        join_additions.append(candidate.reduced_concepts)
-
-    common: set[str] = set()
-    final_candidates: list[set[str]] = []
-    # find all values that show up in every join_additions
-    for ja in join_additions:
-        if not common:
-            common = ja
-        else:
-            common = common.intersection(ja)
-        if all(ja.issubset(y) for y in join_additions):
-            final_candidates.append(ja)
 
-    if not final_candidates:
-        filtered_paths = [x.difference(common) for x in join_additions]
-        raise AmbiguousRelationshipResolutionException(
-            f"Ambiguous concept join resolution fetching {[x.address for x in all_concepts]} - unique values in possible paths = {filtered_paths}. Include an additional concept to disambiguate",
-            join_additions,
-        )
-    if not join_candidates:
-        logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} No additional join candidates could be found"
+    for filter_downstream in [True, False]:
+        weak_resolve = resolve_weak_components(
+            all_concepts,
+            environment,
+            g,
+            filter_downstream=filter_downstream,
+            accept_partial=accept_partial,
         )
-        return None
-    shortest: PathInfo = sorted(
-        [x for x in join_candidates if x.reduced_concepts in final_candidates],
-        key=lambda x: len(x.reduced_concepts),
-    )[0]
-    logger.info(f"{padding(depth)}{LOGGER_PREFIX} final path is {shortest.paths}")
-
-    return subgraphs_to_merge_node(
-        shortest.concept_subgraphs,
-        depth=depth,
-        all_concepts=all_concepts,
-        environment=environment,
-        g=g,
-        source_concepts=source_concepts,
-        history=history,
-        conditions=conditions,
-    )
-    # parents = []
-    # for graph in shortest.concept_subgraphs:
-    #     logger.info(
-    #         f"{padding(depth)}{LOGGER_PREFIX} fetching subgraph {[c.address for c in graph]}"
-    #     )
-    #     parent = source_concepts(
-    #         mandatory_list=graph,
-    #         environment=environment,
-    #         g=g,
-    #         depth=depth + 1,
-    #         history=history,
-    #     )
-    #     if not parent:
-    #         logger.info(
-    #             f"{padding(depth)}{LOGGER_PREFIX} Unable to instantiate target subgraph"
-    #         )
-    #         return None
-    #     logger.info(
-    #         f"{padding(depth)}{LOGGER_PREFIX} finished subgraph fetch for {[c.address for c in graph]}, have parent {type(parent)}"
-    #     )
-    #     parents.append(parent)
-
-    # return MergeNode(
-    #     input_concepts=[
-    #         environment.concepts[x]
-    #         for x in shortest.reduced_concepts
-    #         if environment.concepts[x].derivation != PurposeLineage.MERGE
-    #     ],
-    #     output_concepts=[
-    #         x for x in all_concepts if x.derivation != PurposeLineage.MERGE
-    #     ],
-    #     environment=environment,
-    #     g=g,
-    #     parents=parents,
-    #     depth=depth,
-    #     conditions=conditions,
-    # )
+        if weak_resolve:
+            log_graph = [[y.address for y in x] for x in weak_resolve]
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} Was able to resolve graph through weak component resolution - final graph {log_graph}"
+            )
+            return subgraphs_to_merge_node(
+                weak_resolve,
+                depth=depth,
+                all_concepts=all_concepts,
+                environment=environment,
+                g=g,
+                source_concepts=source_concepts,
+                history=history,
+                conditions=conditions,
+            )
+    # one concept handling may need to be kicked to alias
+    if len(all_concepts) == 1:
+        concept = all_concepts[0]
+        for k, v in concept.pseudonyms.items():
+            test = subgraphs_to_merge_node(
+                [[concept, v]],
+                g=g,
+                all_concepts=[concept],
+                environment=environment,
+                depth=depth,
+                source_concepts=source_concepts,
+                history=history,
+                conditions=conditions,
+            )
+            if test:
+                return test
+    return None
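The removed PathInfo-based candidate scoring was dead code: join_candidates was initialized empty and never appended to, so the second `if not join_candidates: return None` always fired and everything after it was unreachable. The rewrite keeps only the live weak-component loop and adds a pseudonym fallback for the single-concept case, whose shape is roughly this sketch (resolve_pair is a hypothetical stand-in for subgraphs_to_merge_node):

    # hypothetical stand-in: resolve_pair plays the role of subgraphs_to_merge_node
    def first_viable_pseudonym(concept, pseudonyms: dict, resolve_pair):
        # try each alias as a two-member subgraph until one resolves
        for alias in pseudonyms.values():
            node = resolve_pair([[concept, alias]])
            if node:
                return node
        return None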
trilogy/core/processing/nodes/group_node.py
@@ -19,7 +19,7 @@ from trilogy.core.processing.nodes.base_node import (
     concept_list_to_grain,
 )
 from trilogy.utility import unique
-
+from trilogy.core.processing.utility import is_scalar_condition
 
 LOGGER_PREFIX = "[CONCEPT DETAIL - GROUP NODE]"
 
@@ -111,7 +111,8 @@ class GroupNode(StrategyNode):
                 f" {parent.grain}"
             )
             source_type = SourceType.GROUP
-        return QueryDatasource(
+
+        base = QueryDatasource(
             input_concepts=self.input_concepts,
             output_concepts=self.output_concepts,
             datasources=parent_sources,
@@ -134,6 +135,31 @@ class GroupNode(StrategyNode):
             partial_concepts=self.partial_concepts,
             condition=self.conditions,
         )
+        # if there is a condition on a group node and it's not scalar,
+        # inject an additional CTE
+        if self.conditions:
+            logger.info("CONDITIONS")
+            logger.info(str(self.conditions))
+            logger.info(is_scalar_condition(self.conditions))
+        if self.conditions and not is_scalar_condition(self.conditions):
+            base.condition = None
+            base.output_concepts = self.output_concepts + self.conditions.row_arguments
+            return QueryDatasource(
+                input_concepts=base.output_concepts,
+                output_concepts=self.output_concepts,
+                datasources=[base],
+                source_type=SourceType.SELECT,
+                source_map=resolve_concept_map(
+                    [base],
+                    targets=self.output_concepts,
+                    inherited_inputs=base.output_concepts,
+                ),
+                joins=[],
+                grain=grain,
+                partial_concepts=self.partial_concepts,
+                condition=self.conditions,
+            )
+        return base
 
     def copy(self) -> "GroupNode":
         return GroupNode(
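In SQL terms, a non-scalar condition on a grouped output cannot be evaluated in the same SELECT that computes the aggregate, so the node now materializes the ungated aggregate (plus the condition's row arguments) and applies the filter in a wrapping select. A hedged sketch of the equivalent rewrite, with invented table and column names:

    # the single-layer form is invalid when the condition references the aggregate:
    #   SELECT region, SUM(amount) AS total FROM orders
    #   WHERE SUM(amount) > 100 GROUP BY region   -- not legal SQL
    inner = "SELECT region, SUM(amount) AS total FROM orders GROUP BY region"
    # so the condition is stripped from the aggregate layer and applied above it
    wrapped = (
        f"WITH base AS ({inner}) "
        "SELECT region, total FROM base WHERE total > 100"
    )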
trilogy/core/processing/utility.py
@@ -8,6 +8,16 @@ from trilogy.core.models import (
     QueryDatasource,
     LooseConceptList,
     Environment,
+    Conditional,
+    SubselectComparison,
+    Comparison,
+    Parenthetical,
+    Function,
+    FilterItem,
+    MagicConstants,
+    WindowItem,
+    AggregateWrapper,
+    DataType,
 )
 
 from trilogy.core.enums import Purpose, Granularity
@@ -18,6 +28,8 @@ from collections import defaultdict
 from logging import Logger
 from pydantic import BaseModel
 
+from trilogy.core.enums import FunctionClass
+
 
 class NodeType(Enum):
     CONCEPT = 1
@@ -124,15 +136,20 @@ def resolve_join_order(joins: List[BaseJoin]) -> List[BaseJoin]:
     return final_joins
 
 
-def add_node_join_concept(graph, concept, datasource, concepts):
-    # we don't need to join on a concept if all of the keys exist in the grain
-    # if concept.keys and all([x in grain for x in concept.keys]):
-    # continue
+def add_node_join_concept(
+    graph: nx.DiGraph,
+    concept: Concept,
+    datasource: Datasource | QueryDatasource,
+    concepts: List[Concept],
+):
+
     concepts.append(concept)
 
     graph.add_node(concept.address, type=NodeType.CONCEPT)
     graph.add_edge(datasource.identifier, concept.address)
-    for k, v in concept.pseudonyms.items():
+    for _, v in concept.pseudonyms.items():
+        if v in concepts:
+            continue
         if v.address != concept.address:
             add_node_join_concept(graph, v, datasource, concepts)
 
@@ -149,13 +166,6 @@ def get_node_joins(
         graph.add_node(datasource.identifier, type=NodeType.NODE)
         for concept in datasource.output_concepts:
             add_node_join_concept(graph, concept, datasource, concepts)
-            # we don't need to join on a concept if all of the keys exist in the grain
-            # if concept.keys and all([x in grain for x in concept.keys]):
-            # continue
-            # concepts.append(concept)
-
-            # graph.add_node(concept.address, type=NodeType.CONCEPT)
-            # graph.add_edge(datasource.identifier, concept.address)
 
     # add edges for every constant to every datasource
     for datasource in datasources:
@@ -195,26 +205,6 @@ def get_node_joins(
         ),
     )
 
-    node_map = {
-        x[0:20]: len(
-            [
-                partial
-                for partial in identifier_map[x].partial_concepts
-                if partial in grain
-            ]
-            + [
-                output
-                for output in identifier_map[x].output_concepts
-                if output.address in grain_pseudonyms
-            ]
-        )
-        for x in node_list
-    }
-    print("NODE MAP")
-    print(node_map)
-    print([x.address for x in grain])
-    print(grain_pseudonyms)
-
     for left in node_list:
         # the constant dataset is a special case
         # and can never be on the left of a join
@@ -354,3 +344,37 @@ def get_disconnected_components(
         x for x in sub_graphs if calculate_graph_relevance(graph, x, all_concepts) > 0
     ]
     return len(sub_graphs), sub_graphs
+
+
+def is_scalar_condition(
+    element: (
+        int
+        | str
+        | float
+        | list
+        | WindowItem
+        | FilterItem
+        | Concept
+        | Comparison
+        | Conditional
+        | Parenthetical
+        | Function
+        | AggregateWrapper
+        | MagicConstants
+        | DataType
+    ),
+) -> bool:
+    if isinstance(element, Parenthetical):
+        return is_scalar_condition(element.content)
+    elif isinstance(element, SubselectComparison):
+        return True
+    elif isinstance(element, Comparison):
+        return is_scalar_condition(element.left) and is_scalar_condition(element.right)
+    elif isinstance(element, Function):
+        if element.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
+            return False
+    elif isinstance(element, AggregateWrapper):
+        return is_scalar_condition(element.function)
+    elif isinstance(element, Conditional):
+        return is_scalar_condition(element.left) and is_scalar_condition(element.right)
+    return True
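A toy analogue of the new is_scalar_condition helper, using invented dataclasses in place of the trilogy model types, to show the recursion: a condition is scalar unless some reachable node is an aggregate function.

    from dataclasses import dataclass

    AGGREGATES = {"sum", "count", "avg", "min", "max"}  # stand-in for FunctionClass.AGGREGATE_FUNCTIONS

    @dataclass
    class Func:  # invented stand-in for trilogy's Function
        operator: str

    @dataclass
    class Cmp:  # invented stand-in for Comparison/Conditional
        left: object
        right: object

    def is_scalar(element) -> bool:
        if isinstance(element, Func):
            return element.operator not in AGGREGATES
        if isinstance(element, Cmp):
            return is_scalar(element.left) and is_scalar(element.right)
        return True  # literals and plain column references

    assert is_scalar(Cmp("orders.amount", 100))
    assert not is_scalar(Cmp(Func("sum"), 100))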
trilogy/core/query_processor.py
@@ -350,7 +350,11 @@ def get_query_datasources(
 
     search_concepts: list[Concept] = statement.output_components
     nest_where = statement.where_clause_category == SelectFiltering.IMPLICIT
-    if nest_where and statement.where_clause:
+
+    # if all are aggregates, we've pushed the filtering inside the aggregates anyway
+    all_aggregate = all([x.is_aggregate for x in search_concepts])
+
+    if nest_where and statement.where_clause and not all_aggregate:
         search_concepts = unique(
             statement.where_clause.row_arguments + search_concepts, "address"
         )
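The intent of the new guard, per the diff comment: when every output concept is itself an aggregate, the where clause has already been pushed inside each aggregate's derivation, so re-adding its row arguments to the search set would be redundant. A minimal sketch with an invented stand-in for the attribute consulted:

    # invented stand-in for the Concept attribute the guard checks
    class FakeConcept:
        def __init__(self, is_aggregate: bool):
            self.is_aggregate = is_aggregate

    search_concepts = [FakeConcept(True), FakeConcept(True)]
    all_aggregate = all(x.is_aggregate for x in search_concepts)
    # with all_aggregate True, the where clause's row arguments are not
    # appended to search_concepts and the condition is not re-attached
    assert all_aggregate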
@@ -363,7 +367,8 @@ def get_query_datasources(
     )
     ds: GroupNode | SelectNode
     if nest_where and statement.where_clause:
-        ods.conditions = statement.where_clause.conditional
+        if not all_aggregate:
+            ods.conditions = statement.where_clause.conditional
     ods.output_concepts = search_concepts
     # ods.hidden_concepts = where_delta
     ods.rebuild_cache()
@@ -467,9 +472,11 @@ def process_query(
     for cte in raw_ctes:
         cte.parent_ctes = [seen[x.name] for x in cte.parent_ctes]
     deduped_ctes: List[CTE] = list(seen.values())
+    root_cte.order_by = statement.order_by
+    root_cte.limit = statement.limit
+    root_cte.hidden_concepts = [x for x in statement.hidden_components]
 
     final_ctes = optimize_ctes(deduped_ctes, root_cte, statement)
-
     return ProcessedQuery(
         order_by=statement.order_by,
         grain=statement.grain,