pytrilogy 0.0.2.10__py3-none-any.whl → 0.0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pytrilogy might be problematic.
Files changed (30)
  1. {pytrilogy-0.0.2.10.dist-info → pytrilogy-0.0.2.12.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.2.10.dist-info → pytrilogy-0.0.2.12.dist-info}/RECORD +30 -30
  3. trilogy/__init__.py +1 -1
  4. trilogy/core/enums.py +0 -1
  5. trilogy/core/environment_helpers.py +44 -6
  6. trilogy/core/models.py +47 -26
  7. trilogy/core/optimization.py +31 -3
  8. trilogy/core/optimizations/__init__.py +2 -1
  9. trilogy/core/optimizations/predicate_pushdown.py +60 -42
  10. trilogy/core/processing/concept_strategies_v3.py +8 -4
  11. trilogy/core/processing/node_generators/basic_node.py +15 -9
  12. trilogy/core/processing/node_generators/filter_node.py +20 -3
  13. trilogy/core/processing/node_generators/group_node.py +2 -0
  14. trilogy/core/processing/node_generators/node_merge_node.py +28 -2
  15. trilogy/core/processing/node_generators/unnest_node.py +10 -3
  16. trilogy/core/processing/nodes/base_node.py +7 -2
  17. trilogy/core/processing/nodes/group_node.py +0 -1
  18. trilogy/core/processing/nodes/merge_node.py +11 -4
  19. trilogy/core/processing/nodes/unnest_node.py +13 -9
  20. trilogy/core/processing/utility.py +3 -1
  21. trilogy/core/query_processor.py +20 -5
  22. trilogy/dialect/base.py +96 -56
  23. trilogy/dialect/common.py +3 -3
  24. trilogy/parsing/common.py +58 -1
  25. trilogy/parsing/parse_engine.py +111 -136
  26. trilogy/parsing/trilogy.lark +5 -1
  27. {pytrilogy-0.0.2.10.dist-info → pytrilogy-0.0.2.12.dist-info}/LICENSE.md +0 -0
  28. {pytrilogy-0.0.2.10.dist-info → pytrilogy-0.0.2.12.dist-info}/WHEEL +0 -0
  29. {pytrilogy-0.0.2.10.dist-info → pytrilogy-0.0.2.12.dist-info}/entry_points.txt +0 -0
  30. {pytrilogy-0.0.2.10.dist-info → pytrilogy-0.0.2.12.dist-info}/top_level.txt +0 -0
@@ -10,6 +10,7 @@ from trilogy.core.processing.node_generators.common import (
 )
 from trilogy.utility import unique
 from trilogy.constants import logger
+from itertools import combinations
 
 LOGGER_PREFIX = "[GEN_BASIC_NODE]"
 
@@ -31,12 +32,17 @@ def gen_basic_node(
     )
 
     local_optional_redundant = [x for x in local_optional if x in parent_concepts]
-    attempts = [(parent_concepts, [concept] + local_optional_redundant)]
-    from itertools import combinations
+    attempts: List[tuple[list[Concept], list[Concept]]] = [
+        (parent_concepts, [concept] + local_optional_redundant)
+    ]
+    equivalent_optional = [x for x in local_optional if x.lineage == concept.lineage]
+    non_equivalent_optional = [
+        x for x in local_optional if x not in equivalent_optional
+    ]
 
     if local_optional:
-        for combo in range(1, len(local_optional) + 1):
-            combos = combinations(local_optional, combo)
+        for combo in range(1, len(non_equivalent_optional) + 1):
+            combos = combinations(non_equivalent_optional, combo)
             for optional_set in combos:
                 attempts.append(
                     (
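The rewritten block above pins optional concepts whose lineage matches the target (equivalent_optional comes for free from the same parent) and enumerates join attempts only over the remainder. A minimal standalone sketch of that enumeration pattern, with illustrative names rather than the real trilogy signatures:

from itertools import combinations

def enumerate_attempts(required: list[str], optional: list[str]) -> list[tuple[list[str], list[str]]]:
    # Try the bare required set first, then every non-empty subset of the
    # optional concepts, smallest subsets first, mirroring the loop above.
    attempts: list[tuple[list[str], list[str]]] = [(required, [])]
    for size in range(1, len(optional) + 1):
        for subset in combinations(optional, size):
            attempts.append((required, list(subset)))
    return attempts

# one base attempt plus the three non-empty subsets of ["b", "c"]
assert len(enumerate_attempts(["a"], ["b", "c"])) == 4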
@@ -64,13 +70,10 @@ def gen_basic_node(
                 continue
             if all(x in source.partial_concepts for source in sources):
                 partials.append(x)
-        outputs = parent_node.output_concepts + [concept]
-        logger.info(
-            f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}, output {[x.address for x in outputs]}"
-        )
-        # parents.resolve()
 
         parent_node.add_output_concept(concept)
+        for x in equivalent_optional:
+            parent_node.add_output_concept(x)
 
         parent_node.remove_output_concepts(
             [
@@ -79,6 +82,9 @@ def gen_basic_node(
                 if x.address not in [y.address for y in basic_output]
             ]
         )
+        logger.info(
+            f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}, output {[x.address for x in parent_node.output_concepts]}"
+        )
         return parent_node
     logger.info(
         f"{depth_prefix}{LOGGER_PREFIX} No basic node could be generated for {concept}"
@@ -39,6 +39,15 @@ def gen_filter_node(
         raise SyntaxError('Filter node must have a lineage of type "FilterItem"')
     where = concept.lineage.where
 
+    optional_included: list[Concept] = []
+    for x in local_optional:
+        if isinstance(x.lineage, FilterItem):
+            if concept.lineage.where == where:
+                logger.info(
+                    f"{padding(depth)}{LOGGER_PREFIX} fetching {x.lineage.content.address} as optional parent with same filter conditions "
+                )
+                parent_row_concepts.append(x.lineage.content)
+                optional_included.append(x)
     logger.info(
         f"{padding(depth)}{LOGGER_PREFIX} filter {concept.address} derived from {immediate_parent.address} row parents {[x.address for x in parent_row_concepts]} and {[[y.address] for x in parent_existence_concepts for y in x]} existence parents"
     )
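One thing worth flagging in the block above: where is assigned from concept.lineage.where, so the guard concept.lineage.where == where is always true and every FilterItem-derived optional is pulled in; the log message ("same filter conditions") suggests the comparison was meant to run against each optional's own x.lineage.where. A standalone sketch of that intended grouping, using simplified stand-ins for Concept and FilterItem:

from dataclasses import dataclass

@dataclass(frozen=True)
class FilterItem:
    content: str  # stand-in for the filtered source concept
    where: str    # stand-in for the filter condition

@dataclass(frozen=True)
class Concept:
    address: str
    lineage: FilterItem | None = None

target = Concept("orders.big", FilterItem("orders.amount", "amount > 100"))
local_optional = [
    Concept("orders.big_qty", FilterItem("orders.qty", "amount > 100")),
    Concept("orders.small_qty", FilterItem("orders.qty", "amount < 10")),
]
# optionals filtered by the same condition can ride along on the same
# filtered parent fetch instead of spawning separate nodes
optional_included = [
    x for x in local_optional
    if isinstance(x.lineage, FilterItem) and x.lineage.where == target.lineage.where
]
assert [c.address for c in optional_included] == ["orders.big_qty"]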
@@ -49,6 +58,7 @@ def gen_filter_node(
         g=g,
         depth=depth + 1,
         history=history,
+        conditions=conditions,
     )
 
     flattened_existence = [x for y in parent_existence_concepts for x in y]
@@ -88,6 +98,11 @@ def gen_filter_node(
             f"{padding(depth)}{LOGGER_PREFIX} query conditions are the same as filter conditions, can optimize across all concepts"
         )
         optimized_pushdown = True
+    elif optional_included == local_optional:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} all optional concepts are included in the filter, can optimize across all concepts"
+        )
+        optimized_pushdown = True
     if optimized_pushdown:
         if isinstance(row_parent, SelectNode):
             logger.info(
@@ -116,6 +131,7 @@ def gen_filter_node(
                 x
                 for x in local_optional
                 if x.address in [y.address for y in parent.output_concepts]
+                or x.address in [y.address for y in optional_included]
             ]
             parent.add_parents(core_parents)
             parent.add_condition(where.conditional)
@@ -175,6 +191,7 @@ def gen_filter_node(
         ] + outputs
         filter_node.rebuild_cache()
         return filter_node
+
     enrich_node = source_concepts( # this fetches the parent + join keys
         # to then connect to the rest of the query
         mandatory_list=[immediate_parent] + parent_row_concepts + local_optional,
@@ -182,10 +199,11 @@ def gen_filter_node(
         g=g,
         depth=depth + 1,
         history=history,
+        conditions=conditions,
     )
     if not enrich_node:
         return filter_node
-    x = MergeNode(
+    return MergeNode(
         input_concepts=[concept, immediate_parent] + local_optional,
         output_concepts=[
             concept,
@@ -206,8 +224,7 @@ def gen_filter_node(
                     [immediate_parent] + parent_row_concepts
                 ),
                 join_type=JoinType.LEFT_OUTER,
-                filter_to_mutual=False,
+                filter_to_mutual=True,
             )
         ],
     )
-    return x
@@ -91,4 +91,6 @@ def gen_group_node(
         depth=depth,
         source_concepts=source_concepts,
         log_lambda=create_log_lambda(LOGGER_PREFIX, depth, logger),
+        history=history,
+        conditions=conditions,
     )
@@ -1,6 +1,6 @@
 from typing import List, Optional
 
-from trilogy.core.models import Concept, Environment, Conditional
+from trilogy.core.models import Concept, Environment, Conditional, WhereClause
 from trilogy.core.processing.nodes import MergeNode, History, StrategyNode
 import networkx as nx
 from trilogy.core.graph_models import concept_to_node
@@ -86,7 +86,7 @@ def determine_induced_minimal_nodes(
 
     for node in G.nodes:
         if concepts.get(node):
-            lookup = concepts[node]
+            lookup: Concept = concepts[node]
             if lookup.derivation not in (PurposeLineage.BASIC, PurposeLineage.ROOT):
                 nodes_to_remove.append(node)
             elif lookup.derivation == PurposeLineage.BASIC and G.out_degree(node) == 0:
@@ -155,6 +155,26 @@ def detect_ambiguity_and_raise(all_concepts, reduced_concept_sets) -> None:
         )
 
 
+def has_synonym(concept: Concept, others: list[list[Concept]]) -> bool:
+    return any(
+        c.address in concept.pseudonyms or concept.address in c.pseudonyms
+        for sublist in others
+        for c in sublist
+    )
+
+
+def filter_relevant_subgraphs(subgraphs: list[list[Concept]]) -> list[list[Concept]]:
+    return [
+        subgraph
+        for subgraph in subgraphs
+        if len(subgraph) > 1
+        or (
+            len(subgraph) == 1
+            and not has_synonym(subgraph[0], [x for x in subgraphs if x != subgraph])
+        )
+    ]
+
+
 def resolve_weak_components(
     all_concepts: List[Concept],
     environment: Environment,
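The two helpers added above prune single-concept subgraphs whose only member is a pseudonym of a concept already covered by another subgraph (note the next hunk only wires this in as a commented-out return for now). A toy run with a minimal Concept stand-in carrying a pseudonyms set:

from dataclasses import dataclass

@dataclass(frozen=True)
class Concept:
    address: str
    pseudonyms: frozenset = frozenset()

def has_synonym(concept: Concept, others: list[list[Concept]]) -> bool:
    return any(
        c.address in concept.pseudonyms or concept.address in c.pseudonyms
        for sublist in others
        for c in sublist
    )

def filter_relevant_subgraphs(subgraphs: list[list[Concept]]) -> list[list[Concept]]:
    return [
        sg for sg in subgraphs
        if len(sg) > 1
        or not has_synonym(sg[0], [x for x in subgraphs if x != sg])
    ]

a, c = Concept("a"), Concept("c")
b = Concept("b", frozenset({"a"}))  # b is registered as a pseudonym of a
# the lone subgraph [b] duplicates a concept already covered, so it is pruned
assert filter_relevant_subgraphs([[a, c], [b]]) == [[a, c]]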
@@ -249,6 +269,7 @@ def resolve_weak_components(
             continue
         subgraphs.append(sub_component)
     return subgraphs
+    # return filter_relevant_subgraphs(subgraphs)
 
 
 def subgraphs_to_merge_node(
@@ -260,6 +281,7 @@ def subgraphs_to_merge_node(
     source_concepts,
     history,
     conditions,
+    search_conditions: WhereClause | None = None,
     enable_early_exit: bool = True,
 ):
     parents: List[StrategyNode] = []
@@ -277,6 +299,7 @@ def subgraphs_to_merge_node(
             g=g,
             depth=depth + 1,
             history=history,
+            conditions=search_conditions,
         )
         if not parent:
             logger.info(
@@ -315,6 +338,7 @@ def gen_merge_node(
     accept_partial: bool = False,
     history: History | None = None,
     conditions: Conditional | None = None,
+    search_conditions: WhereClause | None = None,
 ) -> Optional[MergeNode]:
 
     for filter_downstream in [True, False]:
@@ -339,6 +363,7 @@ def gen_merge_node(
             source_concepts=source_concepts,
             history=history,
             conditions=conditions,
+            search_conditions=search_conditions,
         )
         # one concept handling may need to be kicked to alias
         if len(all_concepts) == 1:
@@ -354,6 +379,7 @@ def gen_merge_node(
             history=history,
             conditions=conditions,
             enable_early_exit=False,
+            search_conditions=search_conditions,
         )
         if test:
             return test
@@ -22,9 +22,14 @@ def gen_unnest_node(
     arguments = []
     if isinstance(concept.lineage, Function):
         arguments = concept.lineage.concept_arguments
+
+    equivalent_optional = [x for x in local_optional if x.lineage == concept.lineage]
+    non_equivalent_optional = [
+        x for x in local_optional if x not in equivalent_optional
+    ]
     if arguments or local_optional:
         parent = source_concepts(
-            mandatory_list=arguments + local_optional,
+            mandatory_list=arguments + non_equivalent_optional,
            environment=environment,
            g=g,
            depth=depth + 1,
@@ -38,8 +43,8 @@ def gen_unnest_node(
         return None
 
     base = UnnestNode(
-        unnest_concept=concept,
-        input_concepts=arguments + local_optional,
+        unnest_concepts=[concept] + equivalent_optional,
+        input_concepts=arguments + non_equivalent_optional,
         output_concepts=[concept] + local_optional,
         environment=environment,
         g=g,
@@ -57,4 +62,6 @@ def gen_unnest_node(
     )
     qds = new.resolve()
     assert qds.source_map[concept.address] == {base.resolve()}
+    for x in equivalent_optional:
+        assert qds.source_map[x.address] == {base.resolve()}
     return new
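The unnest generator now applies the same split used in gen_basic_node: optionals that share the target's lineage come from the same UNNEST and are emitted by the UnnestNode itself, while the rest are sourced upstream. A compact sketch of the partition with a simplified Concept stand-in:

from dataclasses import dataclass

@dataclass(frozen=True)
class Concept:
    address: str
    lineage: str | None = None  # stand-in for a Function lineage

def partition_by_lineage(target: Concept, optional: list[Concept]) -> tuple[list[Concept], list[Concept]]:
    # equivalent concepts are produced by the same unnest expression
    equivalent = [x for x in optional if x.lineage == target.lineage]
    rest = [x for x in optional if x not in equivalent]
    return equivalent, rest

target = Concept("order.item", "UNNEST(order.items)")
opts = [Concept("order.item_alias", "UNNEST(order.items)"), Concept("order.id")]
eq, rest = partition_by_lineage(target, opts)
assert [x.address for x in eq] == ["order.item_alias"]
assert [x.address for x in rest] == ["order.id"]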
@@ -61,17 +61,22 @@ def resolve_concept_map(
         for concept in input.output_concepts:
             if concept.address not in input.non_partial_concept_addresses:
                 continue
-            if concept.address not in inherited:
-                continue
+
             if (
                 isinstance(input, QueryDatasource)
                 and concept.address in input.hidden_concepts
             ):
                 continue
             if concept.address in full_addresses:
+
                 concept_map[concept.address].add(input)
             elif concept.address not in concept_map:
+                # equi_targets = [x for x in targets if concept.address in x.pseudonyms or x.address in concept.pseudonyms]
+                # if equi_targets:
+                #     for equi in equi_targets:
+                #         concept_map[equi.address] = set()
                 concept_map[concept.address].add(input)
+
     # second loop, include partials
     for input in inputs:
         for concept in input.output_concepts:
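With the inherited check removed above, every non-hidden, non-partial output of each input now lands in the concept map on the first pass; the second pass then lets partial providers fill only still-unbound addresses. A simplified model of that two-pass resolution (tuples stand in for input nodes):

from collections import defaultdict

def resolve_map(inputs: list[tuple[str, set[str], set[str]]]) -> dict[str, set[str]]:
    # pass 1: bind addresses to full (non-partial) providers
    concept_map: dict[str, set[str]] = defaultdict(set)
    for name, full, _ in inputs:
        for address in full:
            concept_map[address].add(name)
    # pass 2: partial providers only fill addresses nothing else supplied
    for name, _, partial in inputs:
        for address in partial:
            if not concept_map[address]:
                concept_map[address].add(name)
    return dict(concept_map)

inputs = [
    ("fowl", {"customer.id"}, {"customer.name"}),  # partial on customer.name
    ("mandrill", {"customer.name"}, set()),        # full on customer.name
]
assert resolve_map(inputs)["customer.name"] == {"mandrill"}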
@@ -121,7 +121,6 @@ class GroupNode(StrategyNode):
             source_type=source_type,
             source_map=resolve_concept_map(
                 parent_sources,
-                # targets = self.output_concepts,
                 targets=(
                     unique(
                         self.output_concepts + self.conditions.concept_arguments,
@@ -28,14 +28,18 @@ LOGGER_PREFIX = "[CONCEPT DETAIL - MERGE NODE]"
 
 
 def deduplicate_nodes(
-    merged: dict[str, QueryDatasource | Datasource], logging_prefix: str
+    merged: dict[str, QueryDatasource | Datasource],
+    logging_prefix: str,
+    environment: Environment,
 ) -> tuple[bool, dict[str, QueryDatasource | Datasource], set[str]]:
     duplicates = False
     removed: set[str] = set()
     set_map: dict[str, set[str]] = {}
     for k, v in merged.items():
         unique_outputs = [
-            x.address for x in v.output_concepts if x not in v.partial_concepts
+            environment.concepts[x.address].address
+            for x in v.output_concepts
+            if x not in v.partial_concepts
         ]
         set_map[k] = set(unique_outputs)
     for k1, v1 in set_map.items():
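The dedup now canonicalizes every output address through environment.concepts[...] before comparing, so two sources exposing pseudonymous addresses of the same concept are recognized as duplicates. A reduced model of the canonicalize-then-subset comparison (the canonical dict stands in for the environment lookup; node names echo the debug log quoted in a later hunk):

def deduplicate(merged: dict[str, set[str]], canonical: dict[str, str]) -> dict[str, set[str]]:
    # map every address to its canonical form, then drop any node whose
    # output set is covered by another surviving node
    set_map = {k: {canonical.get(a, a) for a in v} for k, v in merged.items()}
    removed: set[str] = set()
    for k1, v1 in set_map.items():
        for k2, v2 in set_map.items():
            if k1 != k2 and k2 not in removed and v1 <= v2:
                removed.add(k1)
                break
    return {k: v for k, v in merged.items() if k not in removed}

merged = {
    "fowl": {"customer.id_9268029262289908"},  # pseudonymous address
    "mandrill": {"customer.id", "customer.demographics.gender"},
}
canonical = {"customer.id_9268029262289908": "customer.id"}
assert set(deduplicate(merged, canonical)) == {"mandrill"}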
@@ -71,12 +75,15 @@ def deduplicate_nodes_and_joins(
     joins: List[NodeJoin] | None,
     merged: dict[str, QueryDatasource | Datasource],
     logging_prefix: str,
+    environment: Environment,
 ) -> Tuple[List[NodeJoin] | None, dict[str, QueryDatasource | Datasource]]:
     # it's possible that we have more sources than we need
     duplicates = True
     while duplicates:
         duplicates = False
-        duplicates, merged, removed = deduplicate_nodes(merged, logging_prefix)
+        duplicates, merged, removed = deduplicate_nodes(
+            merged, logging_prefix, environment=environment
+        )
         # filter out any removed joins
         if joins is not None:
             joins = [
@@ -245,7 +252,7 @@ class MergeNode(StrategyNode):
 
         # it's possible that we have more sources than we need
         final_joins, merged = deduplicate_nodes_and_joins(
-            final_joins, merged, self.logging_prefix
+            final_joins, merged, self.logging_prefix, self.environment
         )
         # early exit if we can just return the parent
         final_datasets: List[QueryDatasource | Datasource] = list(merged.values())
@@ -6,6 +6,7 @@ from trilogy.core.models import (
     SourceType,
     Concept,
     UnnestJoin,
+    Function,
 )
 from trilogy.core.processing.nodes.base_node import StrategyNode
 
@@ -19,7 +20,7 @@ class UnnestNode(StrategyNode):
 
     def __init__(
         self,
-        unnest_concept: Concept,
+        unnest_concepts: List[Concept],
         input_concepts: List[Concept],
         output_concepts: List[Concept],
         environment,
@@ -37,25 +38,28 @@ class UnnestNode(StrategyNode):
             parents=parents,
             depth=depth,
         )
-        self.unnest_concept = unnest_concept
+        self.unnest_concepts = unnest_concepts
 
     def _resolve(self) -> QueryDatasource:
         """We need to ensure that any filtered values are removed from the output to avoid inappropriate references"""
         base = super()._resolve()
-
+        lineage = self.unnest_concepts[0].lineage
+        assert isinstance(lineage, Function)
+        final = "_".join(set([c.address for c in self.unnest_concepts]))
         unnest = UnnestJoin(
-            concept=self.unnest_concept,
-            alias=f'unnest_{self.unnest_concept.address.replace(".", "_")}',
+            concepts=self.unnest_concepts,
+            parent=lineage,
+            alias=f'unnest_{final.replace(".", "_")}',
         )
         base.joins.append(unnest)
-
-        base.source_map[self.unnest_concept.address] = {unnest}
-        base.join_derived_concepts = [self.unnest_concept]
+        for unnest_concept in self.unnest_concepts:
+            base.source_map[unnest_concept.address] = {unnest}
+            base.join_derived_concepts = [unnest_concept]
         return base
 
     def copy(self) -> "UnnestNode":
         return UnnestNode(
-            unnest_concept=self.unnest_concept,
+            unnest_concepts=self.unnest_concepts,
             input_concepts=list(self.input_concepts),
             output_concepts=list(self.output_concepts),
             environment=self.environment,
@@ -285,7 +285,9 @@ def get_node_joins(
                 raise SyntaxError(
                     f"Could not find {joinc.address} in {right_datasource.identifier} output {[c.address for c in right_datasource.output_concepts]}"
                 )
-            join_tuples.append((left_arg, right_arg))
+            narg = (left_arg, right_arg)
+            if narg not in join_tuples:
+                join_tuples.append((left_arg, right_arg))
         final_joins_pre.append(
             BaseJoin(
                 left_datasource=identifier_map[left],
@@ -46,7 +46,10 @@ def base_join_to_join(
     """This function converts joins at the datasource level
     to joins at the CTE level"""
     if isinstance(base_join, UnnestJoin):
-        return InstantiatedUnnestJoin(concept=base_join.concept, alias=base_join.alias)
+        return InstantiatedUnnestJoin(
+            concept_to_unnest=base_join.parent.concept_arguments[0],
+            alias=base_join.alias,
+        )
     if base_join.left_datasource.identifier == base_join.right_datasource.identifier:
         raise ValueError(f"Joining on same datasource {base_join}")
     left_ctes = [
@@ -145,7 +148,9 @@ def generate_source_map(
         names = set([x.name for x in ev])
         ematches = [cte.name for cte in all_new_ctes if cte.source.name in names]
         existence_source_map[ek] = ematches
-    return {k: [] if not v else v for k, v in source_map.items()}, existence_source_map
+    return {
+        k: [] if not v else list(set(v)) for k, v in source_map.items()
+    }, existence_source_map
 
 
 def datasource_to_query_datasource(datasource: Datasource) -> QueryDatasource:
@@ -191,6 +196,8 @@ def resolve_cte_base_name_and_alias_v2(
     raw_joins: List[Join | InstantiatedUnnestJoin],
 ) -> Tuple[str | None, str | None]:
     joins: List[Join] = [join for join in raw_joins if isinstance(join, Join)]
+    # INFO trilogy:query_processor.py:263 Finished building source map for civet with 3 parents, have {'local.relevant_customers': ['fowl', 'fowl'],
+    # 'customer.demographics.gender': ['mandrill'], 'customer.id': ['mandrill'], 'customer.demographics.id': ['mandrill'], 'customer.id_9268029262289908': [], 'customer.demographics.gender_1513806568509111': []}, query_datasource had non-empty keys ['local.relevant_customers', 'customer.demographics.gender', 'customer.id', 'customer.demographics.id'] and existence had non-empty keys []
     if (
         len(source.datasources) == 1
         and isinstance(source.datasources[0], Datasource)
@@ -212,12 +219,16 @@ def resolve_cte_base_name_and_alias_v2(
 
     counts: dict[str, int] = defaultdict(lambda: 0)
     output_addresses = [x.address for x in source.output_concepts]
+    input_address = [x.address for x in source.input_concepts]
     for k, v in source_map.items():
         for vx in v:
             if k in output_addresses:
                 counts[vx] = counts[vx] + 1
-            else:
-                counts[vx] = counts[vx]
+
+            if k in input_address:
+                counts[vx] = counts[vx] + 1
+
+            counts[vx] = counts[vx]
     if counts:
         return max(counts, key=counts.get), max(counts, key=counts.get)  # type: ignore
     return None, None
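The rewritten counting credits a CTE once per output address and once per input address it supplies, then takes the most-referenced CTE as the base (the trailing counts[vx] = counts[vx] carried over from the old else branch is a no-op). A minimal sketch of that selection:

from collections import defaultdict

def pick_base_cte(
    source_map: dict[str, list[str]],
    output_addresses: set[str],
    input_addresses: set[str],
) -> str | None:
    counts: dict[str, int] = defaultdict(int)
    for address, ctes in source_map.items():
        for cte in ctes:
            if address in output_addresses:
                counts[cte] += 1
            if address in input_addresses:
                counts[cte] += 1
    # the CTE that feeds the most addresses becomes the FROM target
    return max(counts, key=counts.get) if counts else None

source_map = {"customer.id": ["mandrill"], "local.relevant_customers": ["fowl", "mandrill"]}
assert pick_base_cte(source_map, {"customer.id", "local.relevant_customers"}, set()) == "mandrill"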
@@ -298,7 +309,11 @@
         if cte.grain != query_datasource.grain:
             raise ValueError("Grain was corrupted in CTE generation")
         for x in cte.output_columns:
-            if x.address not in cte.source_map and CONFIG.validate_missing:
+            if (
+                x.address not in cte.source_map
+                and not any(y in cte.source_map for y in x.pseudonyms)
+                and CONFIG.validate_missing
+            ):
                 raise ValueError(
                     f"Missing {x.address} in {cte.source_map}, source map {cte.source.source_map.keys()} "
                 )
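The relaxed validation above treats a column as resolvable if any of its pseudonyms is present in the CTE source map, which is what lets generated pseudonym addresses (like customer.id_9268029262289908 in the debug log quoted earlier) pass. A standalone sketch of the check with simplified types:

def is_missing(address: str, pseudonyms: set[str], source_map: dict[str, list]) -> bool:
    # missing only if neither the address itself nor any pseudonym is sourced
    return address not in source_map and not any(p in source_map for p in pseudonyms)

source_map = {"customer.id": ["mandrill"]}
assert not is_missing("customer.id_9268029262289908", {"customer.id"}, source_map)
assert is_missing("customer.region", set(), source_map)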