pytrilogy 0.0.2.11__py3-none-any.whl → 0.0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (31)
  1. {pytrilogy-0.0.2.11.dist-info → pytrilogy-0.0.2.13.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.2.11.dist-info → pytrilogy-0.0.2.13.dist-info}/RECORD +31 -31
  3. {pytrilogy-0.0.2.11.dist-info → pytrilogy-0.0.2.13.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +5 -0
  6. trilogy/core/enums.py +3 -1
  7. trilogy/core/environment_helpers.py +44 -6
  8. trilogy/core/models.py +51 -27
  9. trilogy/core/optimization.py +31 -3
  10. trilogy/core/optimizations/__init__.py +2 -1
  11. trilogy/core/optimizations/predicate_pushdown.py +60 -42
  12. trilogy/core/processing/concept_strategies_v3.py +6 -4
  13. trilogy/core/processing/node_generators/basic_node.py +22 -9
  14. trilogy/core/processing/node_generators/common.py +13 -23
  15. trilogy/core/processing/node_generators/node_merge_node.py +22 -1
  16. trilogy/core/processing/node_generators/unnest_node.py +10 -3
  17. trilogy/core/processing/nodes/base_node.py +18 -11
  18. trilogy/core/processing/nodes/group_node.py +0 -1
  19. trilogy/core/processing/nodes/merge_node.py +12 -5
  20. trilogy/core/processing/nodes/unnest_node.py +13 -9
  21. trilogy/core/processing/utility.py +3 -1
  22. trilogy/core/query_processor.py +14 -12
  23. trilogy/dialect/base.py +95 -52
  24. trilogy/dialect/common.py +3 -3
  25. trilogy/executor.py +8 -2
  26. trilogy/parsing/common.py +73 -2
  27. trilogy/parsing/parse_engine.py +88 -132
  28. trilogy/parsing/trilogy.lark +3 -3
  29. {pytrilogy-0.0.2.11.dist-info → pytrilogy-0.0.2.13.dist-info}/LICENSE.md +0 -0
  30. {pytrilogy-0.0.2.11.dist-info → pytrilogy-0.0.2.13.dist-info}/entry_points.txt +0 -0
  31. {pytrilogy-0.0.2.11.dist-info → pytrilogy-0.0.2.13.dist-info}/top_level.txt +0 -0
@@ -10,6 +10,8 @@ from trilogy.core.processing.node_generators.common import (
10
10
  )
11
11
  from trilogy.utility import unique
12
12
  from trilogy.constants import logger
13
+ from trilogy.core.enums import SourceType
14
+ from itertools import combinations
13
15
 
14
16
  LOGGER_PREFIX = "[GEN_BASIC_NODE]"
15
17
 
@@ -31,12 +33,21 @@ def gen_basic_node(
31
33
  )
32
34
 
33
35
  local_optional_redundant = [x for x in local_optional if x in parent_concepts]
34
- attempts = [(parent_concepts, [concept] + local_optional_redundant)]
35
- from itertools import combinations
36
+ attempts: List[tuple[list[Concept], list[Concept]]] = [
37
+ (parent_concepts, [concept] + local_optional_redundant)
38
+ ]
39
+ equivalent_optional = [
40
+ x
41
+ for x in local_optional
42
+ if x.lineage == concept.lineage and x.address != concept.address
43
+ ]
44
+ non_equivalent_optional = [
45
+ x for x in local_optional if x not in equivalent_optional
46
+ ]
36
47
 
37
48
  if local_optional:
38
- for combo in range(1, len(local_optional) + 1):
39
- combos = combinations(local_optional, combo)
49
+ for combo in range(1, len(non_equivalent_optional) + 1):
50
+ combos = combinations(non_equivalent_optional, combo)
40
51
  for optional_set in combos:
41
52
  attempts.append(
42
53
  (
@@ -55,8 +66,10 @@ def gen_basic_node(
55
66
  depth=depth + 1,
56
67
  history=history,
57
68
  )
69
+
58
70
  if not parent_node:
59
71
  continue
72
+ parent_node.source_type = SourceType.BASIC
60
73
  parents: List[StrategyNode] = [parent_node]
61
74
  for x in basic_output:
62
75
  sources = [p for p in parents if x in p.output_concepts]
@@ -64,13 +77,10 @@ def gen_basic_node(
64
77
  continue
65
78
  if all(x in source.partial_concepts for source in sources):
66
79
  partials.append(x)
67
- outputs = parent_node.output_concepts + [concept]
68
- logger.info(
69
- f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}, output {[x.address for x in outputs]}"
70
- )
71
- # parents.resolve()
72
80
 
73
81
  parent_node.add_output_concept(concept)
82
+ for x in equivalent_optional:
83
+ parent_node.add_output_concept(x)
74
84
 
75
85
  parent_node.remove_output_concepts(
76
86
  [
@@ -79,6 +89,9 @@ def gen_basic_node(
79
89
  if x.address not in [y.address for y in basic_output]
80
90
  ]
81
91
  )
92
+ logger.info(
93
+ f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}, output {[x.address for x in parent_node.output_concepts]}"
94
+ )
82
95
  return parent_node
83
96
  logger.info(
84
97
  f"{depth_prefix}{LOGGER_PREFIX} No basic node could be generated for {concept}"
@@ -1,4 +1,4 @@
1
- from typing import List, Tuple
1
+ from typing import List, Tuple, Callable
2
2
 
3
3
 
4
4
  from trilogy.core.enums import PurposeLineage, Purpose
@@ -96,6 +96,7 @@ def gen_property_enrichment_node(
96
96
  g,
97
97
  depth: int,
98
98
  source_concepts,
99
+ log_lambda: Callable,
99
100
  history: History | None = None,
100
101
  conditions: WhereClause | None = None,
101
102
  ):
@@ -106,8 +107,8 @@ def gen_property_enrichment_node(
106
107
  keys = "-".join([y.address for y in x.keys])
107
108
  required_keys[keys].add(x.address)
108
109
  final_nodes = []
109
- node_joins = []
110
110
  for _k, vs in required_keys.items():
111
+ log_lambda(f"Generating enrichment node for {_k} with {vs}")
111
112
  ks = _k.split("-")
112
113
  enrich_node: StrategyNode = source_concepts(
113
114
  mandatory_list=[environment.concepts[k] for k in ks]
@@ -119,17 +120,6 @@ def gen_property_enrichment_node(
119
120
  conditions=conditions,
120
121
  )
121
122
  final_nodes.append(enrich_node)
122
- node_joins.append(
123
- NodeJoin(
124
- left_node=enrich_node,
125
- right_node=base_node,
126
- concepts=concept_to_relevant_joins(
127
- [environment.concepts[k] for k in ks]
128
- ),
129
- filter_to_mutual=False,
130
- join_type=JoinType.LEFT_OUTER,
131
- )
132
- )
133
123
  return MergeNode(
134
124
  input_concepts=unique(
135
125
  base_node.output_concepts
@@ -146,9 +136,8 @@ def gen_property_enrichment_node(
146
136
  g=g,
147
137
  parents=[
148
138
  base_node,
149
- enrich_node,
150
- ],
151
- node_joins=node_joins,
139
+ ]
140
+ + final_nodes,
152
141
  )
153
142
 
154
143
 
@@ -197,6 +186,7 @@ def gen_enrichment_node(
197
186
  source_concepts,
198
187
  history=history,
199
188
  conditions=conditions,
189
+ log_lambda=log_lambda,
200
190
  )
201
191
 
202
192
  enrich_node: StrategyNode = source_concepts( # this fetches the parent + join keys
@@ -216,14 +206,14 @@ def gen_enrichment_node(
216
206
  log_lambda(
217
207
  f"{str(type(base_node).__name__)} returning merge node with group node + enrichment node"
218
208
  )
219
-
209
+ non_hidden = [
210
+ x
211
+ for x in base_node.output_concepts
212
+ if x.address not in [y.address for y in base_node.hidden_concepts]
213
+ ]
220
214
  return MergeNode(
221
- input_concepts=unique(
222
- join_keys + extra_required + base_node.output_concepts, "address"
223
- ),
224
- output_concepts=unique(
225
- join_keys + extra_required + base_node.output_concepts, "address"
226
- ),
215
+ input_concepts=unique(join_keys + extra_required + non_hidden, "address"),
216
+ output_concepts=unique(join_keys + extra_required + non_hidden, "address"),
227
217
  environment=environment,
228
218
  g=g,
229
219
  parents=[enrich_node, base_node],
@@ -86,7 +86,7 @@ def determine_induced_minimal_nodes(
86
86
 
87
87
  for node in G.nodes:
88
88
  if concepts.get(node):
89
- lookup = concepts[node]
89
+ lookup: Concept = concepts[node]
90
90
  if lookup.derivation not in (PurposeLineage.BASIC, PurposeLineage.ROOT):
91
91
  nodes_to_remove.append(node)
92
92
  elif lookup.derivation == PurposeLineage.BASIC and G.out_degree(node) == 0:
@@ -155,6 +155,26 @@ def detect_ambiguity_and_raise(all_concepts, reduced_concept_sets) -> None:
155
155
  )
156
156
 
157
157
 
158
+ def has_synonym(concept: Concept, others: list[list[Concept]]) -> bool:
159
+ return any(
160
+ c.address in concept.pseudonyms or concept.address in c.pseudonyms
161
+ for sublist in others
162
+ for c in sublist
163
+ )
164
+
165
+
166
+ def filter_relevant_subgraphs(subgraphs: list[list[Concept]]) -> list[list[Concept]]:
167
+ return [
168
+ subgraph
169
+ for subgraph in subgraphs
170
+ if len(subgraph) > 1
171
+ or (
172
+ len(subgraph) == 1
173
+ and not has_synonym(subgraph[0], [x for x in subgraphs if x != subgraph])
174
+ )
175
+ ]
176
+
177
+
158
178
  def resolve_weak_components(
159
179
  all_concepts: List[Concept],
160
180
  environment: Environment,
@@ -249,6 +269,7 @@ def resolve_weak_components(
249
269
  continue
250
270
  subgraphs.append(sub_component)
251
271
  return subgraphs
272
+ # return filter_relevant_subgraphs(subgraphs)
252
273
 
253
274
 
254
275
  def subgraphs_to_merge_node(
@@ -22,9 +22,14 @@ def gen_unnest_node(
22
22
  arguments = []
23
23
  if isinstance(concept.lineage, Function):
24
24
  arguments = concept.lineage.concept_arguments
25
+
26
+ equivalent_optional = [x for x in local_optional if x.lineage == concept.lineage]
27
+ non_equivalent_optional = [
28
+ x for x in local_optional if x not in equivalent_optional
29
+ ]
25
30
  if arguments or local_optional:
26
31
  parent = source_concepts(
27
- mandatory_list=arguments + local_optional,
32
+ mandatory_list=arguments + non_equivalent_optional,
28
33
  environment=environment,
29
34
  g=g,
30
35
  depth=depth + 1,
@@ -38,8 +43,8 @@ def gen_unnest_node(
38
43
  return None
39
44
 
40
45
  base = UnnestNode(
41
- unnest_concept=concept,
42
- input_concepts=arguments + local_optional,
46
+ unnest_concepts=[concept] + equivalent_optional,
47
+ input_concepts=arguments + non_equivalent_optional,
43
48
  output_concepts=[concept] + local_optional,
44
49
  environment=environment,
45
50
  g=g,
@@ -57,4 +62,6 @@ def gen_unnest_node(
57
62
  )
58
63
  qds = new.resolve()
59
64
  assert qds.source_map[concept.address] == {base.resolve()}
65
+ for x in equivalent_optional:
66
+ assert qds.source_map[x.address] == {base.resolve()}
60
67
  return new
@@ -61,26 +61,28 @@ def resolve_concept_map(
61
61
  for concept in input.output_concepts:
62
62
  if concept.address not in input.non_partial_concept_addresses:
63
63
  continue
64
- if concept.address not in inherited:
65
- continue
66
- if (
67
- isinstance(input, QueryDatasource)
68
- and concept.address in input.hidden_concepts
69
- ):
64
+ if isinstance(input, QueryDatasource) and concept.address in [
65
+ x.address for x in input.hidden_concepts
66
+ ]:
70
67
  continue
71
68
  if concept.address in full_addresses:
69
+
72
70
  concept_map[concept.address].add(input)
73
71
  elif concept.address not in concept_map:
72
+ # equi_targets = [x for x in targets if concept.address in x.pseudonyms or x.address in concept.pseudonyms]
73
+ # if equi_targets:
74
+ # for equi in equi_targets:
75
+ # concept_map[equi.address] = set()
74
76
  concept_map[concept.address].add(input)
77
+
75
78
  # second loop, include partials
76
79
  for input in inputs:
77
80
  for concept in input.output_concepts:
78
81
  if concept.address not in [t.address for t in inherited_inputs]:
79
82
  continue
80
- if (
81
- isinstance(input, QueryDatasource)
82
- and concept.address in input.hidden_concepts
83
- ):
83
+ if isinstance(input, QueryDatasource) and concept.address in [
84
+ x.address for x in input.hidden_concepts
85
+ ]:
84
86
  continue
85
87
  if len(concept_map.get(concept.address, [])) == 0:
86
88
  concept_map[concept.address].add(input)
@@ -89,6 +91,7 @@ def resolve_concept_map(
89
91
  if target.address not in inherited:
90
92
  # an empty source means it is defined in this CTE
91
93
  concept_map[target.address] = set()
94
+
92
95
  return concept_map
93
96
 
94
97
 
@@ -221,7 +224,10 @@ class StrategyNode:
221
224
  def remove_output_concepts(self, concepts: List[Concept]):
222
225
  for x in concepts:
223
226
  self.hidden_concepts.append(x)
224
- self.output_concepts = [x for x in self.output_concepts if x not in concepts]
227
+ addresses = [x.address for x in concepts]
228
+ self.output_concepts = [
229
+ x for x in self.output_concepts if x.address not in addresses
230
+ ]
225
231
  self.rebuild_cache()
226
232
 
227
233
  @property
@@ -252,6 +258,7 @@ class StrategyNode:
252
258
  targets=self.output_concepts,
253
259
  inherited_inputs=self.input_concepts + self.existence_concepts,
254
260
  )
261
+
255
262
  return QueryDatasource(
256
263
  input_concepts=self.input_concepts,
257
264
  output_concepts=self.output_concepts,
@@ -121,7 +121,6 @@ class GroupNode(StrategyNode):
121
121
  source_type=source_type,
122
122
  source_map=resolve_concept_map(
123
123
  parent_sources,
124
- # targets = self.output_concepts,
125
124
  targets=(
126
125
  unique(
127
126
  self.output_concepts + self.conditions.concept_arguments,
@@ -28,14 +28,18 @@ LOGGER_PREFIX = "[CONCEPT DETAIL - MERGE NODE]"
28
28
 
29
29
 
30
30
  def deduplicate_nodes(
31
- merged: dict[str, QueryDatasource | Datasource], logging_prefix: str
31
+ merged: dict[str, QueryDatasource | Datasource],
32
+ logging_prefix: str,
33
+ environment: Environment,
32
34
  ) -> tuple[bool, dict[str, QueryDatasource | Datasource], set[str]]:
33
35
  duplicates = False
34
36
  removed: set[str] = set()
35
37
  set_map: dict[str, set[str]] = {}
36
38
  for k, v in merged.items():
37
39
  unique_outputs = [
38
- x.address for x in v.output_concepts if x not in v.partial_concepts
40
+ environment.concepts[x.address].address
41
+ for x in v.output_concepts
42
+ if x not in v.partial_concepts
39
43
  ]
40
44
  set_map[k] = set(unique_outputs)
41
45
  for k1, v1 in set_map.items():
@@ -71,12 +75,15 @@ def deduplicate_nodes_and_joins(
71
75
  joins: List[NodeJoin] | None,
72
76
  merged: dict[str, QueryDatasource | Datasource],
73
77
  logging_prefix: str,
78
+ environment: Environment,
74
79
  ) -> Tuple[List[NodeJoin] | None, dict[str, QueryDatasource | Datasource]]:
75
80
  # it's possible that we have more sources than we need
76
81
  duplicates = True
77
82
  while duplicates:
78
83
  duplicates = False
79
- duplicates, merged, removed = deduplicate_nodes(merged, logging_prefix)
84
+ duplicates, merged, removed = deduplicate_nodes(
85
+ merged, logging_prefix, environment=environment
86
+ )
80
87
  # filter out any removed joins
81
88
  if joins is not None:
82
89
  joins = [
@@ -245,7 +252,7 @@ class MergeNode(StrategyNode):
245
252
 
246
253
  # it's possible that we have more sources than we need
247
254
  final_joins, merged = deduplicate_nodes_and_joins(
248
- final_joins, merged, self.logging_prefix
255
+ final_joins, merged, self.logging_prefix, self.environment
249
256
  )
250
257
  # early exit if we can just return the parent
251
258
  final_datasets: List[QueryDatasource | Datasource] = list(merged.values())
@@ -364,7 +371,7 @@ class MergeNode(StrategyNode):
364
371
  conditions=self.conditions,
365
372
  hidden_concepts=list(self.hidden_concepts),
366
373
  virtual_output_concepts=list(self.virtual_output_concepts),
367
- node_joins=self.node_joins,
374
+ node_joins=list(self.node_joins) if self.node_joins else None,
368
375
  join_concepts=list(self.join_concepts) if self.join_concepts else None,
369
376
  force_join_type=self.force_join_type,
370
377
  existence_concepts=list(self.existence_concepts),
@@ -6,6 +6,7 @@ from trilogy.core.models import (
6
6
  SourceType,
7
7
  Concept,
8
8
  UnnestJoin,
9
+ Function,
9
10
  )
10
11
  from trilogy.core.processing.nodes.base_node import StrategyNode
11
12
 
@@ -19,7 +20,7 @@ class UnnestNode(StrategyNode):
19
20
 
20
21
  def __init__(
21
22
  self,
22
- unnest_concept: Concept,
23
+ unnest_concepts: List[Concept],
23
24
  input_concepts: List[Concept],
24
25
  output_concepts: List[Concept],
25
26
  environment,
@@ -37,25 +38,28 @@ class UnnestNode(StrategyNode):
37
38
  parents=parents,
38
39
  depth=depth,
39
40
  )
40
- self.unnest_concept = unnest_concept
41
+ self.unnest_concepts = unnest_concepts
41
42
 
42
43
  def _resolve(self) -> QueryDatasource:
43
44
  """We need to ensure that any filtered values are removed from the output to avoid inappropriate references"""
44
45
  base = super()._resolve()
45
-
46
+ lineage = self.unnest_concepts[0].lineage
47
+ assert isinstance(lineage, Function)
48
+ final = "_".join(set([c.address for c in self.unnest_concepts]))
46
49
  unnest = UnnestJoin(
47
- concept=self.unnest_concept,
48
- alias=f'unnest_{self.unnest_concept.address.replace(".", "_")}',
50
+ concepts=self.unnest_concepts,
51
+ parent=lineage,
52
+ alias=f'unnest_{final.replace(".", "_")}',
49
53
  )
50
54
  base.joins.append(unnest)
51
-
52
- base.source_map[self.unnest_concept.address] = {unnest}
53
- base.join_derived_concepts = [self.unnest_concept]
55
+ for unnest_concept in self.unnest_concepts:
56
+ base.source_map[unnest_concept.address] = {unnest}
57
+ base.join_derived_concepts = [unnest_concept]
54
58
  return base
55
59
 
56
60
  def copy(self) -> "UnnestNode":
57
61
  return UnnestNode(
58
- unnest_concept=self.unnest_concept,
62
+ unnest_concepts=self.unnest_concepts,
59
63
  input_concepts=list(self.input_concepts),
60
64
  output_concepts=list(self.output_concepts),
61
65
  environment=self.environment,
@@ -285,7 +285,9 @@ def get_node_joins(
285
285
  raise SyntaxError(
286
286
  f"Could not find {joinc.address} in {right_datasource.identifier} output {[c.address for c in right_datasource.output_concepts]}"
287
287
  )
288
- join_tuples.append((left_arg, right_arg))
288
+ narg = (left_arg, right_arg)
289
+ if narg not in join_tuples:
290
+ join_tuples.append((left_arg, right_arg))
289
291
  final_joins_pre.append(
290
292
  BaseJoin(
291
293
  left_datasource=identifier_map[left],
@@ -35,7 +35,6 @@ from trilogy.core.ergonomics import CTE_NAMES
35
35
  from trilogy.core.optimization import optimize_ctes
36
36
  from math import ceil
37
37
  from collections import defaultdict
38
- from random import shuffle
39
38
 
40
39
  LOGGER_PREFIX = "[QUERY BUILD]"
41
40
 
@@ -46,7 +45,10 @@ def base_join_to_join(
46
45
  """This function converts joins at the datasource level
47
46
  to joins at the CTE level"""
48
47
  if isinstance(base_join, UnnestJoin):
49
- return InstantiatedUnnestJoin(concept=base_join.concept, alias=base_join.alias)
48
+ return InstantiatedUnnestJoin(
49
+ concept_to_unnest=base_join.parent.concept_arguments[0],
50
+ alias=base_join.alias,
51
+ )
50
52
  if base_join.left_datasource.identifier == base_join.right_datasource.identifier:
51
53
  raise ValueError(f"Joining on same datasource {base_join}")
52
54
  left_ctes = [
@@ -125,8 +127,6 @@ def generate_source_map(
125
127
  if qdk in output_address:
126
128
  source_map[qdk].append(cte.name)
127
129
  # now do a pass that accepts partials
128
- # TODO: move this into a second loop by first creationg all sub sources
129
- # then loop through this
130
130
  for cte in matches:
131
131
  if qdk not in source_map:
132
132
  source_map[qdk] = [cte.name]
@@ -177,7 +177,6 @@ def generate_cte_name(full_name: str, name_map: dict[str, str]) -> str:
177
177
  int = ceil(idx / len(CTE_NAMES))
178
178
  suffix = f"_{int}"
179
179
  valid = [x for x in CTE_NAMES if x + suffix not in name_map.values()]
180
- shuffle(valid)
181
180
  lookup = valid[0]
182
181
  new_name = f"{lookup}{suffix}"
183
182
  name_map[full_name] = new_name
@@ -306,7 +305,11 @@ def datasource_to_ctes(
306
305
  if cte.grain != query_datasource.grain:
307
306
  raise ValueError("Grain was corrupted in CTE generation")
308
307
  for x in cte.output_columns:
309
- if x.address not in cte.source_map and CONFIG.validate_missing:
308
+ if (
309
+ x.address not in cte.source_map
310
+ and not any(y in cte.source_map for y in x.pseudonyms)
311
+ and CONFIG.validate_missing
312
+ ):
310
313
  raise ValueError(
311
314
  f"Missing {x.address} in {cte.source_map}, source map {cte.source.source_map.keys()} "
312
315
  )
@@ -327,12 +330,13 @@ def append_existence_check(
327
330
  for subselect in where.existence_arguments:
328
331
  if not subselect:
329
332
  continue
330
- logger.info(
331
- f"{LOGGER_PREFIX} fetching existance clause inputs {[str(c) for c in subselect]}"
332
- )
333
+
333
334
  eds = source_query_concepts(
334
335
  [*subselect], environment=environment, g=graph, history=history
335
336
  )
337
+ logger.info(
338
+ f"{LOGGER_PREFIX} fetching existence clause inputs {[str(c) for c in subselect]}"
339
+ )
336
340
  node.add_parents([eds])
337
341
  node.add_existence_concepts([*subselect])
338
342
 
@@ -377,9 +381,7 @@ def get_query_node(
377
381
  if nest_where and statement.where_clause:
378
382
  if not all_aggregate:
379
383
  ods.conditions = statement.where_clause.conditional
380
- ods.output_concepts = statement.output_components
381
- # ods.hidden_concepts = where_delta
382
- ods.rebuild_cache()
384
+ ods.set_output_concepts(statement.output_components)
383
385
  append_existence_check(ods, environment, graph, history)
384
386
  ds = GroupNode(
385
387
  output_concepts=statement.output_components,