pytrilogy 0.0.1.117__py3-none-any.whl → 0.0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (46)
  1. {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/METADATA +1 -1
  2. pytrilogy-0.0.2.1.dist-info/RECORD +82 -0
  3. {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +6 -0
  6. trilogy/core/enums.py +7 -2
  7. trilogy/core/env_processor.py +43 -19
  8. trilogy/core/functions.py +11 -0
  9. trilogy/core/models.py +737 -146
  10. trilogy/core/optimization.py +31 -28
  11. trilogy/core/optimizations/inline_constant.py +4 -1
  12. trilogy/core/optimizations/inline_datasource.py +25 -4
  13. trilogy/core/optimizations/predicate_pushdown.py +94 -54
  14. trilogy/core/processing/concept_strategies_v3.py +69 -39
  15. trilogy/core/processing/graph_utils.py +3 -3
  16. trilogy/core/processing/node_generators/__init__.py +0 -2
  17. trilogy/core/processing/node_generators/basic_node.py +30 -17
  18. trilogy/core/processing/node_generators/filter_node.py +3 -1
  19. trilogy/core/processing/node_generators/node_merge_node.py +345 -96
  20. trilogy/core/processing/node_generators/rowset_node.py +18 -16
  21. trilogy/core/processing/node_generators/select_node.py +44 -83
  22. trilogy/core/processing/nodes/__init__.py +2 -0
  23. trilogy/core/processing/nodes/base_node.py +22 -5
  24. trilogy/core/processing/nodes/filter_node.py +3 -0
  25. trilogy/core/processing/nodes/group_node.py +20 -2
  26. trilogy/core/processing/nodes/merge_node.py +32 -18
  27. trilogy/core/processing/nodes/select_node_v2.py +17 -3
  28. trilogy/core/processing/utility.py +100 -8
  29. trilogy/core/query_processor.py +77 -24
  30. trilogy/dialect/base.py +11 -46
  31. trilogy/dialect/bigquery.py +1 -1
  32. trilogy/dialect/common.py +11 -0
  33. trilogy/dialect/duckdb.py +1 -1
  34. trilogy/dialect/presto.py +1 -0
  35. trilogy/executor.py +29 -0
  36. trilogy/hooks/graph_hook.py +50 -5
  37. trilogy/hooks/query_debugger.py +1 -0
  38. trilogy/parsing/common.py +8 -5
  39. trilogy/parsing/parse_engine.py +48 -27
  40. trilogy/parsing/render.py +13 -6
  41. trilogy/parsing/trilogy.lark +12 -7
  42. pytrilogy-0.0.1.117.dist-info/RECORD +0 -83
  43. trilogy/core/processing/node_generators/concept_merge_node.py +0 -214
  44. {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/LICENSE.md +0 -0
  45. {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/entry_points.txt +0 -0
  46. {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/top_level.txt +0 -0
trilogy/core/optimization.py
@@ -3,8 +3,6 @@ from trilogy.core.models import (
     SelectStatement,
     PersistStatement,
     MultiSelectStatement,
-    Conditional,
-    BooleanOperator,
 )
 from trilogy.core.enums import PurposeLineage
 from trilogy.constants import logger, CONFIG
@@ -60,8 +58,6 @@ def is_direct_return_eligible(
         if select.where_clause
         else set()
     )
-    if conditions and select.limit:
-        return False
     for x in derived_concepts:
         if x.derivation == PurposeLineage.WINDOW:
             return False
@@ -71,7 +67,7 @@ def is_direct_return_eligible(
         if x.address in conditions:
             return False
     logger.info(
-        f"Upleveling output select to final CTE with derived_concepts {[x.address for x in derived_concepts]}"
+        f"[Optimization][EarlyReturn] Upleveling output select to final CTE with derived_concepts {[x.address for x in derived_concepts]}"
     )
     return eligible

@@ -93,39 +89,46 @@ def sort_select_output(cte: CTE, query: SelectStatement | MultiSelectStatement):
 def optimize_ctes(
     input: list[CTE], root_cte: CTE, select: SelectStatement | MultiSelectStatement
 ) -> list[CTE]:
-    complete = False
-    REGISTERED_RULES: list["OptimizationRule"] = []
+
     if CONFIG.optimizations.direct_return and is_direct_return_eligible(
         root_cte, select
     ):
         root_cte.order_by = select.order_by
         root_cte.limit = select.limit
-        if select.where_clause:
-
-            if root_cte.condition:
-                root_cte.condition = Conditional(
-                    left=root_cte.condition,
-                    operator=BooleanOperator.AND,
-                    right=select.where_clause.conditional,
-                )
-            else:
-                root_cte.condition = select.where_clause.conditional
+        # if select.where_clause:
+
+        #     if root_cte.condition:
+        #         root_cte.condition = Conditional(
+        #             left=root_cte.condition,
+        #             operator=BooleanOperator.AND,
+        #             right=select.where_clause.conditional,
+        #         )
+        #     else:
+        #         root_cte.condition = select.where_clause.conditional
         root_cte.requires_nesting = False
         sort_select_output(root_cte, select)
+
+    REGISTERED_RULES: list["OptimizationRule"] = []
+    if CONFIG.optimizations.constant_inlining:
+        REGISTERED_RULES.append(InlineConstant())
     if CONFIG.optimizations.datasource_inlining:
         REGISTERED_RULES.append(InlineDatasource())
     if CONFIG.optimizations.predicate_pushdown:
         REGISTERED_RULES.append(PredicatePushdown())
-    if CONFIG.optimizations.constant_inlining:
-        REGISTERED_RULES.append(InlineConstant())
-    loops = 0
-    while not complete and (loops <= MAX_OPTIMIZATION_LOOPS):
-        actions_taken = False
-        for rule in REGISTERED_RULES:
-            for cte in input:
-                inverse_map = gen_inverse_map(input)
-                actions_taken = actions_taken or rule.optimize(cte, inverse_map)
-        complete = not actions_taken
-        loops += 1
+
+    for rule in REGISTERED_RULES:
+        loops = 0
+        complete = False
+        while not complete and (loops <= MAX_OPTIMIZATION_LOOPS):
+            actions_taken = False
+            # assume we go through all CTEs once
+            look_at = [root_cte, *input]
+            inverse_map = gen_inverse_map(look_at)
+            for cte in look_at:
+                opt = rule.optimize(cte, inverse_map)
+                actions_taken = actions_taken or opt
+            complete = not actions_taken
+            loops += 1
+        logger.info(f"finished checking for {type(rule).__name__} in {loops} loops")

     return filter_irrelevant_ctes(input, root_cte)
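
The restructured loop above runs each optimization rule to its own fixed point over the full CTE list (root included) before moving to the next rule, instead of interleaving all rules inside one shared loop. A minimal standalone sketch of that control flow — the stub rule and string CTE names here are illustrative stand-ins, not trilogy's classes:

MAX_OPTIMIZATION_LOOPS = 100  # assumed bound; the real constant lives in trilogy


class StubRule:
    """Stand-in for an OptimizationRule: returns True when it changed something."""

    def optimize(self, cte: str, inverse_map: dict[str, list[str]]) -> bool:
        return False  # a real rule would mutate the CTE and report the change


def run_to_fixed_point(rules: list[StubRule], root: str, others: list[str]) -> None:
    for rule in rules:
        loops = 0
        complete = False
        # each rule loops until one full pass over every CTE takes no action
        while not complete and loops <= MAX_OPTIMIZATION_LOOPS:
            actions_taken = False
            look_at = [root, *others]
            inverse_map: dict[str, list[str]] = {}  # parent -> children, rebuilt each pass
            for cte in look_at:
                actions_taken = rule.optimize(cte, inverse_map) or actions_taken
            complete = not actions_taken
            loops += 1


run_to_fixed_point([StubRule()], "root_cte", ["cte_a", "cte_b"])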
trilogy/core/optimizations/inline_constant.py
@@ -21,9 +21,12 @@ class InlineConstant(OptimizationRule):
         if to_inline:
             inlined = False
             for c in to_inline:
-                self.log(f"Inlining constant {c.address} on {cte.name}")
+                self.log(f"Attempting to inline constant {c.address} on {cte.name}")
                 test = cte.inline_constant(c)
                 if test:
+                    self.log(f"Successfully inlined constant to {cte.name}")
                     inlined = True
+                else:
+                    self.log(f"Could not inline constant to {cte.name}")
             return inlined
         return False
trilogy/core/optimizations/inline_datasource.py
@@ -4,15 +4,20 @@ from trilogy.core.models import (
 )

 from trilogy.core.optimizations.base_optimization import OptimizationRule
+from collections import defaultdict


 class InlineDatasource(OptimizationRule):

+    def __init__(self):
+        super().__init__()
+        self.candidates = defaultdict(lambda: set())
+        self.count = defaultdict(lambda: 0)
+
     def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
         if not cte.parent_ctes:
             return False

-        optimized = False
         self.log(
             f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
         )
@@ -25,6 +30,9 @@ class InlineDatasource(OptimizationRule):
             if parent_cte.parent_ctes:
                 self.log(f"parent {parent_cte.name} has parents")
                 continue
+            if parent_cte.condition:
+                self.log(f"parent {parent_cte.name} has condition, cannot be inlined")
+                continue
             raw_root = parent_cte.source.datasources[0]
             if not isinstance(raw_root, Datasource):
                 self.log(f"parent {parent_cte.name} is not datasource")
@@ -34,7 +42,8 @@ class InlineDatasource(OptimizationRule):
                 self.log(f"parent {parent_cte.name} datasource is not inlineable")
                 continue
             root_outputs = {x.address for x in root.output_concepts}
-            cte_outputs = {x.address for x in parent_cte.output_columns}
+            cte_outputs = {x.address for x in cte.output_columns}
+            # cte_inherited_outputs = {x.address for x in parent_cte.output_columns if parent_cte.source_map.get(x.address)}
             grain_components = {x.address for x in root.grain.components}
             if not cte_outputs.issubset(root_outputs):
                 self.log(f"Not all {parent_cte.name} outputs are found on datasource")
@@ -44,11 +53,23 @@ class InlineDatasource(OptimizationRule):
                 force_group = True
             to_inline.append(parent_cte)

+        optimized = False
         for replaceable in to_inline:
-
+            if replaceable.name not in self.candidates[cte.name]:
+                self.candidates[cte.name].add(replaceable.name)
+                self.count[replaceable.source.name] += 1
+                return True
+            if self.count[replaceable.source.name] > 1:
+                self.log(
+                    f"Skipping inlining raw datasource {replaceable.source.name} ({replaceable.name}) due to multiple references"
+                )
+                continue
             result = cte.inline_parent_datasource(replaceable, force_group=force_group)
             if result:
-                self.log(f"Inlined parent {replaceable.name}")
+                self.log(
+                    f"Inlined parent {replaceable.name} with {replaceable.source.name}"
+                )
+                optimized = True
             else:
                 self.log(f"Failed to inline {replaceable.name}")
         return optimized
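
InlineDatasource is now stateful: the first time a parent is seen it is only recorded and its raw datasource's reference count incremented; actual inlining is deferred to a later pass, and is skipped when the datasource backs more than one consumer. A rough standalone illustration of that counting guard — the names and shapes below are simplified stand-ins, not trilogy's classes:

from collections import defaultdict

# Per consuming CTE, which parents we have already seen, and how many
# references each raw datasource has overall. Simplified illustration only.
candidates: defaultdict[str, set[str]] = defaultdict(set)
count: defaultdict[str, int] = defaultdict(int)


def try_inline(cte_name: str, parent_name: str, source_name: str) -> bool:
    if parent_name not in candidates[cte_name]:
        # first sighting: record only, so counts settle before any inlining
        candidates[cte_name].add(parent_name)
        count[source_name] += 1
        return True  # counts as an action, so the rule's fixed-point loop runs again
    if count[source_name] > 1:
        # shared datasource: inlining it into every consumer would duplicate scans
        return False
    print(f"inlining {parent_name} ({source_name}) into {cte_name}")
    return True


try_inline("final", "cte_orders", "raw_orders")  # pass 1: recorded only
try_inline("final", "cte_orders", "raw_orders")  # pass 2: inlined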
trilogy/core/optimizations/predicate_pushdown.py
@@ -21,85 +21,125 @@ def decompose_condition(conditional: Conditional):


 def is_child_of(a, comparison):
+    base = comparison == a
+    if base:
+        return True
     if isinstance(comparison, Conditional):
         return (
             is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
         ) and comparison.operator == BooleanOperator.AND
-    return comparison == a
+    return base


 class PredicatePushdown(OptimizationRule):

+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.complete: dict[str, bool] = {}
+
+    def _check_parent(
+        self,
+        parent_cte: CTE,
+        candidate: Conditional,
+        inverse_map: dict[str, list[CTE]],
+    ):
+        conditions = {x.address for x in candidate.concept_arguments}
+        if is_child_of(candidate, parent_cte.condition):
+            return False
+
+        materialized = {k for k, v in parent_cte.source_map.items() if v != []}
+        if not conditions or not materialized:
+            return False
+        # if it's a root datasource, we can filter on _any_ of the output concepts
+        if parent_cte.is_root_datasource:
+            extra_check = {
+                x.address for x in parent_cte.source.datasources[0].output_concepts
+            }
+            if conditions.issubset(extra_check):
+                for x in conditions:
+                    if x not in materialized:
+                        materialized.add(x)
+                        parent_cte.source_map[x] = [
+                            parent_cte.source.datasources[0].name
+                        ]
+        if conditions.issubset(materialized):
+            children = inverse_map.get(parent_cte.name, [])
+            if all([is_child_of(candidate, child.condition) for child in children]):
+                self.log(
+                    f"All concepts are found on {parent_cte.name} with existing {parent_cte.condition} and all it's {len(children)} children include same filter; pushing up {candidate}"
+                )
+                if parent_cte.condition:
+                    parent_cte.condition = Conditional(
+                        left=parent_cte.condition,
+                        operator=BooleanOperator.AND,
+                        right=candidate,
+                    )
+                else:
+                    parent_cte.condition = candidate
+                return True
+        self.debug(
+            f"conditions {conditions} not subset of parent {parent_cte.name} parent has {materialized} "
+        )
+        return False
+
     def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+        optimized = False

         if not cte.parent_ctes:
             self.debug(f"No parent CTEs for {cte.name}")

             return False

-        optimized = False
         if not cte.condition:
             self.debug(f"No CTE condition for {cte.name}")
             return False
-        self.log(
+        if all(
+            [
+                is_child_of(cte.condition, parent_cte.condition)
+                for parent_cte in cte.parent_ctes
+            ]
+        ) and not any([isinstance(x, Datasource) for x in cte.source.datasources]):
+            self.log(
+                f"All parents of {cte.name} have same filter, removing filter from {cte.name}"
+            )
+            cte.condition = None
+            return True
+        else:
+            mapping = {
+                parent.name: is_child_of(cte.condition, parent.condition)
+                for parent in cte.parent_ctes
+            }
+            self.log(
+                f"Could not remove filter from {cte.name}, as not all parents have the same filter: {mapping}"
+            )
+        if self.complete.get(cte.name):
+            self.debug("Have done this CTE before")
+            return False
+
+        self.debug(
             f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
         )
         if isinstance(cte.condition, Conditional):
             candidates = cte.condition.decompose()
         else:
             candidates = [cte.condition]
-        self.log(f"Have {len(candidates)} candidates to try to push down")
+        self.debug(
+            f"Have {len(candidates)} candidates to try to push down from parent {type(cte.condition)}"
+        )
+        optimized = False
         for candidate in candidates:
-            conditions = {x.address for x in candidate.concept_arguments}
+            self.debug(f"Checking candidate {candidate}")
             for parent_cte in cte.parent_ctes:
-                if is_child_of(cte.condition, parent_cte.condition):
-                    continue
-                materialized = {k for k, v in parent_cte.source_map.items() if v != []}
-                # if it's a root datasource, we can filter on _any_ of the output concepts
-                if parent_cte.is_root_datasource:
-                    extra_check = {
-                        x.address
-                        for x in parent_cte.source.datasources[0].output_concepts
-                    }
-                    if conditions.issubset(extra_check):
-                        for x in conditions:
-                            if x not in materialized:
-                                materialized.add(x)
-                                parent_cte.source_map[x] = [
-                                    parent_cte.source.datasources[0].name
-                                ]
-                if conditions.issubset(materialized):
-                    if all(
-                        [
-                            is_child_of(candidate, child.condition)
-                            for child in inverse_map.get(parent_cte.name, [])
-                        ]
-                    ):
-                        self.log(
-                            f"All concepts are found on {parent_cte.name} and all it's children include same filter; pushing up filter"
-                        )
-                        if parent_cte.condition:
-                            parent_cte.condition = Conditional(
-                                left=parent_cte.condition,
-                                operator=BooleanOperator.AND,
-                                right=candidate,
-                            )
-                        else:
-                            parent_cte.condition = candidate
-                        optimized = True
-                else:
-                    self.log(
-                        f"conditions {conditions} not subset of parent {parent_cte.name} parent has {materialized} "
-                    )
-
-        if all(
-            [
-                is_child_of(cte.condition, parent_cte.condition)
-                for parent_cte in cte.parent_ctes
-            ]
-        ) and not any([isinstance(x, Datasource) for x in cte.source.datasources]):
-            self.log("All parents have same filter, removing filter")
-            cte.condition = None
-            optimized = True
+                local_pushdown = self._check_parent(
+                    parent_cte=parent_cte, candidate=candidate, inverse_map=inverse_map
+                )
+                optimized = optimized or local_pushdown
+                if local_pushdown:
+                    # taint a CTE again when something is pushed up to it.
+                    self.complete[parent_cte.name] = False
+                    self.debug(
+                        f"Pushed down {candidate} from {cte.name} to {parent_cte.name}"
+                    )

+        self.complete[cte.name] = True
         return optimized
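
The reworked is_child_of short-circuits on direct equality before recursing; it answers whether a predicate is already guaranteed by a (possibly AND-nested) condition tree, which is the safety check for both removing redundant filters and pushing candidates up to parents. A self-contained sketch with a toy condition type — Cond and AND are illustrative, not trilogy's models:

from dataclasses import dataclass

AND = "and"


@dataclass(frozen=True)
class Cond:
    left: object
    right: object
    operator: str = AND


def is_child_of(a, comparison) -> bool:
    if comparison == a:  # exact match short-circuits before any recursion
        return True
    if isinstance(comparison, Cond):
        # a is only guaranteed when it appears under an AND; an OR branch
        # does not ensure the predicate holds for every row.
        return (
            is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
        ) and comparison.operator == AND
    return False


# "x > 1" is guaranteed by "(x > 1 AND y = 2)":
assert is_child_of("x > 1", Cond("x > 1", "y = 2"))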
trilogy/core/processing/concept_strategies_v3.py
@@ -27,7 +27,6 @@ from trilogy.core.processing.node_generators import (
     gen_group_to_node,
     gen_rowset_node,
     gen_multiselect_node,
-    gen_concept_merge_node,
 )

 from enum import Enum
@@ -68,7 +67,6 @@ def get_priority_concept(
     # sometimes we need to scan intermediate concepts to get merge keys, so fall back
     # to exhaustive search
     pass_two = [c for c in all_concepts if c.address not in attempted_addresses]
-
     for remaining_concept in (pass_one, pass_two):
         priority = (
             # find anything that needs no joins first, so we can exit early
@@ -79,9 +77,6 @@ def get_priority_concept(
                 and c.granularity == Granularity.SINGLE_ROW
             ]
             +
-            # anything that requires merging concept universes
-            [c for c in remaining_concept if c.derivation == PurposeLineage.MERGE]
-            +
             # then multiselects to remove them from scope
             [c for c in remaining_concept if c.derivation == PurposeLineage.MULTISELECT]
             +
@@ -174,7 +169,9 @@ def get_priority_concept(


 def generate_candidates_restrictive(
-    priority_concept: Concept, candidates: list[Concept], exhausted: set[str]
+    priority_concept: Concept,
+    candidates: list[Concept],
+    exhausted: set[str],
 ) -> List[List[Concept]]:
     # if it's single row, joins are irrelevant. Fetch without keys.
     if priority_concept.granularity == Granularity.SINGLE_ROW:
@@ -216,6 +213,7 @@ def generate_node(
         fail_if_not_found=False,
         accept_partial=accept_partial,
         accept_partial_optional=False,
+        source_concepts=source_concepts,
     )

     if candidate:
@@ -273,14 +271,6 @@ def generate_node(
         return gen_multiselect_node(
             concept, local_optional, environment, g, depth + 1, source_concepts, history
         )
-    elif concept.derivation == PurposeLineage.MERGE:
-        logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating multiselect node with optional {[x.address for x in local_optional]}"
-        )
-        node = gen_concept_merge_node(
-            concept, local_optional, environment, g, depth + 1, source_concepts, history
-        )
-        return node
     elif concept.derivation == PurposeLineage.CONSTANT:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating constant node"
@@ -331,17 +321,60 @@ def generate_node(
             fail_if_not_found=False,
             accept_partial=accept_partial,
             accept_partial_optional=True,
+            source_concepts=source_concepts,
         )
     else:
         raise ValueError(f"Unknown derivation {concept.derivation}")


+def validate_concept(
+    concept: Concept,
+    node: StrategyNode,
+    found_addresses: set[str],
+    non_partial_addresses: set[str],
+    partial_addresses: set[str],
+    virtual_addresses: set[str],
+    found_map: dict[str, set[Concept]],
+    accept_partial: bool,
+):
+    found_map[str(node)].add(concept)
+    if concept not in node.partial_concepts:
+
+        found_addresses.add(concept.address)
+        non_partial_addresses.add(concept.address)
+        # remove it from our partial tracking
+        if concept.address in partial_addresses:
+            partial_addresses.remove(concept.address)
+        if concept.address in virtual_addresses:
+            virtual_addresses.remove(concept.address)
+    if concept in node.partial_concepts:
+        if concept.address in non_partial_addresses:
+            return None
+        partial_addresses.add(concept.address)
+        if accept_partial:
+            found_addresses.add(concept.address)
+            found_map[str(node)].add(concept)
+    for _, v in concept.pseudonyms.items():
+        if v.address == concept.address:
+            return
+        validate_concept(
+            v,
+            node,
+            found_addresses,
+            non_partial_addresses,
+            partial_addresses,
+            virtual_addresses,
+            found_map,
+            accept_partial,
+        )
+
+
 def validate_stack(
     stack: List[StrategyNode],
     concepts: List[Concept],
     accept_partial: bool = False,
 ) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
-    found_map = defaultdict(set)
+    found_map: dict[str, set[Concept]] = defaultdict(set)
     found_addresses: set[str] = set()
     non_partial_addresses: set[str] = set()
     partial_addresses: set[str] = set()
@@ -349,27 +382,22 @@ def validate_stack(
     for node in stack:
         resolved = node.resolve()
         for concept in resolved.output_concepts:
-            found_map[str(node)].add(concept)
-            if concept not in node.partial_concepts:
-                found_addresses.add(concept.address)
-                non_partial_addresses.add(concept.address)
-                # remove it from our partial tracking
-                if concept.address in partial_addresses:
-                    partial_addresses.remove(concept.address)
-                if concept.address in virtual_addresses:
-                    virtual_addresses.remove(concept.address)
-            if concept in node.partial_concepts:
-                if concept.address in non_partial_addresses:
-                    continue
-                partial_addresses.add(concept.address)
-                if accept_partial:
-                    found_addresses.add(concept.address)
-                    found_map[str(node)].add(concept)
+            validate_concept(
+                concept,
+                node,
+                found_addresses,
+                non_partial_addresses,
+                partial_addresses,
+                virtual_addresses,
+                found_map,
+                accept_partial,
+            )
         for concept in node.virtual_output_concepts:
             if concept.address in non_partial_addresses:
                 continue
             found_addresses.add(concept.address)
             virtual_addresses.add(concept.address)
+
     # zip in those we know we found
     if not all([c.address in found_addresses for c in concepts]):
         return (
@@ -379,7 +407,8 @@ def validate_stack(
             partial_addresses,
             virtual_addresses,
         )
-    graph_count, graphs = get_disconnected_components(found_map)
+
+    graph_count, _ = get_disconnected_components(found_map)
     if graph_count in (0, 1):
         return (
             ValidationResult.COMPLETE,
@@ -415,7 +444,7 @@ def search_concepts(
     hist = history.get_history(mandatory_list, accept_partial)
     if hist is not False:
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Returning search node from history for {[c.address for c in mandatory_list]} with accept_partial {accept_partial}"
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Returning search node from history ({'exists' if hist is not None else 'does not exist'}) for {[c.address for c in mandatory_list]} with accept_partial {accept_partial}"
         )
         assert not isinstance(hist, bool)
         return hist
@@ -445,6 +474,7 @@ def _search_concepts(
 ) -> StrategyNode | None:

     mandatory_list = unique(mandatory_list, "address")
+
     all_mandatory = set(c.address for c in mandatory_list)
     attempted: set[str] = set()

@@ -457,6 +487,7 @@ def _search_concepts(
         priority_concept = get_priority_concept(
             mandatory_list, attempted, found_concepts=found, depth=depth
         )
+
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)}"
         )
@@ -467,16 +498,16 @@ def _search_concepts(
         candidate_lists = generate_candidates_restrictive(
             priority_concept, candidates, skip
         )
-        for list in candidate_lists:
+        for clist in candidate_lists:
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Beginning sourcing loop for {str(priority_concept)}, accept_partial {accept_partial} optional {[str(v) for v in list]}, exhausted {[str(c) for c in skip]}"
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Beginning sourcing loop for {str(priority_concept)}, accept_partial {accept_partial}, optional {[str(v) for v in clist]}, exhausted {[str(c) for c in skip]}"
             )
             node = generate_node(
                 priority_concept,
-                list,
+                clist,
                 environment,
                 g,
-                depth + 1,
+                depth,
                 source_concepts=search_concepts,
                 accept_partial=accept_partial,
                 history=history,
@@ -494,7 +525,6 @@ def _search_concepts(
                 PurposeLineage.ROWSET,
                 PurposeLineage.BASIC,
                 PurposeLineage.MULTISELECT,
-                PurposeLineage.MERGE,
             ]:
                 skip.add(priority_concept.address)
                 break
@@ -504,7 +534,7 @@ def _search_concepts(
         )

     logger.info(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished concept loop for {priority_concept} flag for accepting partial addresses is "
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished concept loop for {priority_concept} flag for accepting partial addresses is"
        f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial} virtual {virtual}), attempted {attempted}"
     )
     # early exit if we have a complete stack with one node
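
The extracted validate_concept also recurses through a concept's pseudonyms, so an output satisfies the search when any alias of it resolves. A toy model of that recursion — the shape of the pseudonyms mapping is assumed from the diff, and the cycle guard here is a simplification of the original's early return:

# Toy model: marking a concept found also marks every alias reachable
# through its pseudonyms mapping. Not trilogy's actual Concept class.
class Concept:
    def __init__(self, address: str, pseudonyms: dict[str, "Concept"] | None = None):
        self.address = address
        self.pseudonyms = pseudonyms or {}


def mark_found(concept: Concept, found: set[str]) -> None:
    found.add(concept.address)
    for alias in concept.pseudonyms.values():
        if alias.address == concept.address:
            continue  # skip self-referential aliases
        if alias.address not in found:  # guard against mutual-alias recursion
            mark_found(alias, found)


order_id = Concept("orders.id")
order_id.pseudonyms["alt"] = Concept("billing.order_id")
found: set[str] = set()
mark_found(order_id, found)
assert found == {"orders.id", "billing.order_id"}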
trilogy/core/processing/graph_utils.py
@@ -29,10 +29,10 @@ def extract_required_subgraphs(
 def extract_mandatory_subgraphs(paths: Dict[str, List[str]], g) -> List[List[Concept]]:
     final: list[list[str]] = []
     assocs: defaultdict[str, list] = defaultdict(list)
-    for path in paths:
-        extract_required_subgraphs(assocs, paths[path])
+    for path in paths.values():
+        extract_required_subgraphs(assocs, path)

-    for k, v in assocs.items():
+    for _, v in assocs.items():
         final.append(v)
     final_concepts = []
     for value in final:
trilogy/core/processing/node_generators/__init__.py
@@ -8,7 +8,6 @@ from .unnest_node import gen_unnest_node
 from .node_merge_node import gen_merge_node
 from .rowset_node import gen_rowset_node
 from .multiselect_node import gen_multiselect_node
-from .concept_merge_node import gen_concept_merge_node

 __all__ = [
     "gen_filter_node",
@@ -21,5 +20,4 @@ __all__ = [
     "gen_group_to_node",
     "gen_rowset_node",
     "gen_multiselect_node",
-    "gen_concept_merge_node",
 ]