pytrilogy 0.0.1.109__py3-none-any.whl → 0.0.1.111__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of pytrilogy has been flagged as potentially problematic.
Files changed (34):
  1. {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/RECORD +34 -34
  3. {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +11 -3
  6. trilogy/core/enums.py +1 -0
  7. trilogy/core/models.py +94 -67
  8. trilogy/core/optimization.py +134 -12
  9. trilogy/core/processing/concept_strategies_v3.py +44 -19
  10. trilogy/core/processing/node_generators/basic_node.py +2 -0
  11. trilogy/core/processing/node_generators/common.py +3 -1
  12. trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
  13. trilogy/core/processing/node_generators/filter_node.py +36 -6
  14. trilogy/core/processing/node_generators/node_merge_node.py +34 -23
  15. trilogy/core/processing/node_generators/rowset_node.py +37 -8
  16. trilogy/core/processing/node_generators/select_node.py +23 -9
  17. trilogy/core/processing/node_generators/unnest_node.py +24 -3
  18. trilogy/core/processing/node_generators/window_node.py +4 -2
  19. trilogy/core/processing/nodes/__init__.py +7 -6
  20. trilogy/core/processing/nodes/base_node.py +40 -6
  21. trilogy/core/processing/nodes/filter_node.py +15 -1
  22. trilogy/core/processing/nodes/group_node.py +20 -1
  23. trilogy/core/processing/nodes/merge_node.py +37 -10
  24. trilogy/core/processing/nodes/select_node_v2.py +34 -39
  25. trilogy/core/processing/nodes/unnest_node.py +12 -0
  26. trilogy/core/processing/nodes/window_node.py +11 -0
  27. trilogy/core/processing/utility.py +0 -14
  28. trilogy/core/query_processor.py +125 -29
  29. trilogy/dialect/base.py +45 -40
  30. trilogy/executor.py +31 -3
  31. trilogy/parsing/parse_engine.py +49 -17
  32. {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/LICENSE.md +0 -0
  33. {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/entry_points.txt +0 -0
  34. {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/top_level.txt +0 -0
trilogy/core/optimization.py

@@ -4,24 +4,32 @@ from trilogy.core.models import (
     PersistStatement,
     Datasource,
     MultiSelectStatement,
+    Conditional,
+    BooleanOperator,
 )
 from trilogy.core.enums import PurposeLineage
-from trilogy.constants import logger
+from trilogy.constants import logger, CONFIG
 from abc import ABC
 
 
+REGISTERED_RULES: list["OptimizationRule"] = []
+
+
 class OptimizationRule(ABC):
 
-    def optimize(self, cte: CTE) -> bool:
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
         raise NotImplementedError
 
     def log(self, message: str):
         logger.info(f"[Optimization][{self.__class__.__name__}] {message}")
 
+    def debug(self, message: str):
+        logger.debug(f"[Optimization][{self.__class__.__name__}] {message}")
+
 
 class InlineDatasource(OptimizationRule):
 
-    def optimize(self, cte: CTE) -> bool:
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
         if not cte.parent_ctes:
             return False
 
@@ -30,6 +38,7 @@ class InlineDatasource(OptimizationRule):
             f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
         )
         to_inline: list[CTE] = []
+        force_group = False
         for parent_cte in cte.parent_ctes:
             if not parent_cte.is_root_datasource:
                 self.log(f"parent {parent_cte.name} is not root")
@@ -47,23 +56,114 @@ class InlineDatasource(OptimizationRule):
                 continue
             root_outputs = {x.address for x in root.output_concepts}
             cte_outputs = {x.address for x in parent_cte.output_columns}
+            grain_components = {x.address for x in root.grain.components}
             if not cte_outputs.issubset(root_outputs):
                 self.log(f"Not all {parent_cte.name} outputs are found on datasource")
                 continue
-
+            if not grain_components.issubset(cte_outputs):
+                self.log("Not all datasource components in cte outputs, forcing group")
+                force_group = True
             to_inline.append(parent_cte)
 
         for replaceable in to_inline:
             self.log(f"Inlining parent {replaceable.name}")
-            cte.inline_parent_datasource(replaceable)
+            cte.inline_parent_datasource(replaceable, force_group=force_group)
 
         return optimized
 
 
-REGISTERED_RULES: list[OptimizationRule] = [InlineDatasource()]
+# This will be used in the future for more complex condition decomposition
+def decompose_condition(conditional: Conditional):
+    chunks = []
+    if conditional.operator == BooleanOperator.AND:
+        for val in [conditional.left, conditional.right]:
+            if isinstance(val, Conditional):
+                chunks.extend(decompose_condition(val))
+            else:
+                chunks.append(val)
+    else:
+        chunks.append(conditional)
+    return chunks
+
+
+def is_child_of(a, comparison):
+    if isinstance(comparison, Conditional):
+        return (
+            is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
+        ) and comparison.operator == BooleanOperator.AND
+    return comparison == a
+
+
+class PredicatePushdown(OptimizationRule):
 
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
 
-def filter_irrelevant_ctes(input: list[CTE], root_cte: CTE):
+        if not cte.parent_ctes:
+            self.debug(f"No parent CTEs for {cte.name}")
+
+            return False
+
+        optimized = False
+        if not cte.condition:
+            self.debug(f"No CTE condition for {cte.name}")
+            return False
+        self.log(
+            f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
+        )
+        if isinstance(cte.condition, Conditional):
+            candidates = cte.condition.decompose()
+        else:
+            candidates = [cte.condition]
+        logger.info(f"Have {len(candidates)} candidates to try to push down")
+        for candidate in candidates:
+            conditions = {x.address for x in candidate.concept_arguments}
+            for parent_cte in cte.parent_ctes:
+                materialized = {k for k, v in parent_cte.source_map.items() if v != []}
+                if conditions.issubset(materialized):
+                    if all(
+                        [
+                            is_child_of(candidate, child.condition)
+                            for child in inverse_map[parent_cte.name]
+                        ]
+                    ):
+                        self.log(
+                            f"All concepts are found on {parent_cte.name} and all it's children include same filter; pushing up filter"
+                        )
+                        if parent_cte.condition:
+                            parent_cte.condition = Conditional(
+                                left=parent_cte.condition,
+                                operator=BooleanOperator.AND,
+                                right=candidate,
+                            )
+                        else:
+                            parent_cte.condition = candidate
+                        optimized = True
+                else:
+                    logger.info("conditions not subset of parent materialized")
+
+        if all(
+            [
+                is_child_of(cte.condition, parent_cte.condition)
+                for parent_cte in cte.parent_ctes
+            ]
+        ):
+            self.log("All parents have same filter, removing filter")
+            cte.condition = None
+            optimized = True
+
+        return optimized
+
+
+if CONFIG.optimizations.datasource_inlining:
+    REGISTERED_RULES.append(InlineDatasource())
+if CONFIG.optimizations.predicate_pushdown:
+    REGISTERED_RULES.append(PredicatePushdown())
+
+
+def filter_irrelevant_ctes(
+    input: list[CTE],
+    root_cte: CTE,
+):
     relevant_ctes = set()
 
     def recurse(cte: CTE):
@@ -75,6 +175,16 @@ def filter_irrelevant_ctes(input: list[CTE], root_cte: CTE):
     return [cte for cte in input if cte.name in relevant_ctes]
 
 
+def gen_inverse_map(input: list[CTE]) -> dict[str, list[CTE]]:
+    inverse_map: dict[str, list[CTE]] = {}
+    for cte in input:
+        for parent in cte.parent_ctes:
+            if parent.name not in inverse_map:
+                inverse_map[parent.name] = []
+            inverse_map[parent.name].append(cte)
+    return inverse_map
+
+
 def is_direct_return_eligible(
     cte: CTE, select: SelectStatement | PersistStatement | MultiSelectStatement
 ) -> bool:
@@ -94,6 +204,8 @@ def is_direct_return_eligible(
     for x in derived_concepts:
         if x.derivation == PurposeLineage.WINDOW:
             return False
+        if x.derivation == PurposeLineage.UNNEST:
+            return False
         if x.derivation == PurposeLineage.AGGREGATE:
             if x.address in conditions:
                 return False
@@ -126,15 +238,25 @@ def optimize_ctes(
         actions_taken = False
         for rule in REGISTERED_RULES:
             for cte in input:
-                actions_taken = rule.optimize(cte)
+                inverse_map = gen_inverse_map(input)
+                actions_taken = rule.optimize(cte, inverse_map)
         complete = not actions_taken
 
-    if is_direct_return_eligible(root_cte, select):
+    if CONFIG.optimizations.direct_return and is_direct_return_eligible(
+        root_cte, select
+    ):
         root_cte.order_by = select.order_by
         root_cte.limit = select.limit
-        root_cte.condition = (
-            select.where_clause.conditional if select.where_clause else None
-        )
+        if select.where_clause:
+
+            if root_cte.condition:
+                root_cte.condition = Conditional(
+                    left=root_cte.condition,
+                    operator=BooleanOperator.AND,
+                    right=select.where_clause.conditional,
+                )
+            else:
+                root_cte.condition = select.where_clause.conditional
        root_cte.requires_nesting = False
    sort_select_output(root_cte, select)

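The core of the new PredicatePushdown rule is the pair of helpers above: decompose_condition splits an AND tree into independently pushable predicates (an OR has to move as a unit), and is_child_of checks whether a predicate is already enforced somewhere in an AND-only chain, which is what makes pushing or dropping it safe. A minimal sketch of that logic, using stand-in Comparison/Conditional classes rather than trilogy's real models:

    from dataclasses import dataclass
    from enum import Enum
    from typing import Union


    class BooleanOperator(Enum):
        AND = "and"
        OR = "or"


    @dataclass(frozen=True)
    class Comparison:
        # stand-in leaf predicate; trilogy's real Comparison is richer
        expr: str


    @dataclass(frozen=True)
    class Conditional:
        left: Union["Conditional", Comparison]
        right: Union["Conditional", Comparison]
        operator: BooleanOperator


    def decompose_condition(conditional: Conditional) -> list:
        # only AND chains can be split; any other operator stays whole
        chunks: list = []
        if conditional.operator == BooleanOperator.AND:
            for val in (conditional.left, conditional.right):
                if isinstance(val, Conditional):
                    chunks.extend(decompose_condition(val))
                else:
                    chunks.append(val)
        else:
            chunks.append(conditional)
        return chunks


    def is_child_of(a, comparison) -> bool:
        # true when `a` appears in an AND-only subtree of `comparison`
        if isinstance(comparison, Conditional):
            return (
                is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
            ) and comparison.operator == BooleanOperator.AND
        return comparison == a


    cond = Conditional(
        left=Comparison("x > 1"),
        right=Conditional(
            left=Comparison("y = 2"),
            right=Comparison("z < 3"),
            operator=BooleanOperator.AND,
        ),
        operator=BooleanOperator.AND,
    )
    assert decompose_condition(cond) == [
        Comparison("x > 1"),
        Comparison("y = 2"),
        Comparison("z < 3"),
    ]
    assert is_child_of(Comparison("y = 2"), cond)

gen_inverse_map supplies the child lookup the rule needs: a predicate is only pushed into a parent CTE when every consumer of that parent carries the same filter, since otherwise the parent would return too few rows for its other children.
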
trilogy/core/processing/concept_strategies_v3.py

@@ -1,7 +1,6 @@
 from collections import defaultdict
 from typing import List, Optional, Callable
 
-
 from trilogy.constants import logger
 from trilogy.core.enums import PurposeLineage, Granularity, FunctionType
 from trilogy.core.env_processor import generate_graph
@@ -278,9 +277,10 @@ def generate_node(
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating multiselect node with optional {[x.address for x in local_optional]}"
         )
-        return gen_concept_merge_node(
+        node = gen_concept_merge_node(
             concept, local_optional, environment, g, depth + 1, source_concepts, history
         )
+        return node
     elif concept.derivation == PurposeLineage.CONSTANT:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating constant node"
@@ -340,13 +340,15 @@ def validate_stack(
     stack: List[StrategyNode],
     concepts: List[Concept],
     accept_partial: bool = False,
-) -> tuple[ValidationResult, set[str], set[str], set[str]]:
+) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
     found_map = defaultdict(set)
     found_addresses: set[str] = set()
     non_partial_addresses: set[str] = set()
     partial_addresses: set[str] = set()
+    virtual_addresses: set[str] = set()
     for node in stack:
-        for concept in node.resolve().output_concepts:
+        resolved = node.resolve()
+        for concept in resolved.output_concepts:
             found_map[str(node)].add(concept)
             if concept not in node.partial_concepts:
                 found_addresses.add(concept.address)
@@ -354,11 +356,20 @@ def validate_stack(
                 # remove it from our partial tracking
                 if concept.address in partial_addresses:
                     partial_addresses.remove(concept.address)
+                if concept.address in virtual_addresses:
+                    virtual_addresses.remove(concept.address)
             if concept in node.partial_concepts:
+                if concept.address in non_partial_addresses:
+                    continue
                 partial_addresses.add(concept.address)
                 if accept_partial:
                     found_addresses.add(concept.address)
                     found_map[str(node)].add(concept)
+        for concept in node.virtual_output_concepts:
+            if concept.address in non_partial_addresses:
+                continue
+            found_addresses.add(concept.address)
+            virtual_addresses.add(concept.address)
     # zip in those we know we found
     if not all([c.address in found_addresses for c in concepts]):
         return (
@@ -366,12 +377,25 @@ def validate_stack(
             found_addresses,
             {c.address for c in concepts if c.address not in found_addresses},
             partial_addresses,
+            virtual_addresses,
         )
     graph_count, graphs = get_disconnected_components(found_map)
     if graph_count in (0, 1):
-        return ValidationResult.COMPLETE, found_addresses, set(), partial_addresses
+        return (
+            ValidationResult.COMPLETE,
+            found_addresses,
+            set(),
+            partial_addresses,
+            virtual_addresses,
+        )
     # if we have too many subgraphs, we need to keep searching
-    return ValidationResult.DISCONNECTED, found_addresses, set(), partial_addresses
+    return (
+        ValidationResult.DISCONNECTED,
+        found_addresses,
+        set(),
+        partial_addresses,
+        virtual_addresses,
+    )
 
 
 def depth_to_prefix(depth: int) -> str:
@@ -404,7 +428,10 @@ def search_concepts(
         accept_partial=accept_partial,
         history=history,
     )
-    history.search_to_history(mandatory_list, accept_partial, result)
+    # a node may be mutated after be cached; always store a copy
+    history.search_to_history(
+        mandatory_list, accept_partial, result.copy() if result else None
+    )
     return result
 
 
@@ -472,13 +499,13 @@ def _search_concepts(
             skip.add(priority_concept.address)
             break
         attempted.add(priority_concept.address)
-        complete, found, missing, partial = validate_stack(
+        complete, found, missing, partial, virtual = validate_stack(
            stack, mandatory_list, accept_partial
        )
 
        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished concept loop for {priority_concept} flag for accepting partial addresses is "
-            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial}), attempted {attempted}"
+            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial} virtual {virtual}), attempted {attempted}"
        )
        # early exit if we have a complete stack with one node
        # we can only early exit if we have a complete stack
@@ -489,7 +516,7 @@ def _search_concepts(
            break
 
    logger.info(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {found} from {[n for n in stack]} (missing {all_mandatory - found}), attempted {attempted}"
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {found} from {[n for n in stack]} (missing {all_mandatory - found}), attempted {attempted}, virtual {virtual}"
    )
    if complete == ValidationResult.COMPLETE:
        all_partial = [
@@ -503,24 +530,22 @@ def _search_concepts(
                ]
            )
        ]
+        non_virtual = [c for c in mandatory_list if c.address not in virtual]
        if len(stack) == 1:
+            output = stack[0]
            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning just that node"
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}"
            )
-            return stack[0]
+            return output
 
        output = MergeNode(
-            input_concepts=mandatory_list,
-            output_concepts=mandatory_list,
+            input_concepts=non_virtual,
+            output_concepts=non_virtual,
            environment=environment,
            g=g,
            parents=stack,
            depth=depth,
            partial_concepts=all_partial,
-            # always hide merge concepts
-            hidden_concepts=[
-                x for x in mandatory_list if x.derivation == PurposeLineage.MERGE
-            ],
        )
 
        # ensure we can resolve our final merge
@@ -573,7 +598,7 @@ def _search_concepts(
        )
        return partial_search
    logger.error(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {all_mandatory - found}"
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {all_mandatory - found},"
    )
    return None

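The search_to_history change is a defensive-copy fix: the cache previously held the live node, so any later in-place mutation of a returned node (for example the filter pushdown in gen_filter_node below) would silently corrupt cached results. A condensed sketch of the failure mode and the fix, with a simplified string-keyed cache and deepcopy standing in for the node's copy() method; the real History API takes a concept list and an accept_partial flag:

    from copy import deepcopy


    class History:
        """Simplified stand-in for trilogy's History cache."""

        def __init__(self):
            self._cache: dict[str, object] = {}

        def search_to_history(self, key: str, result) -> None:
            # store a copy so later in-place mutation of `result` cannot leak in
            self._cache[key] = deepcopy(result) if result is not None else None

        def get_history(self, key: str):
            return self._cache.get(key)


    history = History()
    node = {"output_concepts": ["order.id"], "conditions": None}
    history.search_to_history("order.id", node)
    node["conditions"] = "status = 'shipped'"  # caller mutates the live node
    assert history.get_history("order.id")["conditions"] is None  # cache intact
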
trilogy/core/processing/node_generators/basic_node.py

@@ -8,6 +8,7 @@ from trilogy.core.processing.nodes import StrategyNode, History, MergeNode
 from trilogy.core.processing.node_generators.common import (
     resolve_function_parent_concepts,
 )
+from trilogy.utility import unique
 from trilogy.constants import logger
 
 LOGGER_PREFIX = "[GEN_BASIC_NODE]"
@@ -37,6 +38,7 @@ def gen_basic_node(
     attempts.append((parent_concepts + local_optional, local_optional + [concept]))
 
     for attempt, output in reversed(attempts):
+        attempt = unique(attempt, "address")
        parent_node = source_concepts(
            mandatory_list=attempt,
            environment=environment,
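The new unique(attempt, "address") call de-duplicates the candidate concept list, since parent_concepts and local_optional can overlap. A sketch of an order-preserving unique-by-attribute helper matching the signature used here; the real trilogy.utility.unique may differ in detail:

    from typing import Any, List


    def unique(items: List[Any], attribute: str) -> List[Any]:
        # keep the first occurrence of each attribute value, preserving order
        seen: set = set()
        out: List[Any] = []
        for item in items:
            key = getattr(item, attribute)
            if key not in seen:
                seen.add(key)
                out.append(item)
        return out


    class Concept:
        def __init__(self, address: str):
            self.address = address


    deduped = unique([Concept("a"), Concept("b"), Concept("a")], "address")
    assert [c.address for c in deduped] == ["a", "b"]
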
trilogy/core/processing/node_generators/common.py

@@ -56,7 +56,9 @@ def resolve_filter_parent_concepts(
     base_existence = []
     base_rows = [direct_parent]
     base_rows += concept.lineage.where.row_arguments
-    base_existence += concept.lineage.where.existence_arguments
+    # TODO: pass tuple groups through
+    for ctuple in concept.lineage.where.existence_arguments:
+        base_existence += list(ctuple)
     if direct_parent.grain:
         base_rows += direct_parent.grain.components_copy
     if (
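Here where.existence_arguments now yields tuples of concepts rather than a flat list, so the resolver flattens each tuple; per the TODO, the grouping itself is discarded for now. A toy illustration with strings standing in for concepts:

    # grouped existence arguments, as tuples
    existence_arguments = [("user.id", "user.active"), ("order.id",)]

    base_existence: list[str] = []
    for ctuple in existence_arguments:
        base_existence += list(ctuple)  # flatten each tuple group

    assert base_existence == ["user.id", "user.active", "order.id"]
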
trilogy/core/processing/node_generators/concept_merge_node.py

@@ -56,6 +56,7 @@ def gen_concept_merge_node(
 
     # get additional concepts that should be merged across the environments
     additional_merge: List[Concept] = [*lineage.concepts]
+    target_namespaces = set(x.namespace for x in [concept] + local_optional)
     for x in local_optional:
         if x.address in environment.merged_concepts:
             ms = environment.merged_concepts[x.address].lineage
@@ -64,6 +65,8 @@ def gen_concept_merge_node(
 
     for select in lineage.concepts:
         # if it's a merge concept, filter it out of the optional
+        if select.namespace not in target_namespaces:
+            continue
         sub_optional = [
             x
             for x in local_optional
@@ -76,6 +79,9 @@ def gen_concept_merge_node(
         ]
         sub_optional += sub_additional_merge
         final: List[Concept] = unique([select] + sub_optional, "address")
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} generating concept merge parent node with {[x.address for x in final]}"
+        )
         snode: StrategyNode = source_concepts(
             mandatory_list=final,
             environment=environment,
@@ -111,17 +117,18 @@ def gen_concept_merge_node(
 
     additional_relevant = [x for x in outputs if x.address in enrichment]
     final_outputs = outputs + additional_relevant + [concept]
+    virtual_outputs = [x for x in final_outputs if x.derivation == PurposeLineage.MERGE]
     node = MergeNode(
         input_concepts=[x for y in base_parents for x in y.output_concepts],
-        output_concepts=[x for x in final_outputs],
-        hidden_concepts=[
-            x for x in final_outputs if x.derivation == PurposeLineage.MERGE
+        output_concepts=[
+            x for x in final_outputs if x.derivation != PurposeLineage.MERGE
         ],
         environment=environment,
         g=g,
         depth=depth,
         parents=base_parents,
         node_joins=node_joins,
+        virtual_output_concepts=virtual_outputs,
     )
 
     qds = node.rebuild_cache()
@@ -149,9 +156,17 @@ def gen_concept_merge_node(
             f"{padding(depth)}{LOGGER_PREFIX} all enriched concepts returned from base merge concept node; exiting early"
         )
         return node
+    missing = [
+        x
+        for x in local_optional
+        if x.address not in [y.address for y in node.output_concepts]
+    ]
+    logger.info(
+        f"{padding(depth)}{LOGGER_PREFIX} generating merge concept enrichment node for missing {[x.address for x in missing]}"
+    )
     enrich_node: MergeNode = source_concepts( # this fetches the parent + join keys
         # to then connect to the rest of the query
-        mandatory_list=additional_relevant + local_optional,
+        mandatory_list=additional_relevant + missing,
         environment=environment,
         g=g,
         depth=depth + 1,
@@ -159,7 +174,7 @@ def gen_concept_merge_node(
     )
     if not enrich_node:
         logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} Cannot generate merge concept enrichment node for {concept} with optional {local_optional}, returning just merge concept"
+            f"{padding(depth)}{LOGGER_PREFIX} Cannot generate merge concept enrichment node for {concept.address} with optional {[x.address for x in local_optional]}, returning just merge concept"
         )
         return node
 
@@ -170,12 +185,12 @@ def gen_concept_merge_node(
     return MergeNode(
         input_concepts=enrich_node.output_concepts + node.output_concepts,
         # also filter out the
-        output_concepts=node.output_concepts + local_optional,
-        hidden_concepts=[
+        output_concepts=[
             x
             for x in node.output_concepts + local_optional
-            if x.derivation == PurposeLineage.MERGE
+            if x.derivation != PurposeLineage.MERGE
         ],
+        hidden_concepts=[],
         environment=environment,
         g=g,
         depth=depth,
@@ -195,4 +210,5 @@ def gen_concept_merge_node(
             )
         ],
         partial_concepts=node.partial_concepts,
+        virtual_output_concepts=virtual_outputs,
     )
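The recurring theme in this file is that MERGE-derived concepts move from being emitted-then-hidden (hidden_concepts) to being excluded from output_concepts entirely and tracked as virtual_output_concepts, which validate_stack above now counts as satisfied without requiring materialization. A stand-in sketch of the partition, not trilogy's real Concept model:

    from collections import namedtuple
    from enum import Enum


    class PurposeLineage(Enum):
        BASIC = "basic"
        MERGE = "merge"


    Concept = namedtuple("Concept", ["address", "derivation"])

    final_outputs = [
        Concept("order.id", PurposeLineage.BASIC),
        Concept("order.customer_id", PurposeLineage.MERGE),
    ]

    # real outputs: everything except MERGE-derived concepts
    output_concepts = [
        x for x in final_outputs if x.derivation != PurposeLineage.MERGE
    ]
    # virtual outputs: satisfied by the merge itself, never materialized
    virtual_outputs = [
        x for x in final_outputs if x.derivation == PurposeLineage.MERGE
    ]

    assert [x.address for x in output_concepts] == ["order.id"]
    assert [x.address for x in virtual_outputs] == ["order.customer_id"]
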
trilogy/core/processing/node_generators/filter_node.py

@@ -2,11 +2,14 @@ from typing import List
 
 
 from trilogy.core.enums import JoinType
-from trilogy.core.models import (
-    Concept,
-    Environment,
+from trilogy.core.models import Concept, Environment, FilterItem
+from trilogy.core.processing.nodes import (
+    FilterNode,
+    MergeNode,
+    NodeJoin,
+    History,
+    StrategyNode,
 )
-from trilogy.core.processing.nodes import FilterNode, MergeNode, NodeJoin, History
 from trilogy.core.processing.node_generators.common import (
     resolve_filter_parent_concepts,
 )
@@ -25,16 +28,19 @@ def gen_filter_node(
     depth: int,
     source_concepts,
     history: History | None = None,
-) -> MergeNode | FilterNode | None:
+) -> StrategyNode | None:
     immediate_parent, parent_row_concepts, parent_existence_concepts = (
         resolve_filter_parent_concepts(concept)
     )
+    if not isinstance(concept.lineage, FilterItem):
+        raise SyntaxError('Filter node must have a lineage of type "FilterItem"')
+    where = concept.lineage.where
 
     logger.info(
         f"{padding(depth)}{LOGGER_PREFIX} fetching filter node row parents {[x.address for x in parent_row_concepts]}"
     )
     core_parents = []
-    parent = source_concepts(
+    parent: StrategyNode = source_concepts(
         mandatory_list=parent_row_concepts,
         environment=environment,
         g=g,
@@ -43,7 +49,28 @@ def gen_filter_node(
     )
 
     if not parent:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} filter node row parents {[x.address for x in parent_row_concepts]} could not be found"
+        )
         return None
+
+    if not local_optional and not parent_existence_concepts:
+        optimized_pushdown = True
+    else:
+        optimized_pushdown = False
+
+    if optimized_pushdown:
+        if parent.conditions:
+            parent.conditions = parent.conditions + where.conditional
+        else:
+            parent.conditions = where.conditional
+        parent.output_concepts = [concept]
+        parent.rebuild_cache()
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} returning optimized filter node with pushdown to parent with condition {where.conditional}"
+        )
+        return parent
+
     core_parents.append(parent)
     if parent_existence_concepts:
         logger.info(
@@ -57,6 +84,9 @@ def gen_filter_node(
         history=history,
     )
     if not parent_existence:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} filter existence node parents could not be found"
+        )
         return None
     core_parents.append(parent_existence)
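The biggest behavioral change in gen_filter_node is the optimized_pushdown fast path: when the filtered concept has no optional companions and no existence subqueries, the filter's condition is attached directly to the already-built parent node instead of wrapping it in a FilterNode. A condensed sketch of the decision, with a dict standing in for StrategyNode and strings for conditions:

    def try_filter_pushdown(
        parent: dict, conditional: str, local_optional: list, existence: list
    ) -> bool:
        # other requested concepts need unfiltered rows; keep the FilterNode path
        if local_optional or existence:
            return False
        if parent["conditions"]:
            # parent already filters: AND the new condition onto it
            parent["conditions"] = f"({parent['conditions']}) AND ({conditional})"
        else:
            parent["conditions"] = conditional
        return True


    parent = {"conditions": None}
    assert try_filter_pushdown(parent, "status = 'shipped'", [], [])
    assert parent["conditions"] == "status = 'shipped'"

This pairs with the history-copy fix in concept_strategies_v3.py: because the parent node is mutated in place here, the cached search result must be a copy, not the live object.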