pytrilogy 0.0.1.110__py3-none-any.whl → 0.0.1.111__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.


Files changed (33)
  1. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/RECORD +33 -33
  3. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +1 -1
  6. trilogy/core/models.py +85 -67
  7. trilogy/core/optimization.py +23 -8
  8. trilogy/core/processing/concept_strategies_v3.py +44 -19
  9. trilogy/core/processing/node_generators/basic_node.py +2 -0
  10. trilogy/core/processing/node_generators/common.py +3 -1
  11. trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
  12. trilogy/core/processing/node_generators/filter_node.py +36 -6
  13. trilogy/core/processing/node_generators/node_merge_node.py +34 -23
  14. trilogy/core/processing/node_generators/rowset_node.py +30 -6
  15. trilogy/core/processing/node_generators/select_node.py +23 -9
  16. trilogy/core/processing/node_generators/unnest_node.py +24 -3
  17. trilogy/core/processing/node_generators/window_node.py +4 -2
  18. trilogy/core/processing/nodes/__init__.py +7 -6
  19. trilogy/core/processing/nodes/base_node.py +40 -6
  20. trilogy/core/processing/nodes/filter_node.py +15 -1
  21. trilogy/core/processing/nodes/group_node.py +20 -1
  22. trilogy/core/processing/nodes/merge_node.py +36 -7
  23. trilogy/core/processing/nodes/select_node_v2.py +34 -39
  24. trilogy/core/processing/nodes/unnest_node.py +12 -0
  25. trilogy/core/processing/nodes/window_node.py +11 -0
  26. trilogy/core/processing/utility.py +0 -14
  27. trilogy/core/query_processor.py +125 -29
  28. trilogy/dialect/base.py +45 -40
  29. trilogy/executor.py +31 -3
  30. trilogy/parsing/parse_engine.py +49 -17
  31. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/LICENSE.md +0 -0
  32. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/entry_points.txt +0 -0
  33. {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,6 @@
 from collections import defaultdict
 from typing import List, Optional, Callable

-
 from trilogy.constants import logger
 from trilogy.core.enums import PurposeLineage, Granularity, FunctionType
 from trilogy.core.env_processor import generate_graph
@@ -278,9 +277,10 @@ def generate_node(
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating multiselect node with optional {[x.address for x in local_optional]}"
         )
-        return gen_concept_merge_node(
+        node = gen_concept_merge_node(
             concept, local_optional, environment, g, depth + 1, source_concepts, history
         )
+        return node
     elif concept.derivation == PurposeLineage.CONSTANT:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating constant node"
@@ -340,13 +340,15 @@ def validate_stack(
     stack: List[StrategyNode],
     concepts: List[Concept],
     accept_partial: bool = False,
-) -> tuple[ValidationResult, set[str], set[str], set[str]]:
+) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
     found_map = defaultdict(set)
     found_addresses: set[str] = set()
     non_partial_addresses: set[str] = set()
     partial_addresses: set[str] = set()
+    virtual_addresses: set[str] = set()
     for node in stack:
-        for concept in node.resolve().output_concepts:
+        resolved = node.resolve()
+        for concept in resolved.output_concepts:
             found_map[str(node)].add(concept)
             if concept not in node.partial_concepts:
                 found_addresses.add(concept.address)
@@ -354,11 +356,20 @@ def validate_stack(
                 # remove it from our partial tracking
                 if concept.address in partial_addresses:
                     partial_addresses.remove(concept.address)
+                if concept.address in virtual_addresses:
+                    virtual_addresses.remove(concept.address)
             if concept in node.partial_concepts:
+                if concept.address in non_partial_addresses:
+                    continue
                 partial_addresses.add(concept.address)
                 if accept_partial:
                     found_addresses.add(concept.address)
                     found_map[str(node)].add(concept)
+        for concept in node.virtual_output_concepts:
+            if concept.address in non_partial_addresses:
+                continue
+            found_addresses.add(concept.address)
+            virtual_addresses.add(concept.address)
     # zip in those we know we found
     if not all([c.address in found_addresses for c in concepts]):
         return (
@@ -366,12 +377,25 @@ def validate_stack(
             found_addresses,
             {c.address for c in concepts if c.address not in found_addresses},
             partial_addresses,
+            virtual_addresses,
         )
     graph_count, graphs = get_disconnected_components(found_map)
     if graph_count in (0, 1):
-        return ValidationResult.COMPLETE, found_addresses, set(), partial_addresses
+        return (
+            ValidationResult.COMPLETE,
+            found_addresses,
+            set(),
+            partial_addresses,
+            virtual_addresses,
+        )
     # if we have too many subgraphs, we need to keep searching
-    return ValidationResult.DISCONNECTED, found_addresses, set(), partial_addresses
+    return (
+        ValidationResult.DISCONNECTED,
+        found_addresses,
+        set(),
+        partial_addresses,
+        virtual_addresses,
+    )


 def depth_to_prefix(depth: int) -> str:
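Editor's note: the validate_stack changes above add a fifth returned set tracking concepts that are only satisfied "virtually", i.e. exposed through a node's virtual_output_concepts without being physically emitted. A rough standalone sketch of that bookkeeping, using toy stand-in objects rather than the real StrategyNode API:

    # Toy illustration only; real nodes come from trilogy.core.processing.nodes.
    from dataclasses import dataclass, field

    @dataclass
    class ToyNode:
        outputs: set = field(default_factory=set)
        virtual_outputs: set = field(default_factory=set)

    def collect_addresses(stack):
        found, virtual = set(), set()
        for node in stack:
            found |= node.outputs
            for address in node.virtual_outputs:
                # counted as satisfied, but remembered separately so the final
                # merge node can drop it from its physical input/output lists
                found.add(address)
                virtual.add(address)
        return found, virtual

    found, virtual = collect_addresses(
        [ToyNode(outputs={"order.id"}, virtual_outputs={"merged.key"})]
    )
    assert found == {"order.id", "merged.key"} and virtual == {"merged.key"}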
@@ -404,7 +428,10 @@ def search_concepts(
         accept_partial=accept_partial,
         history=history,
     )
-    history.search_to_history(mandatory_list, accept_partial, result)
+    # a node may be mutated after be cached; always store a copy
+    history.search_to_history(
+        mandatory_list, accept_partial, result.copy() if result else None
+    )
     return result
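Editor's note: the copy added above guards against cache poisoning: a cached node that the caller later mutates would otherwise rewrite the cached entry in place. A minimal generic sketch of the pattern (the Node and History classes here are simplified stand-ins, not the trilogy implementations):

    from dataclasses import dataclass, field

    @dataclass
    class Node:
        outputs: list = field(default_factory=list)

        def copy(self):
            return Node(outputs=list(self.outputs))

    class History:
        def __init__(self):
            self._cache = {}

        def store(self, key, node):
            # store a copy so later mutation of the live node cannot rewrite the cache
            self._cache[key] = node.copy() if node else None

    live = Node(outputs=["order.id"])
    history = History()
    history.store("order.id", live)
    live.outputs.append("customer.id")  # caller mutates the node after caching
    assert history._cache["order.id"].outputs == ["order.id"]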
@@ -472,13 +499,13 @@ def _search_concepts(
                 skip.add(priority_concept.address)
                 break
         attempted.add(priority_concept.address)
-        complete, found, missing, partial = validate_stack(
+        complete, found, missing, partial, virtual = validate_stack(
             stack, mandatory_list, accept_partial
         )

         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished concept loop for {priority_concept} flag for accepting partial addresses is "
-            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial}), attempted {attempted}"
+            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial} virtual {virtual}), attempted {attempted}"
         )
         # early exit if we have a complete stack with one node
         # we can only early exit if we have a complete stack
@@ -489,7 +516,7 @@ def _search_concepts(
                 break

     logger.info(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {found} from {[n for n in stack]} (missing {all_mandatory - found}), attempted {attempted}"
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {found} from {[n for n in stack]} (missing {all_mandatory - found}), attempted {attempted}, virtual {virtual}"
     )
     if complete == ValidationResult.COMPLETE:
         all_partial = [
@@ -503,24 +530,22 @@ def _search_concepts(
                 ]
             )
         ]
+        non_virtual = [c for c in mandatory_list if c.address not in virtual]
         if len(stack) == 1:
+            output = stack[0]
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning just that node"
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}"
             )
-            return stack[0]
+            return output

         output = MergeNode(
-            input_concepts=mandatory_list,
-            output_concepts=mandatory_list,
+            input_concepts=non_virtual,
+            output_concepts=non_virtual,
             environment=environment,
             g=g,
             parents=stack,
             depth=depth,
             partial_concepts=all_partial,
-            # always hide merge concepts
-            hidden_concepts=[
-                x for x in mandatory_list if x.derivation == PurposeLineage.MERGE
-            ],
         )

         # ensure we can resolve our final merge
@@ -573,7 +598,7 @@ def _search_concepts(
             )
             return partial_search
     logger.error(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {all_mandatory - found}"
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {all_mandatory - found},"
     )
     return None

@@ -8,6 +8,7 @@ from trilogy.core.processing.nodes import StrategyNode, History, MergeNode
 from trilogy.core.processing.node_generators.common import (
     resolve_function_parent_concepts,
 )
+from trilogy.utility import unique
 from trilogy.constants import logger

 LOGGER_PREFIX = "[GEN_BASIC_NODE]"
@@ -37,6 +38,7 @@ def gen_basic_node(
     attempts.append((parent_concepts + local_optional, local_optional + [concept]))

     for attempt, output in reversed(attempts):
+        attempt = unique(attempt, "address")
         parent_node = source_concepts(
             mandatory_list=attempt,
             environment=environment,
@@ -56,7 +56,9 @@ def resolve_filter_parent_concepts(
     base_existence = []
     base_rows = [direct_parent]
     base_rows += concept.lineage.where.row_arguments
-    base_existence += concept.lineage.where.existence_arguments
+    # TODO: pass tuple groups through
+    for ctuple in concept.lineage.where.existence_arguments:
+        base_existence += list(ctuple)
     if direct_parent.grain:
         base_rows += direct_parent.grain.components_copy
     if (
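Editor's note: a quick standalone rendering of the flattening above, assuming existence_arguments is an iterable of tuples of concepts (plain address strings are used here in place of real Concept objects):

    existence_arguments = [("order.id", "order.customer_id"), ("customer.id",)]
    base_existence = []
    # the TODO in the source notes that tuple groupings are flattened rather than preserved
    for ctuple in existence_arguments:
        base_existence += list(ctuple)
    assert base_existence == ["order.id", "order.customer_id", "customer.id"]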
@@ -56,6 +56,7 @@ def gen_concept_merge_node(

     # get additional concepts that should be merged across the environments
     additional_merge: List[Concept] = [*lineage.concepts]
+    target_namespaces = set(x.namespace for x in [concept] + local_optional)
     for x in local_optional:
         if x.address in environment.merged_concepts:
             ms = environment.merged_concepts[x.address].lineage
@@ -64,6 +65,8 @@ def gen_concept_merge_node(

     for select in lineage.concepts:
         # if it's a merge concept, filter it out of the optional
+        if select.namespace not in target_namespaces:
+            continue
         sub_optional = [
             x
             for x in local_optional
@@ -76,6 +79,9 @@ def gen_concept_merge_node(
         ]
         sub_optional += sub_additional_merge
         final: List[Concept] = unique([select] + sub_optional, "address")
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} generating concept merge parent node with {[x.address for x in final]}"
+        )
         snode: StrategyNode = source_concepts(
             mandatory_list=final,
             environment=environment,
@@ -111,17 +117,18 @@ def gen_concept_merge_node(

     additional_relevant = [x for x in outputs if x.address in enrichment]
     final_outputs = outputs + additional_relevant + [concept]
+    virtual_outputs = [x for x in final_outputs if x.derivation == PurposeLineage.MERGE]
     node = MergeNode(
         input_concepts=[x for y in base_parents for x in y.output_concepts],
-        output_concepts=[x for x in final_outputs],
-        hidden_concepts=[
-            x for x in final_outputs if x.derivation == PurposeLineage.MERGE
+        output_concepts=[
+            x for x in final_outputs if x.derivation != PurposeLineage.MERGE
         ],
         environment=environment,
         g=g,
         depth=depth,
         parents=base_parents,
         node_joins=node_joins,
+        virtual_output_concepts=virtual_outputs,
     )

     qds = node.rebuild_cache()
@@ -149,9 +156,17 @@ def gen_concept_merge_node(
             f"{padding(depth)}{LOGGER_PREFIX} all enriched concepts returned from base merge concept node; exiting early"
         )
         return node
+    missing = [
+        x
+        for x in local_optional
+        if x.address not in [y.address for y in node.output_concepts]
+    ]
+    logger.info(
+        f"{padding(depth)}{LOGGER_PREFIX} generating merge concept enrichment node for missing {[x.address for x in missing]}"
+    )
     enrich_node: MergeNode = source_concepts( # this fetches the parent + join keys
         # to then connect to the rest of the query
-        mandatory_list=additional_relevant + local_optional,
+        mandatory_list=additional_relevant + missing,
         environment=environment,
         g=g,
         depth=depth + 1,
@@ -159,7 +174,7 @@ def gen_concept_merge_node(
     )
     if not enrich_node:
         logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} Cannot generate merge concept enrichment node for {concept} with optional {local_optional}, returning just merge concept"
+            f"{padding(depth)}{LOGGER_PREFIX} Cannot generate merge concept enrichment node for {concept.address} with optional {[x.address for x in local_optional]}, returning just merge concept"
         )
         return node

@@ -170,12 +185,12 @@ def gen_concept_merge_node(
     return MergeNode(
         input_concepts=enrich_node.output_concepts + node.output_concepts,
         # also filter out the
-        output_concepts=node.output_concepts + local_optional,
-        hidden_concepts=[
+        output_concepts=[
             x
             for x in node.output_concepts + local_optional
-            if x.derivation == PurposeLineage.MERGE
+            if x.derivation != PurposeLineage.MERGE
         ],
+        hidden_concepts=[],
         environment=environment,
         g=g,
         depth=depth,
@@ -195,4 +210,5 @@ def gen_concept_merge_node(
             )
         ],
         partial_concepts=node.partial_concepts,
+        virtual_output_concepts=virtual_outputs,
     )
@@ -2,11 +2,14 @@ from typing import List


 from trilogy.core.enums import JoinType
-from trilogy.core.models import (
-    Concept,
-    Environment,
+from trilogy.core.models import Concept, Environment, FilterItem
+from trilogy.core.processing.nodes import (
+    FilterNode,
+    MergeNode,
+    NodeJoin,
+    History,
+    StrategyNode,
 )
-from trilogy.core.processing.nodes import FilterNode, MergeNode, NodeJoin, History
 from trilogy.core.processing.node_generators.common import (
     resolve_filter_parent_concepts,
 )
@@ -25,16 +28,19 @@ def gen_filter_node(
     depth: int,
     source_concepts,
     history: History | None = None,
-) -> MergeNode | FilterNode | None:
+) -> StrategyNode | None:
     immediate_parent, parent_row_concepts, parent_existence_concepts = (
         resolve_filter_parent_concepts(concept)
     )
+    if not isinstance(concept.lineage, FilterItem):
+        raise SyntaxError('Filter node must have a lineage of type "FilterItem"')
+    where = concept.lineage.where

     logger.info(
         f"{padding(depth)}{LOGGER_PREFIX} fetching filter node row parents {[x.address for x in parent_row_concepts]}"
     )
     core_parents = []
-    parent = source_concepts(
+    parent: StrategyNode = source_concepts(
         mandatory_list=parent_row_concepts,
         environment=environment,
         g=g,
@@ -43,7 +49,28 @@ def gen_filter_node(
     )

     if not parent:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} filter node row parents {[x.address for x in parent_row_concepts]} could not be found"
+        )
         return None
+
+    if not local_optional and not parent_existence_concepts:
+        optimized_pushdown = True
+    else:
+        optimized_pushdown = False
+
+    if optimized_pushdown:
+        if parent.conditions:
+            parent.conditions = parent.conditions + where.conditional
+        else:
+            parent.conditions = where.conditional
+        parent.output_concepts = [concept]
+        parent.rebuild_cache()
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} returning optimized filter node with pushdown to parent with condition {where.conditional}"
+        )
+        return parent
+
     core_parents.append(parent)
     if parent_existence_concepts:
         logger.info(
@@ -57,6 +84,9 @@ def gen_filter_node(
             history=history,
         )
         if not parent_existence:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} filter existence node parents could not be found"
+            )
             return None
         core_parents.append(parent_existence)
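Editor's note: the new optimized_pushdown branch above folds the filter's condition directly into the parent node when nothing else has to ride along (no optional concepts, no existence subqueries), instead of wrapping the parent in a separate FilterNode. A simplified sketch of that decision, using string conditions and a stand-in node type rather than the real trilogy classes:

    from dataclasses import dataclass

    @dataclass
    class StubNode:
        conditions: str | None = None
        output_concepts: tuple = ()

    def apply_filter(parent, condition, target, local_optional, existence_concepts):
        # pushdown is only safe when the filtered concept is the only required output
        # and no separate existence subquery has to be joined in
        if local_optional or existence_concepts:
            return None  # caller would fall back to building a FilterNode over parents
        parent.conditions = (
            f"({parent.conditions}) AND ({condition})" if parent.conditions else condition
        )
        parent.output_concepts = (target,)
        return parent

    node = apply_filter(StubNode(), "revenue > 0", "filtered_revenue", [], [])
    assert node is not None and node.conditions == "revenue > 0"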
@@ -10,6 +10,7 @@ from trilogy.utility import unique
 from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
 from trilogy.core.processing.utility import padding
 from trilogy.core.processing.graph_utils import extract_mandatory_subgraphs
+from trilogy.core.enums import PurposeLineage

 LOGGER_PREFIX = "[GEN_MERGE_NODE]"

@@ -65,13 +66,13 @@ def identify_ds_join_paths(
         ]
         if partial and not accept_partial:
             return None
-        # join_candidates.append({"paths": paths, "datasource": datasource})
+
         return PathInfo(
             paths=paths,
             datasource=datasource,
             reduced_concepts=reduce_path_concepts(paths, g),
             concept_subgraphs=extract_mandatory_subgraphs(paths, g),
-        ) # {"paths": paths, "datasource": datasource}
+        )
     return None


@@ -88,14 +89,7 @@ def gen_merge_node(
     join_candidates: List[PathInfo] = []
     # anchor on datasources
     final_all_concepts = []
-    # implicit_upstream = {}
     for x in all_concepts:
-        # if x.derivation in (PurposeLineage.AGGREGATE, PurposeLineage.BASIC):
-        # final_all_concepts +=resolve_function_parent_concepts(x)
-        # elif x.derivation == PurposeLineage.FILTER:
-        # final_all_concepts +=resolve_filter_parent_concepts(x)
-        # else:
-        # final_all_concepts.append(x)
         final_all_concepts.append(x)
     for datasource in environment.datasources.values():
         path = identify_ds_join_paths(final_all_concepts, g, datasource, accept_partial)
@@ -104,18 +98,25 @@ def gen_merge_node(
     join_candidates.sort(key=lambda x: sum([len(v) for v in x.paths.values()]))
     if not join_candidates:
         return None
-    for join_candidate in join_candidates:
-        logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} Join candidate: {join_candidate.paths}"
-        )
-    join_additions: List[set[str]] = []
+    join_additions: list[set[str]] = []
     for candidate in join_candidates:
         join_additions.append(candidate.reduced_concepts)
-    if not all(
-        [x.issubset(y) or y.issubset(x) for x in join_additions for y in join_additions]
-    ):
+
+    common: set[str] = set()
+    final_candidates: list[set[str]] = []
+    # find all values that show up in every join_additions
+    for ja in join_additions:
+        if not common:
+            common = ja
+        else:
+            common = common.intersection(ja)
+        if all(ja.issubset(y) for y in join_additions):
+            final_candidates.append(ja)
+
+    if not final_candidates:
+        filtered_paths = [x.difference(common) for x in join_additions]
         raise AmbiguousRelationshipResolutionException(
-            f"Ambiguous concept join resolution - possible paths = {join_additions}. Include an additional concept to disambiguate",
+            f"Ambiguous concept join resolution fetching {[x.address for x in all_concepts]} - unique values in possible paths = {filtered_paths}. Include an additional concept to disambiguate",
             join_additions,
         )
     if not join_candidates:
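Editor's note: the replacement logic above is stricter than the old pairwise subset check: a candidate path survives only if it is a subset of every other candidate, and the error reports only what differs from the common core. The same logic rendered standalone on plain sets of concept addresses:

    join_additions = [{"order.id", "store.id"}, {"order.id", "customer.id"}]

    common = set()
    final_candidates = []
    for ja in join_additions:
        common = ja if not common else common.intersection(ja)
        # a path survives only if every other path is a superset of it
        if all(ja.issubset(other) for other in join_additions):
            final_candidates.append(ja)

    # neither path is contained in the other, so resolution is ambiguous;
    # stripping the shared core leaves just the concepts that would disambiguate
    assert not final_candidates
    assert [ja - common for ja in join_additions] == [{"store.id"}, {"customer.id"}]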
@@ -123,9 +124,10 @@ def gen_merge_node(
             f"{padding(depth)}{LOGGER_PREFIX} No additional join candidates could be found"
         )
         return None
-    shortest: PathInfo = sorted(join_candidates, key=lambda x: len(x.reduced_concepts))[
-        0
-    ]
+    shortest: PathInfo = sorted(
+        [x for x in join_candidates if x.reduced_concepts in final_candidates],
+        key=lambda x: len(x.reduced_concepts),
+    )[0]
     logger.info(f"{padding(depth)}{LOGGER_PREFIX} final path is {shortest.paths}")
     # logger.info(f'{padding(depth)}{LOGGER_PREFIX} final reduced concepts are {shortest.concs}')
     parents = []
@@ -145,11 +147,20 @@ def gen_merge_node(
                 f"{padding(depth)}{LOGGER_PREFIX} Unable to instantiate target subgraph"
             )
             return None
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} finished subgraph fetch for {[c.address for c in graph]}, have parent {type(parent)}"
+        )
         parents.append(parent)

     return MergeNode(
-        input_concepts=[environment.concepts[x] for x in shortest.reduced_concepts],
-        output_concepts=all_concepts,
+        input_concepts=[
+            environment.concepts[x]
+            for x in shortest.reduced_concepts
+            if environment.concepts[x].derivation != PurposeLineage.MERGE
+        ],
+        output_concepts=[
+            x for x in all_concepts if x.derivation != PurposeLineage.MERGE
+        ],
         environment=environment,
         g=g,
         parents=parents,
@@ -35,8 +35,26 @@ def gen_rowset_node(
     lineage: RowsetItem = concept.lineage
     rowset: RowsetDerivationStatement = lineage.rowset
     select: SelectStatement | MultiSelectStatement = lineage.rowset.select
+    parents: List[StrategyNode] = []
     if where := select.where_clause:
-        targets = select.output_components + where.conditional.concept_arguments
+        targets = select.output_components + where.conditional.row_arguments
+        for sub_select in where.conditional.existence_arguments:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} generating parent existence node with {[x.address for x in sub_select]}"
+            )
+            parent_check = source_concepts(
+                mandatory_list=sub_select,
+                environment=environment,
+                g=g,
+                depth=depth + 1,
+                history=history,
+            )
+            if not parent_check:
+                logger.info(
+                    f"{padding(depth)}{LOGGER_PREFIX} Cannot generate parent existence node for rowset node for {concept}"
+                )
+                return None
+            parents.append(parent_check)
     else:
         targets = select.output_components
     node: StrategyNode = source_concepts(
@@ -46,6 +64,14 @@ def gen_rowset_node(
         depth=depth + 1,
         history=history,
     )
+
+    # add our existence concepts in
+    if parents:
+        node.parents += parents
+        for parent in parents:
+            for x in parent.output_concepts:
+                if x.address not in node.output_lcl:
+                    node.existence_concepts.append(x)
     if not node:
         logger.info(
             f"{padding(depth)}{LOGGER_PREFIX} Cannot generate rowset node for {concept}"
@@ -53,11 +79,7 @@ def gen_rowset_node(
         return None
     node.conditions = select.where_clause.conditional if select.where_clause else None
     enrichment = set([x.address for x in local_optional])
-    rowset_relevant = [
-        x
-        for x in rowset.derived_concepts
-        # if x.address == concept.address or x.address in enrichment
-    ]
+    rowset_relevant = [x for x in rowset.derived_concepts]
     select_hidden = set([x.address for x in select.hidden_components])
     rowset_hidden = [
         x
@@ -86,9 +108,11 @@ def gen_rowset_node(
     # but don't include anything aggregate at this point
     node.rebuild_cache()
     assert node.resolution_cache
+
     node.resolution_cache.grain = concept_list_to_grain(
         node.output_concepts, parent_sources=node.resolution_cache.datasources
     )
+
     possible_joins = concept_to_relevant_joins(additional_relevant)
     if not local_optional:
         logger.info(
@@ -53,7 +53,7 @@ def dm_to_strategy_node(
         # we have to group
         else:
             logger.info(
-                f"{padding(depth)}{LOGGER_PREFIX} not all grain components are in output {str(dm.matched)}, group to actual grain"
+                f"{padding(depth)}{LOGGER_PREFIX} not all grain components {target_grain} are in output {str(dm.matched)}, group to actual grain"
             )
             force_group = True
     elif all([x in dm.matched for x in datasource.grain.components]):
@@ -76,7 +76,7 @@ def dm_to_strategy_node(
         partial_concepts=dm.partial.concepts,
         accept_partial=accept_partial,
         datasource=datasource,
-        grain=Grain(components=dm.matched.concepts),
+        grain=datasource.grain,
     )
     # we need to nest the group node one further
     if force_group is True:
@@ -317,13 +317,19 @@ def gen_select_node_from_table(
         )
         if target_grain and target_grain.issubset(datasource.grain):

-            if all([x in all_lcl for x in target_grain.components]):
+            if (
+                all([x in all_lcl for x in target_grain.components])
+                and target_grain == datasource.grain
+            ):
+                logger.info(
+                    f"{padding(depth)}{LOGGER_PREFIX} target grain components match all lcl, group to false"
+                )
                 force_group = False
             # if we are not returning the grain
             # we have to group
             else:
                 logger.info(
-                    f"{padding(depth)}{LOGGER_PREFIX} not all grain components are in output {str(all_lcl)}, group to actual grain"
+                    f"{padding(depth)}{LOGGER_PREFIX} not all grain components {target_grain} are in output {str(all_lcl)}, group to actual grain"
                 )
                 force_group = True
         elif all([x in all_lcl for x in datasource.grain.components]):
@@ -363,7 +369,7 @@ def gen_select_node_from_table(
         else:
             candidate = bcandidate
         logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} found select node with {datasource.identifier}, returning {candidate.output_lcl}"
+            f"{padding(depth)}{LOGGER_PREFIX} found select node with {datasource.identifier}, force group is {force_group}, returning {candidate.output_lcl}"
         )
         candidates[datasource.identifier] = candidate
         scores[datasource.identifier] = -len(partial_concepts)
@@ -467,6 +473,8 @@ def gen_select_node(
     target_grain = Grain()
     for ac in all_concepts:
         target_grain += ac.grain
+    if target_grain.abstract:
+        target_grain = Grain(components=all_concepts)
     if materialized_lcl != all_lcl:
         logger.info(
             f"{padding(depth)}{LOGGER_PREFIX} Skipping select node generation for {concept.address} "
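Editor's note: the two added lines above give gen_select_node a fallback when the summed grain of the requested concepts comes out abstract (empty): the requested concepts themselves become the target grain. A toy model of that fallback; the Grain class here is a simplified stand-in for trilogy.core.models.Grain, and the abstract-means-no-components behavior is an assumption:

    from dataclasses import dataclass, field

    @dataclass
    class Grain:
        components: list = field(default_factory=list)

        @property
        def abstract(self):
            # assumed: a grain with no components is abstract
            return not self.components

        def __add__(self, other):
            return Grain(components=sorted(set(self.components) | set(other.components)))

    all_concepts = ["order.id", "customer.id"]
    target_grain = Grain()
    for ac_grain in [Grain(), Grain()]:  # e.g. every requested concept is grain-less
        target_grain = target_grain + ac_grain
    if target_grain.abstract:
        target_grain = Grain(components=all_concepts)
    assert target_grain.components == ["order.id", "customer.id"]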
@@ -513,13 +521,15 @@ def gen_select_node(
                 [c.address in [x.address for x in p.partial_concepts] for p in parents]
             )
         ]
-        force_group = False
+        force_group = None
+        inferred_grain = sum([x.grain for x in parents if x.grain], Grain())
         for candidate in parents:
             if candidate.grain and not candidate.grain.issubset(target_grain):
                 force_group = True
         if len(parents) == 1:
             candidate = parents[0]
         else:
+
             candidate = MergeNode(
                 output_concepts=[concept] + found,
                 input_concepts=[concept] + found,
@@ -528,13 +538,13 @@ def gen_select_node(
                 parents=parents,
                 depth=depth,
                 partial_concepts=all_partial,
-                grain=sum([x.grain for x in parents if x.grain], Grain()),
+                grain=inferred_grain,
             )
             candidate.depth += 1
-        source_grain = candidate.grain
+        # source_grain = candidate.grain
         if force_group:
             logger.info(
-                f"{padding(depth)}{LOGGER_PREFIX} datasource grain {source_grain} does not match target grain {target_grain} for select, adding group node"
+                f"{padding(depth)}{LOGGER_PREFIX} datasource grain {inferred_grain} does not match target grain {target_grain} for select, adding group node"
             )
             return GroupNode(
                 output_concepts=candidate.output_concepts,
@@ -545,6 +555,10 @@ def gen_select_node(
                 depth=depth,
                 partial_concepts=candidate.partial_concepts,
             )
+        else:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} datasource grain {inferred_grain} matches target grain {target_grain} for select, returning without group"
+            )
         return candidate

     if not accept_partial_optional: