pytrilogy 0.0.2.50__py3-none-any.whl → 0.0.2.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic.

Files changed (27)
  1. {pytrilogy-0.0.2.50.dist-info → pytrilogy-0.0.2.51.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.2.50.dist-info → pytrilogy-0.0.2.51.dist-info}/RECORD +27 -25
  3. trilogy/__init__.py +1 -1
  4. trilogy/core/internal.py +5 -1
  5. trilogy/core/models.py +124 -263
  6. trilogy/core/processing/concept_strategies_v3.py +14 -4
  7. trilogy/core/processing/node_generators/basic_node.py +7 -3
  8. trilogy/core/processing/node_generators/common.py +8 -3
  9. trilogy/core/processing/node_generators/filter_node.py +5 -5
  10. trilogy/core/processing/node_generators/group_node.py +24 -8
  11. trilogy/core/processing/node_generators/multiselect_node.py +4 -3
  12. trilogy/core/processing/node_generators/node_merge_node.py +14 -2
  13. trilogy/core/processing/node_generators/rowset_node.py +3 -4
  14. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  15. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +203 -0
  16. trilogy/core/processing/node_generators/select_merge_node.py +17 -9
  17. trilogy/core/processing/nodes/base_node.py +2 -33
  18. trilogy/core/processing/nodes/group_node.py +19 -10
  19. trilogy/core/processing/nodes/merge_node.py +2 -2
  20. trilogy/hooks/graph_hook.py +3 -1
  21. trilogy/parsing/common.py +54 -12
  22. trilogy/parsing/parse_engine.py +39 -20
  23. trilogy/parsing/render.py +8 -1
  24. {pytrilogy-0.0.2.50.dist-info → pytrilogy-0.0.2.51.dist-info}/LICENSE.md +0 -0
  25. {pytrilogy-0.0.2.50.dist-info → pytrilogy-0.0.2.51.dist-info}/WHEEL +0 -0
  26. {pytrilogy-0.0.2.50.dist-info → pytrilogy-0.0.2.51.dist-info}/entry_points.txt +0 -0
  27. {pytrilogy-0.0.2.50.dist-info → pytrilogy-0.0.2.51.dist-info}/top_level.txt +0 -0
trilogy/core/processing/concept_strategies_v3.py
@@ -449,6 +449,7 @@ def generate_node(
  conditions=conditions,
  )
  if not check:
+
  logger.info(
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve root concepts, checking for expanded concepts"
  )
@@ -470,7 +471,6 @@ def generate_node(
  x
  for x in ex_resolve.output_concepts
  if x.address not in [y.address for y in root_targets]
- and x not in ex_resolve.grain.components
  ]

  pseudonyms = [
@@ -478,10 +478,19 @@ def generate_node(
  for x in extra
  if any(x.address in y.pseudonyms for y in root_targets)
  ]
- # if we're only connected by a pseudonym, keep those in output
- expanded.set_output_concepts(root_targets + pseudonyms)
+ logger.info(
+ f"{depth_to_prefix(depth)}{LOGGER_PREFIX} reducing final outputs, was {[c.address for c in ex_resolve.output_concepts]} with extra {[c.address for c in extra]}"
+ )
+ base = [
+ x for x in ex_resolve.output_concepts if x.address not in extra
+ ]
+ for x in root_targets:
+ if x.address not in base:
+ base.append(x)
+ expanded.set_output_concepts(base)
  # but hide them
  if pseudonyms:
+ expanded.add_output_concepts(pseudonyms)
  logger.info(
  f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Hiding pseudonyms{[c.address for c in pseudonyms]}"
  )
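
Note: the hunk above swaps the old one-liner (set_output_concepts(root_targets + pseudonyms)) for a three-step rebuild of the output list. A minimal sketch of that logic, with plain address strings standing in for Concept objects (the names below are illustrative, not the library's):

    def reduce_outputs(resolved, extra, root_targets, pseudonyms):
        # 1. drop the "extra" concepts that only existed to support resolution
        base = [x for x in resolved if x not in extra]
        # 2. guarantee every root target survives the reduction
        for target in root_targets:
            if target not in base:
                base.append(target)
        # 3. pseudonym-only links are re-added as outputs, then hidden by the caller
        return base + [p for p in pseudonyms if p not in base]

    out = reduce_outputs(["a", "b", "b_alias"], ["b_alias"], ["a", "c"], ["b_alias"])
    assert out == ["a", "b", "c", "b_alias"]

Pseudonym-only connections are still emitted, but the surrounding code immediately hides them, per the "Hiding pseudonyms" log line.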
@@ -908,6 +917,7 @@ def _search_concepts(
  parents=stack,
  depth=depth,
  )
+
  # ensure we can resolve our final merge
  output.resolve()
  if condition_required and conditions:
@@ -917,7 +927,7 @@ def _search_concepts(
  output, environment, g, where=conditions, history=history
  )
  logger.info(
- f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Graph is connected, returning merge node, partial {[c.address for c in output.partial_concepts]}"
+ f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Graph is connected, returning {type(output)} node partial {[c.address for c in output.partial_concepts]}"
  )
  return output

trilogy/core/processing/node_generators/basic_node.py
@@ -44,7 +44,7 @@ def gen_basic_node(
  conditions: WhereClause | None = None,
  ):
  depth_prefix = "\t" * depth
- parent_concepts = resolve_function_parent_concepts(concept)
+ parent_concepts = resolve_function_parent_concepts(concept, environment=environment)

  logger.info(
  f"{depth_prefix}{LOGGER_PREFIX} basic node for {concept} has parents {[x.address for x in parent_concepts]}"
@@ -61,12 +61,16 @@ def gen_basic_node(
  f"{depth_prefix}{LOGGER_PREFIX} basic node for {concept} has equivalent optional {[x.address for x in equivalent_optional]}"
  )
  for eo in equivalent_optional:
- parent_concepts += resolve_function_parent_concepts(eo)
+ parent_concepts += resolve_function_parent_concepts(eo, environment=environment)
  non_equivalent_optional = [
  x for x in local_optional if x not in equivalent_optional
  ]
+ all_parents = parent_concepts + non_equivalent_optional
+ logger.info(
+ f"{depth_prefix}{LOGGER_PREFIX} Fetching parents {[x.address for x in all_parents]}"
+ )
  parent_node: StrategyNode = source_concepts(
- mandatory_list=parent_concepts + non_equivalent_optional,
+ mandatory_list=all_parents,
  environment=environment,
  g=g,
  depth=depth + 1,
trilogy/core/processing/node_generators/common.py
@@ -20,12 +20,16 @@ from trilogy.core.processing.nodes.merge_node import MergeNode
  from trilogy.utility import unique


- def resolve_function_parent_concepts(concept: Concept) -> List[Concept]:
+ def resolve_function_parent_concepts(
+ concept: Concept, environment: Environment
+ ) -> List[Concept]:
  if not isinstance(concept.lineage, (Function, AggregateWrapper)):
  raise ValueError(f"Concept {concept} lineage is not function or aggregate")
  if concept.derivation == PurposeLineage.AGGREGATE:
  if not concept.grain.abstract:
- base = concept.lineage.concept_arguments + concept.grain.components_copy
+ base = concept.lineage.concept_arguments + [
+ environment.concepts[c] for c in concept.grain.components
+ ]
  # if the base concept being aggregated is a property with a key
  # keep the key as a parent
  else:
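
Note: this signature change reflects a pattern that recurs throughout the release: Grain components now appear to be stored as addresses (strings) rather than Concept objects, so call sites rehydrate them through the environment. The lookup pattern in isolation, with a plain dict standing in for Environment.concepts (sample addresses invented):

    # plain dict standing in for Environment.concepts (address -> Concept)
    concepts = {"order.id": "<Concept order.id>", "order.date": "<Concept order.date>"}

    # old: concept.grain.components_copy handed back Concept objects directly
    # new: grain.components holds addresses, rehydrated on demand
    grain_components = ["order.id", "order.date"]
    rehydrated = [concepts[c] for c in grain_components]
    assert len(rehydrated) == 2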
@@ -56,6 +60,7 @@ def resolve_condition_parent_concepts(

  def resolve_filter_parent_concepts(
  concept: Concept,
+ environment: Environment,
  ) -> Tuple[Concept, List[Concept], List[Tuple[Concept, ...]]]:
  if not isinstance(concept.lineage, FilterItem):
  raise ValueError(
@@ -70,7 +75,7 @@ def resolve_filter_parent_concepts(
  base_rows += condition_rows
  base_existence += condition_existence
  if direct_parent.grain:
- base_rows += direct_parent.grain.components_copy
+ base_rows += [environment.concepts[c] for c in direct_parent.grain.components]
  if (
  isinstance(direct_parent, Concept)
  and direct_parent.purpose == Purpose.PROPERTY
trilogy/core/processing/node_generators/filter_node.py
@@ -28,7 +28,7 @@ def gen_filter_node(
  conditions: WhereClause | None = None,
  ) -> StrategyNode | None:
  immediate_parent, parent_row_concepts, parent_existence_concepts = (
- resolve_filter_parent_concepts(concept)
+ resolve_filter_parent_concepts(concept, environment)
  )
  if not isinstance(concept.lineage, FilterItem):
  raise SyntaxError('Filter node must have a lineage of type "FilterItem"')
@@ -136,8 +136,8 @@ def gen_filter_node(
  parent.add_existence_concepts(flattened_existence, False).set_output_concepts(
  expected_output, False
  )
- parent.grain = Grain(
- components=(
+ parent.grain = Grain.from_concepts(
+ (
  list(immediate_parent.keys)
  if immediate_parent.keys
  else [immediate_parent]
@@ -161,8 +161,8 @@ def gen_filter_node(
  output_concepts=[concept, immediate_parent] + parent_row_concepts,
  environment=environment,
  parents=core_parents,
- grain=Grain(
- components=[immediate_parent] + parent_row_concepts,
+ grain=Grain.from_concepts(
+ [immediate_parent] + parent_row_concepts,
  ),
  preexisting_conditions=conditions.conditional if conditions else None,
  )
trilogy/core/processing/node_generators/group_node.py
@@ -34,7 +34,7 @@ def gen_group_node(
  # aggregates MUST always group to the proper grain
  # except when the
  parent_concepts: List[Concept] = unique(
- resolve_function_parent_concepts(concept), "address"
+ resolve_function_parent_concepts(concept, environment=environment), "address"
  )
  logger.info(
  f"{padding(depth)}{LOGGER_PREFIX} parent concepts are {[x.address for x in parent_concepts]} from group grain {concept.grain}"
@@ -43,18 +43,28 @@ def gen_group_node(
  # if the aggregation has a grain, we need to ensure these are the ONLY optional in the output of the select
  output_concepts = [concept]

- if concept.grain and len(concept.grain.components_copy) > 0:
- grain_components = (
- concept.grain.components_copy if not concept.grain.abstract else []
- )
+ if (
+ concept.grain
+ and len(concept.grain.components) > 0
+ and not concept.grain.abstract
+ ):
+ grain_components = [environment.concepts[c] for c in concept.grain.components]
  parent_concepts += grain_components
  output_concepts += grain_components
  for possible_agg in local_optional:
+
  if not isinstance(possible_agg.lineage, (AggregateWrapper, Function)):
  continue
+ logger.info(possible_agg)
+ if possible_agg.grain and possible_agg.grain != concept.grain:
+ logger.info(
+ f"{padding(depth)}{LOGGER_PREFIX} mismatched equivalent group by with grain {possible_agg.grain} for {concept.address}"
+ )
+
  if possible_agg.grain and possible_agg.grain == concept.grain:
  agg_parents: List[Concept] = resolve_function_parent_concepts(
- possible_agg
+ possible_agg,
+ environment=environment,
  )
  if set([x.address for x in agg_parents]).issubset(
  set([x.address for x in parent_concepts])
@@ -63,13 +73,19 @@ def gen_group_node(
  logger.info(
  f"{padding(depth)}{LOGGER_PREFIX} found equivalent group by optional concept {possible_agg.address} for {concept.address}"
  )
- elif Grain(components=agg_parents) == Grain(components=parent_concepts):
+ elif Grain.from_concepts(agg_parents) == Grain.from_concepts(
+ parent_concepts
+ ):
  extra = [x for x in agg_parents if x.address not in parent_concepts]
  parent_concepts += extra
  output_concepts.append(possible_agg)
  logger.info(
  f"{padding(depth)}{LOGGER_PREFIX} found equivalent group by optional concept {possible_agg.address} for {concept.address}"
  )
+ else:
+ logger.info(
+ f"{padding(depth)}{LOGGER_PREFIX} mismatched grain {Grain.from_concepts(agg_parents)} vs {Grain.from_concepts(parent_concepts)}"
+ )
  if parent_concepts:
  logger.info(
  f"{padding(depth)}{LOGGER_PREFIX} fetching group node parents {LooseConceptList(concepts=parent_concepts)}"
@@ -94,7 +110,7 @@ def gen_group_node(

  # the keys we group by
  # are what we can use for enrichment
- group_key_parents = concept.grain.components_copy
+ group_key_parents = [environment.concepts[c] for c in concept.grain.components]

  group_node = GroupNode(
  output_concepts=output_concepts,
trilogy/core/processing/node_generators/multiselect_node.py
@@ -8,12 +8,13 @@ from trilogy.core.models import (
  Concept,
  Conditional,
  Environment,
+ Grain,
  MultiSelectStatement,
  WhereClause,
  )
  from trilogy.core.processing.node_generators.common import resolve_join_order
  from trilogy.core.processing.nodes import History, MergeNode, NodeJoin
- from trilogy.core.processing.nodes.base_node import StrategyNode, concept_list_to_grain
+ from trilogy.core.processing.nodes.base_node import StrategyNode
  from trilogy.core.processing.utility import concept_to_relevant_joins, padding

  LOGGER_PREFIX = "[GEN_MULTISELECT_NODE]"
@@ -137,8 +138,8 @@ def gen_multiselect_node(

  # assume grain to be output of select
  # but don't include anything aggregate at this point
- node.resolution_cache.grain = concept_list_to_grain(
- node.output_concepts, parent_sources=node.resolution_cache.datasources
+ node.resolution_cache.grain = Grain.from_concepts(
+ node.output_concepts,
  )
  possible_joins = concept_to_relevant_joins(additional_relevant)
  if not local_optional:
trilogy/core/processing/node_generators/node_merge_node.py
@@ -327,11 +327,17 @@ def subgraphs_to_merge_node(
  for y in x.output_concepts:
  input_c.append(y)
  if len(parents) == 1 and enable_early_exit:
+ logger.info(
+ f"{padding(depth)}{LOGGER_PREFIX} only one parent node, exiting early w/ {[c.address for c in parents[0].output_concepts]}"
+ )
  return parents[0]
-
+ base_output = [x for x in all_concepts]
+ # for x in base_output:
+ # if x not in input_c:
+ # input_c.append(x)
  return MergeNode(
  input_concepts=unique(input_c, "address"),
- output_concepts=[x for x in all_concepts],
+ output_concepts=base_output,
  environment=environment,
  parents=parents,
  depth=depth,
@@ -368,6 +374,12 @@ def gen_merge_node(
  logger.info(
  f"{padding(depth)}{LOGGER_PREFIX} Was able to resolve graph through weak component resolution - final graph {log_graph}"
  )
+ for flat in log_graph:
+ if set(flat) == set([x.address for x in all_concepts]):
+ logger.info(
+ f"{padding(depth)}{LOGGER_PREFIX} expanded concept resolution was identical to search resolution; breaking to avoid recursion error."
+ )
+ return None
  return subgraphs_to_merge_node(
  weak_resolve,
  depth=depth,
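
Note: the added loop is a recursion guard: if a flattened subgraph from weak-component resolution is exactly the address set already being searched, expansion made no progress, and gen_merge_node returns None instead of recursing forever. The check in isolation (addresses illustrative):

    def expansion_made_progress(log_graph: list[list[str]], searched: set[str]) -> bool:
        # mirrors the new guard: a subgraph identical to the search set means
        # gen_merge_node would re-enter with the same inputs, i.e. infinite recursion
        return not any(set(flat) == searched for flat in log_graph)

    assert expansion_made_progress([["a", "b"]], {"a", "b", "c"})
    assert not expansion_made_progress([["a", "b", "c"]], {"a", "b", "c"})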
trilogy/core/processing/node_generators/rowset_node.py
@@ -5,6 +5,7 @@ from trilogy.core.enums import PurposeLineage
  from trilogy.core.models import (
  Concept,
  Environment,
+ Grain,
  MultiSelectStatement,
  RowsetDerivationStatement,
  RowsetItem,
@@ -12,7 +13,6 @@ from trilogy.core.models import (
  WhereClause,
  )
  from trilogy.core.processing.nodes import History, MergeNode, StrategyNode
- from trilogy.core.processing.nodes.base_node import concept_list_to_grain
  from trilogy.core.processing.utility import concept_to_relevant_joins, padding

  LOGGER_PREFIX = "[GEN_ROWSET_NODE]"
@@ -74,7 +74,7 @@ def gen_rowset_node(
  assert node.resolution_cache
  # assume grain to be output of select
  # but don't include anything hidden(the non-rowset concepts)
- node.grain = concept_list_to_grain(
+ node.grain = Grain.from_concepts(
  [
  x
  for x in node.output_concepts
@@ -83,7 +83,6 @@ def gen_rowset_node(
  y for y in node.hidden_concepts if y.derivation != PurposeLineage.ROWSET
  ]
  ],
- parent_sources=node.resolution_cache.datasources,
  )

  node.rebuild_cache()
@@ -92,7 +91,7 @@ def gen_rowset_node(
  x.address in node.output_concepts for x in local_optional
  ):
  logger.info(
- f"{padding(depth)}{LOGGER_PREFIX} no enrichment required for rowset node as all optional found or no optional; exiting early."
+ f"{padding(depth)}{LOGGER_PREFIX} no enrichment required for rowset node as all optional {[x.address for x in local_optional]} found or no optional; exiting early."
  )
  return node
  possible_joins = concept_to_relevant_joins(
trilogy/core/processing/node_generators/select_helpers/datasource_injection.py (new file)
@@ -0,0 +1,203 @@
+ from collections import defaultdict
+ from datetime import date, datetime, timedelta
+ from typing import List, Tuple, TypeVar
+
+ from trilogy.core.enums import ComparisonOperator
+ from trilogy.core.models import (
+ Comparison,
+ Concept,
+ Conditional,
+ Datasource,
+ DataType,
+ Function,
+ FunctionType,
+ Parenthetical,
+ )
+
+ # Define a generic type that ensures start and end are the same type
+ T = TypeVar("T", int, date, datetime)
+
+
+ def reduce_expression(
+ var: Concept, group_tuple: list[tuple[ComparisonOperator, T]]
+ ) -> bool:
+ # Track ranges
+ lower_check: T
+ upper_check: T
+
+ # if var.datatype in (DataType.FLOAT,):
+ # lower_check = float("-inf") # type: ignore
+ # upper_check = float("inf") # type: ignore
+ if var.datatype == DataType.INTEGER:
+ lower_check = float("-inf") # type: ignore
+ upper_check = float("inf") # type: ignore
+ elif var.datatype == DataType.DATE:
+ lower_check = date.min # type: ignore
+ upper_check = date.max # type: ignore
+
+ elif var.datatype == DataType.DATETIME:
+ lower_check = datetime.min # type: ignore
+ upper_check = datetime.max # type: ignore
+ else:
+ raise ValueError(f"Invalid datatype: {var.datatype}")
+
+ ranges: list[Tuple[T, T]] = []
+ for op, value in group_tuple:
+ increment: int | timedelta
+ if isinstance(value, date):
+ increment = timedelta(days=1)
+ elif isinstance(value, datetime):
+ increment = timedelta(seconds=1)
+ elif isinstance(value, int):
+ increment = 1
+ # elif isinstance(value, float):
+ # value = Decimal(value)
+ # increment = Decimal(0.0000000001)
+
+ if op == ">":
+ ranges.append(
+ (
+ value + increment,
+ upper_check,
+ )
+ )
+ elif op == ">=":
+ ranges.append(
+ (
+ value,
+ upper_check,
+ )
+ )
+ elif op == "<":
+ ranges.append(
+ (
+ lower_check,
+ value - increment,
+ )
+ )
+ elif op == "<=":
+ ranges.append(
+ (
+ lower_check,
+ value,
+ )
+ )
+ elif op == "=":
+ ranges.append(
+ (
+ value,
+ value,
+ )
+ )
+ else:
+ raise ValueError(f"Invalid operator: {op}")
+ return is_fully_covered(lower_check, upper_check, ranges, increment)
+
+
+ def simplify_conditions(
+ conditions: list[Comparison | Conditional | Parenthetical],
+ ) -> bool:
+ # Group conditions by variable
+ grouped: dict[Concept, list[tuple[ComparisonOperator, datetime | int | date]]] = (
+ defaultdict(list)
+ )
+ for condition in conditions:
+ if not isinstance(condition, Comparison):
+ return False
+ if not isinstance(
+ condition.left, (int, date, datetime, Function)
+ ) and not isinstance(condition.right, (int, date, datetime, Function)):
+ return False
+ if not isinstance(condition.left, Concept) and not isinstance(
+ condition.right, Concept
+ ):
+ return False
+ vars = [condition.left, condition.right]
+ concept = [x for x in vars if isinstance(x, Concept)][0]
+ comparison = [x for x in vars if not isinstance(x, Concept)][0]
+ if isinstance(comparison, Function):
+ if not comparison.operator == FunctionType.CONSTANT:
+ return False
+ first_arg = comparison.arguments[0]
+ if not isinstance(first_arg, (int, date, datetime)):
+ return False
+ comparison = first_arg
+ if not isinstance(comparison, (int, date, datetime)):
+ return False
+
+ var = concept
+ op = condition.operator
+ grouped[var].append((op, comparison))
+
+ simplified = []
+ for var, group_tuple in grouped.items():
+ simplified.append(reduce_expression(var, group_tuple)) # type: ignore
+
+ # Final simplification
+ return True if all(isinstance(s, bool) and s for s in simplified) else False
+
+
+ def is_fully_covered(
+ start: T,
+ end: T,
+ ranges: List[Tuple[T, T]],
+ increment: int | timedelta,
+ ):
+ """
+ Check if the list of range pairs fully covers the set [start, end].
+
+ Parameters:
+ - start (int or float): The starting value of the set to cover.
+ - end (int or float): The ending value of the set to cover.
+ - ranges (list of tuples): List of range pairs [(start1, end1), (start2, end2), ...].
+
+ Returns:
+ - bool: True if the ranges fully cover [start, end], False otherwise.
+ """
+ # Sort ranges by their start values (and by end values for ties)
+ ranges.sort()
+
+ # Check for gaps
+ current_end = start
+ print(ranges)
+ for r_start, r_end in ranges:
+ print(r_start, r_end)
+ # If there's a gap between the current range and the previous coverage
+ print(r_start - current_end)
+ if (r_start - current_end) > increment: # type: ignore
+ print("gap")
+ return False
+ print("okay")
+ # Extend the current coverage
+ current_end = max(current_end, r_end)
+
+ # If the loop ends and we haven't reached the end, return False
+ print(current_end, end)
+ print(current_end >= end)
+ return current_end >= end
+
+
+ def get_union_sources(datasources: list[Datasource], concepts: list[Concept]):
+ candidates: list[Datasource] = []
+ for x in datasources:
+ if all([c.address in x.output_concepts for c in concepts]):
+ if (
+ any([c.address in x.partial_concepts for c in concepts])
+ and x.non_partial_for
+ ):
+ candidates.append(x)
+
+ assocs: dict[str, list[Datasource]] = defaultdict(list[Datasource])
+ for x in candidates:
+ if not x.non_partial_for:
+ continue
+ if not len(x.non_partial_for.concept_arguments) == 1:
+ continue
+ merge_key = x.non_partial_for.concept_arguments[0]
+ assocs[merge_key.address].append(x)
+ final: list[list[Datasource]] = []
+ for _, dses in assocs.items():
+ conditions = [c.non_partial_for.conditional for c in dses if c.non_partial_for]
+ if simplify_conditions(conditions):
+ final.append(dses)
+ return final
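
Note: this new module is the largest addition in the release: get_union_sources groups datasources that are partial along the same merge key and, via simplify_conditions/reduce_expression/is_fully_covered, checks whether their filter conditions jointly cover the whole domain, in which case they appear usable together as a union. A usage sketch for the coverage primitive, assuming pytrilogy 0.0.2.51 is installed (note the function still carries debug print calls):

    from trilogy.core.processing.node_generators.select_helpers.datasource_injection import (
        is_fully_covered,
    )

    # [1, 5] and [6, 10] tile [1, 10] exactly at integer increment 1 -> True
    print(is_fully_covered(1, 10, [(1, 5), (6, 10)], 1))
    # the gap between 4 and 7 exceeds the increment -> False
    print(is_fully_covered(1, 10, [(1, 4), (7, 10)], 1))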
trilogy/core/processing/node_generators/select_merge_node.py
@@ -23,9 +23,6 @@ from trilogy.core.processing.nodes import (
  SelectNode,
  StrategyNode,
  )
- from trilogy.core.processing.nodes.base_node import (
- concept_list_to_grain,
- )
  from trilogy.core.processing.utility import padding

  LOGGER_PREFIX = "[GEN_ROOT_MERGE_NODE]"
@@ -79,6 +76,7 @@ def create_pruned_concept_graph(
  datasources: list[Datasource],
  accept_partial: bool = False,
  conditions: WhereClause | None = None,
+ depth: int = 0,
  ) -> nx.DiGraph:
  orig_g = g
  g = g.copy()
@@ -104,6 +102,8 @@ def create_pruned_concept_graph(
  # filter out synonyms
  if (x := concepts.get(n, None)) and x.address in target_addresses
  }
+ # from trilogy.hooks.graph_hook import GraphHook
+ # GraphHook().query_graph_built(g)
  relevant_concepts: list[str] = list(relevant_concepts_pre.keys())
  relevent_datasets: list[str] = []
  if not accept_partial:
@@ -159,8 +159,14 @@ def create_pruned_concept_graph(

  subgraphs = list(nx.connected_components(g.to_undirected()))
  if not subgraphs:
+ logger.info(
+ f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - no subgraphs after node prune"
+ )
  return None
  if subgraphs and len(subgraphs) != 1:
+ logger.info(
+ f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - subgraphs are split - have {len(subgraphs)} from {subgraphs}"
+ )
  return None
  # add back any relevant edges that might have been partially filtered
  relevant = set(relevant_concepts + relevent_datasets)
@@ -169,6 +175,9 @@ def create_pruned_concept_graph(
  g.add_edge(edge[0], edge[1])
  # if we have no ds nodes at all, for non constant, we can't find it
  if not any([n.startswith("ds~") for n in g.nodes]):
+ logger.info(
+ f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - No datasource nodes found"
+ )
  return None
  return g

@@ -231,7 +240,7 @@ def create_datasource_node(
  depth: int,
  conditions: WhereClause | None = None,
  ) -> tuple[StrategyNode, bool]:
- target_grain = Grain(components=all_concepts)
+ target_grain = Grain.from_concepts(all_concepts)
  force_group = False
  if not datasource.grain.issubset(target_grain):
  force_group = True
@@ -261,7 +270,7 @@ def create_datasource_node(
  nullable_concepts=[c for c in all_concepts if c in nullable_lcl],
  accept_partial=accept_partial,
  datasource=datasource,
- grain=Grain(components=all_concepts),
+ grain=Grain.from_concepts(all_concepts),
  conditions=datasource.where.conditional if datasource.where else None,
  preexisting_conditions=(
  conditions.conditional if partial_is_full and conditions else None
@@ -383,6 +392,7 @@ def gen_select_merge_node(
  accept_partial=attempt,
  conditions=conditions,
  datasources=list(environment.datasources.values()),
+ depth=depth,
  )
  if pruned_concept_graph:
  logger.info(
@@ -391,9 +401,7 @@ def gen_select_merge_node(
  break

  if not pruned_concept_graph:
- logger.info(
- f"{padding(depth)}{LOGGER_PREFIX} no covering graph found {attempt}"
- )
+ logger.info(f"{padding(depth)}{LOGGER_PREFIX} no covering graph found.")
  return None

  sub_nodes = resolve_subgraphs(pruned_concept_graph, conditions)
@@ -446,7 +454,7 @@ def gen_select_merge_node(
  parents=parents,
  preexisting_conditions=preexisting_conditions,
  )
- target_grain = concept_list_to_grain(all_concepts, [])
+ target_grain = Grain.from_concepts(all_concepts)
  if not base.resolve().grain.issubset(target_grain):
  return GroupNode(
  output_concepts=all_concepts,
trilogy/core/processing/nodes/base_node.py
@@ -1,12 +1,10 @@
  from collections import defaultdict
  from dataclasses import dataclass
- from typing import List, Optional, Sequence
+ from typing import List, Optional

  from trilogy.core.enums import (
  BooleanOperator,
- Granularity,
  JoinType,
- Purpose,
  PurposeLineage,
  )
  from trilogy.core.models import (
@@ -26,31 +24,6 @@ from trilogy.core.models import (
  from trilogy.utility import unique


- def concept_list_to_grain(
- inputs: List[Concept], parent_sources: Sequence[QueryDatasource | Datasource]
- ) -> Grain:
- candidates = [
- c
- for c in inputs
- if c.purpose == Purpose.KEY and c.granularity != Granularity.SINGLE_ROW
- ]
- for x in inputs:
- if x.granularity == Granularity.SINGLE_ROW:
- continue
- if x.purpose == Purpose.PROPERTY and not any(
- [key in candidates for key in (x.keys or [])]
- ):
- candidates.append(x)
- elif x.purpose == Purpose.CONSTANT:
- candidates.append(x)
- elif x.purpose == Purpose.METRIC:
- # metrics that were previously calculated must be included in grain
- if any([x in parent.output_concepts for parent in parent_sources]):
- candidates.append(x)
-
- return Grain(components=candidates)
-
-
  def resolve_concept_map(
  inputs: List[QueryDatasource | Datasource],
  targets: List[Concept],
@@ -351,11 +324,7 @@ class StrategyNode:
  p.resolve() for p in self.parents
  ]

- grain = (
- self.grain
- if self.grain
- else concept_list_to_grain(self.output_concepts, [])
- )
+ grain = self.grain if self.grain else Grain.from_concepts(self.output_concepts)
  source_map = resolve_concept_map(
  parent_sources,
  targets=self.output_concepts,
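
Note: with concept_list_to_grain deleted here and every call site migrated (filter_node, group_node, multiselect_node, rowset_node, select_merge_node), grain derivation is centralized on the model as Grain.from_concepts, and the parent_sources argument is gone. A hypothetical stand-in sketching the shape of the new classmethod (the real Grain lives in trilogy.core.models and applies purpose-based filtering like the removed helper did):

    from dataclasses import dataclass, field

    @dataclass(frozen=True)
    class Concept:  # minimal stand-in with just an address
        address: str

    @dataclass(frozen=True)
    class MiniGrain:
        # the released code suggests Grain stores component addresses, not Concepts
        components: frozenset = field(default_factory=frozenset)

        @classmethod
        def from_concepts(cls, concepts) -> "MiniGrain":
            # stand-in only: the library version also filters keys/properties/metrics
            return cls(frozenset(c.address for c in concepts))

    assert MiniGrain.from_concepts([Concept("a"), Concept("b")]) == MiniGrain(frozenset({"a", "b"}))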