pytrilogy 0.0.1.118__py3-none-any.whl → 0.0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)
  1. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.1.dist-info}/METADATA +1 -1
  2. pytrilogy-0.0.2.1.dist-info/RECORD +82 -0
  3. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.1.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +6 -0
  6. trilogy/core/enums.py +7 -2
  7. trilogy/core/env_processor.py +43 -19
  8. trilogy/core/functions.py +1 -0
  9. trilogy/core/models.py +666 -146
  10. trilogy/core/optimization.py +31 -28
  11. trilogy/core/optimizations/inline_constant.py +4 -1
  12. trilogy/core/optimizations/inline_datasource.py +25 -4
  13. trilogy/core/optimizations/predicate_pushdown.py +94 -54
  14. trilogy/core/processing/concept_strategies_v3.py +69 -39
  15. trilogy/core/processing/graph_utils.py +3 -3
  16. trilogy/core/processing/node_generators/__init__.py +0 -2
  17. trilogy/core/processing/node_generators/basic_node.py +30 -17
  18. trilogy/core/processing/node_generators/filter_node.py +3 -1
  19. trilogy/core/processing/node_generators/node_merge_node.py +345 -96
  20. trilogy/core/processing/node_generators/rowset_node.py +18 -16
  21. trilogy/core/processing/node_generators/select_node.py +44 -83
  22. trilogy/core/processing/nodes/__init__.py +2 -0
  23. trilogy/core/processing/nodes/base_node.py +22 -5
  24. trilogy/core/processing/nodes/filter_node.py +3 -0
  25. trilogy/core/processing/nodes/group_node.py +20 -2
  26. trilogy/core/processing/nodes/merge_node.py +32 -18
  27. trilogy/core/processing/nodes/select_node_v2.py +17 -3
  28. trilogy/core/processing/utility.py +100 -8
  29. trilogy/core/query_processor.py +77 -24
  30. trilogy/dialect/base.py +11 -46
  31. trilogy/dialect/bigquery.py +1 -1
  32. trilogy/dialect/common.py +11 -0
  33. trilogy/dialect/duckdb.py +1 -1
  34. trilogy/dialect/presto.py +1 -0
  35. trilogy/hooks/graph_hook.py +50 -5
  36. trilogy/hooks/query_debugger.py +1 -0
  37. trilogy/parsing/common.py +8 -5
  38. trilogy/parsing/parse_engine.py +48 -27
  39. trilogy/parsing/render.py +13 -6
  40. trilogy/parsing/trilogy.lark +12 -7
  41. pytrilogy-0.0.1.118.dist-info/RECORD +0 -83
  42. trilogy/core/processing/node_generators/concept_merge_node.py +0 -214
  43. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.1.dist-info}/LICENSE.md +0 -0
  44. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.1.dist-info}/entry_points.txt +0 -0
  45. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.1.dist-info}/top_level.txt +0 -0
trilogy/core/processing/node_generators/basic_node.py
@@ -4,7 +4,7 @@ from typing import List
 from trilogy.core.models import (
     Concept,
 )
-from trilogy.core.processing.nodes import StrategyNode, History, MergeNode
+from trilogy.core.processing.nodes import StrategyNode, History
 from trilogy.core.processing.node_generators.common import (
     resolve_function_parent_concepts,
 )
@@ -30,16 +30,25 @@ def gen_basic_node(
         f"{depth_prefix}{LOGGER_PREFIX} basic node for {concept} has parents {[x.address for x in parent_concepts]}"
     )
 
-    output_concepts = [concept] + local_optional
-    partials = []
+    local_optional_redundant = [x for x in local_optional if x in parent_concepts]
+    attempts = [(parent_concepts, [concept] + local_optional_redundant)]
+    from itertools import combinations
 
-    attempts = [(parent_concepts, [concept])]
     if local_optional:
-        attempts.append((parent_concepts + local_optional, local_optional + [concept]))
+        for combo in range(1, len(local_optional) + 1):
+            combos = combinations(local_optional, combo)
+            for optional_set in combos:
+                attempts.append(
+                    (
+                        unique(parent_concepts + list(optional_set), "address"),
+                        list(optional_set) + [concept],
+                    )
+                )
 
-    for attempt, output in reversed(attempts):
+    for attempt, basic_output in reversed(attempts):
+        partials = []
         attempt = unique(attempt, "address")
-        parent_node = source_concepts(
+        parent_node: StrategyNode = source_concepts(
            mandatory_list=attempt,
            environment=environment,
            g=g,
@@ -49,24 +58,28 @@ def gen_basic_node(
        if not parent_node:
            continue
        parents: List[StrategyNode] = [parent_node]
-       for x in output_concepts:
+       for x in basic_output:
            sources = [p for p in parents if x in p.output_concepts]
            if not sources:
                continue
            if all(x in source.partial_concepts for source in sources):
                partials.append(x)
+       outputs = parent_node.output_concepts + [concept]
        logger.info(
-           f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}"
+           f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}, output {[x.address for x in outputs]}"
        )
-       return MergeNode(
-           input_concepts=attempt,
-           output_concepts=output,
-           environment=environment,
-           g=g,
-           parents=parents,
-           depth=depth,
-           partial_concepts=partials,
+       # parents.resolve()
+
+       parent_node.add_output_concept(concept)
+
+       parent_node.remove_output_concepts(
+           [
+               x
+               for x in parent_node.output_concepts
+               if x.address not in [y.address for y in basic_output]
+           ]
        )
+       return parent_node
    logger.info(
        f"{depth_prefix}{LOGGER_PREFIX} No basic node could be generated for {concept}"
    )
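
Note: the rewrite above replaces the single all-or-nothing enrichment attempt with an enumeration of every subset of `local_optional`, tried richest-first via `reversed(attempts)`, and returns the mutated parent node instead of wrapping it in a `MergeNode`. A minimal sketch of the enumeration order, with plain strings standing in for `Concept` objects (`build_attempts` is a hypothetical helper for illustration, not part of the package):

    from itertools import combinations

    def build_attempts(parents: list[str], concept: str, optional: list[str]):
        # baseline: parents only, outputting the target concept plus any
        # optional concepts the parents already happen to carry
        redundant = [o for o in optional if o in parents]
        attempts = [(parents, [concept] + redundant)]
        # then every non-empty subset of the optionals, smallest first
        for size in range(1, len(optional) + 1):
            for subset in combinations(optional, size):
                attempts.append((parents + list(subset), list(subset) + [concept]))
        return attempts

    # reversed(): the richest candidate is attempted first, the baseline last
    for inputs, outputs in reversed(build_attempts(["order_id"], "revenue", ["customer", "region"])):
        print(inputs, "->", outputs)

(The real code additionally dedupes each attempt with `unique(..., "address")`.)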
trilogy/core/processing/node_generators/filter_node.py
@@ -2,7 +2,7 @@ from typing import List
 
 
 from trilogy.core.enums import JoinType
-from trilogy.core.models import Concept, Environment, FilterItem
+from trilogy.core.models import Concept, Environment, FilterItem, Grain
 from trilogy.core.processing.nodes import (
     FilterNode,
     MergeNode,
@@ -65,7 +65,9 @@ def gen_filter_node(
     else:
         parent.conditions = where.conditional
     parent.output_concepts = [concept]
+    parent.grain = Grain(components=[concept])
     parent.rebuild_cache()
+
     logger.info(
         f"{padding(depth)}{LOGGER_PREFIX} returning optimized filter node with pushdown to parent with condition {where.conditional}"
     )
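
Note: the functional change here is that when the filter is pushed down into the parent node, the parent is also re-grained to its single surviving output before the cache rebuild, plausibly so downstream logic sees the filtered output's grain rather than the parent's original one. Condensed into a hypothetical helper (the diff performs these steps inline; `parent` is a StrategyNode-like object):

    from trilogy.core.models import Grain

    def push_filter_into_parent(parent, concept, conditional):
        parent.conditions = conditional             # move the filter into the parent
        parent.output_concepts = [concept]          # only the filtered concept survives
        parent.grain = Grain(components=[concept])  # re-grain to match the new output
        parent.rebuild_cache()                      # drop any stale cached resolution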
trilogy/core/processing/node_generators/node_merge_node.py
@@ -1,79 +1,301 @@
 from typing import List, Optional
 
-from trilogy.core.models import Concept, Environment, Datasource, Conditional
-from trilogy.core.processing.nodes import MergeNode, History
+from trilogy.core.models import Concept, Environment, Conditional
+from trilogy.core.processing.nodes import MergeNode, History, StrategyNode
 import networkx as nx
-from trilogy.core.graph_models import concept_to_node, datasource_to_node
+from trilogy.core.graph_models import concept_to_node
 from trilogy.core.processing.utility import PathInfo
 from trilogy.constants import logger
 from trilogy.utility import unique
 from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
 from trilogy.core.processing.utility import padding
-from trilogy.core.processing.graph_utils import extract_mandatory_subgraphs
+from networkx.algorithms import approximation as ax
 from trilogy.core.enums import PurposeLineage
 
+
 LOGGER_PREFIX = "[GEN_MERGE_NODE]"
+AMBIGUITY_CHECK_LIMIT = 20
+
+
+def filter_pseudonyms_for_source(ds_graph: nx.DiGraph, node: str):
+    to_remove = set()
+
+    for edge in ds_graph.edges:
+        if ds_graph.edges[edge].get("pseudonym", False):
+            lengths = {}
+            for n in edge:
+                lengths[n] = nx.shortest_path_length(ds_graph, node, n)
+            to_remove.add(max(lengths, key=lambda x: lengths.get(x, 0)))
+    for node in to_remove:
+        ds_graph.remove_node(node)
+
+
+def extract_address(node: str):
+    return node.split("~")[1].split("@")[0]
+
+
+def extract_concept(node: str, env: Environment):
+    if node in env.alias_origin_lookup:
+        return env.alias_origin_lookup[node]
+    return env.concepts[node]
 
 
-def reduce_path_concepts(paths, g) -> set[str]:
-    concept_nodes: List[Concept] = []
-    # along our path, find all the concepts required
-    for _, value in paths.items():
-        concept_nodes += [g.nodes[v]["concept"] for v in value if v.startswith("c~")]
-    final: List[Concept] = unique(concept_nodes, "address")
-    return set([x.address for x in final])
+def filter_unique_graphs(graphs: list[list[str]]) -> list[list[str]]:
+    unique_graphs: list[set[str]] = []
+    for graph in graphs:
+        if not any(set(graph).issubset(x) for x in unique_graphs):
+            unique_graphs.append(set(graph))
 
+    return [list(x) for x in unique_graphs]
 
-def identify_ds_join_paths(
+
+def extract_ds_components(g: nx.DiGraph, nodelist: list[str]) -> list[list[str]]:
+    graphs = []
+    for node in g.nodes:
+        if node.startswith("ds~"):
+            local = g.copy()
+            filter_pseudonyms_for_source(local, node)
+            ds_graph: nx.DiGraph = nx.ego_graph(local, node, radius=10).copy()
+            graphs.append(
+                [
+                    extract_address(x)
+                    for x in ds_graph.nodes
+                    if not str(x).startswith("ds~")
+                ]
+            )
+
+    graphs = filter_unique_graphs(graphs)
+    for node in nodelist:
+        parsed = extract_address(node)
+        if not any(parsed in x for x in graphs):
+            graphs.append([parsed])
+    return graphs
+
+
+def determine_induced_minimal_nodes(
+    G: nx.DiGraph,
+    nodelist: list[str],
+    environment: Environment,
+    filter_downstream: bool,
+    accept_partial: bool = False,
+) -> nx.DiGraph | None:
+    H: nx.Graph = nx.to_undirected(G).copy()
+    nodes_to_remove = []
+    concepts = nx.get_node_attributes(G, "concept")
+    for node in G.nodes:
+        if concepts.get(node):
+            lookup = concepts[node]
+            if lookup.derivation not in (PurposeLineage.BASIC, PurposeLineage.ROOT):
+                nodes_to_remove.append(node)
+            elif lookup.derivation == PurposeLineage.BASIC and G.out_degree(node) == 0:
+                nodes_to_remove.append(node)
+            # purge a node if we're already looking for all it's parents
+            elif filter_downstream and lookup.derivation == PurposeLineage.BASIC:
+                nodes_to_remove.append(node)
+
+    H.remove_nodes_from(nodes_to_remove)
+
+    H.remove_nodes_from(list(nx.isolates(H)))
+
+    zero_out = list(x for x in H.nodes if G.out_degree(x) == 0 and x not in nodelist)
+    while zero_out:
+        H.remove_nodes_from(zero_out)
+        zero_out = list(
+            x for x in H.nodes if G.out_degree(x) == 0 and x not in nodelist
+        )
+    try:
+        paths = nx.multi_source_dijkstra_path(H, nodelist)
+    except nx.exception.NodeNotFound:
+        return None
+    H.remove_nodes_from(list(x for x in H.nodes if x not in paths))
+    sG: nx.Graph = ax.steinertree.steiner_tree(H, nodelist).copy()
+    final: nx.DiGraph = nx.subgraph(G, sG.nodes).copy()
+    for edge in G.edges:
+        if edge[1] in final.nodes and edge[0].startswith("ds~"):
+            ds_name = extract_address(edge[0])
+            ds = environment.datasources[ds_name]
+            concept = environment.concepts[extract_address(edge[1])]
+            if concept.address in [x.address for x in ds.partial_concepts]:
+                if not accept_partial:
+                    continue
+            final.add_edge(*edge)
+    # all concept nodes must have a parent
+
+    if not all(
+        [final.in_degree(node) > 0 for node in final.nodes if node.startswith("c~")]
+    ):
+        return None
+    if not all([node in final.nodes for node in nodelist]):
+        return None
+    return final
+
+
+def detect_ambiguity_and_raise(all_concepts, reduced_concept_sets) -> None:
+    final_candidates: list[set[str]] = []
+    common: set[str] = set()
+    # find all values that show up in every join_additions
+    for ja in reduced_concept_sets:
+        if not common:
+            common = ja
+        else:
+            common = common.intersection(ja)
+        if all(set(ja).issubset(y) for y in reduced_concept_sets):
+            final_candidates.append(ja)
+
+    if not final_candidates:
+        filtered_paths = [x.difference(common) for x in reduced_concept_sets]
+        raise AmbiguousRelationshipResolutionException(
+            message=f"Multiple possible concept injections found for {[x.address for x in all_concepts]}, got {' or '.join([str(x) for x in reduced_concept_sets])}",
+            parents=filtered_paths,
+        )
+
+
+def resolve_weak_components(
     all_concepts: List[Concept],
-    g,
-    datasource: Datasource,
+    environment: Environment,
+    environment_graph: nx.DiGraph,
+    filter_downstream: bool = True,
     accept_partial: bool = False,
-    fail: bool = False,
-) -> PathInfo | None:
-    all_found = True
-    any_direct_found = False
-    paths = {}
-    for bitem in all_concepts:
-        item = bitem.with_default_grain()
-        target_node = concept_to_node(item)
+) -> list[list[Concept]] | None:
+
+    break_flag = False
+    found = []
+    search_graph = environment_graph.copy()
+    reduced_concept_sets: list[set[str]] = []
+
+    # loop through, removing new nodes we find
+    # to ensure there are not ambiguous loops
+    # (if we did not care about raising ambiguity errors, we could just use the first one)
+    count = 0
+    node_list = [
+        concept_to_node(c.with_default_grain())
+        for c in all_concepts
+        if "__preql_internal" not in c.address
+    ]
+    while break_flag is not True:
+        count += 1
+        if count > AMBIGUITY_CHECK_LIMIT:
+            break_flag = True
        try:
-            path = nx.shortest_path(
-                g,
-                source=datasource_to_node(datasource),
-                target=target_node,
+            g = determine_induced_minimal_nodes(
+                search_graph,
+                node_list,
+                filter_downstream=filter_downstream,
+                accept_partial=accept_partial,
+                environment=environment,
            )
-            paths[target_node] = path
-            if sum([1 for x in path if x.startswith("ds~")]) == 1:
-                any_direct_found = True
-        except nx.exception.NodeNotFound:
-            # TODO: support Verbose logging mode configuration and reenable these
-            all_found = False
-            if fail:
-                raise
-            return None
+
+            if not g or not g.nodes:
+                break_flag = True
+                continue
+            if not nx.is_weakly_connected(g):
+                break_flag = True
+                continue
+
+            all_graph_concepts = [
+                extract_concept(extract_address(node), environment)
+                for node in g.nodes
+                if node.startswith("c~")
+            ]
+            new = [
+                x
+                for x in all_graph_concepts
+                if x.address not in [y.address for y in all_concepts]
+            ]
+
+            new_addresses = set([x.address for x in new])
+            if not new:
+                break_flag = True
+            # remove our new nodes for the next search path
+            for n in new:
+                node = concept_to_node(n)
+                if node in search_graph:
+                    search_graph.remove_node(node)
+            # TODO: figure out better place for debugging
+            # from trilogy.hooks.graph_hook import GraphHook
+            # GraphHook().query_graph_built(g, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
+            found.append(g)
+            reduced_concept_sets.append(new_addresses)
+
        except nx.exception.NetworkXNoPath:
-            all_found = False
-            if fail:
-                raise
-            return None
-    if all_found and any_direct_found:
-        partial = [
-            c.concept
-            for c in datasource.columns
-            if not c.is_complete
-            and c.concept.address in [x.address for x in all_concepts]
-        ]
-        if partial and not accept_partial:
-            return None
+            break_flag = True
+        if g and not g.nodes:
+            break_flag = True
+    if not found:
+        return None
+
+    detect_ambiguity_and_raise(all_concepts, reduced_concept_sets)
+
+    # take our first one as the actual graph
+    g = found[0]
 
-        return PathInfo(
-            paths=paths,
-            datasource=datasource,
-            reduced_concepts=reduce_path_concepts(paths, g),
-            concept_subgraphs=extract_mandatory_subgraphs(paths, g),
+    subgraphs: list[list[Concept]] = []
+    # components = nx.strongly_connected_components(g)
+    components = extract_ds_components(g, node_list)
+    for component in components:
+        # we need to take unique again as different addresses may map to the same concept
+        sub_component = unique(
+            # sorting here is required for reproducibility
+            # todo: we should sort in an optimized order
+            [extract_concept(x, environment) for x in sorted(component)],
+            "address",
        )
-    return None
+        if not sub_component:
+            continue
+        subgraphs.append(sub_component)
+    return subgraphs
+
+
+def subgraphs_to_merge_node(
+    concept_subgraphs: list[list[Concept]],
+    depth: int,
+    all_concepts: List[Concept],
+    environment,
+    g,
+    source_concepts,
+    history,
+    conditions,
+):
+    parents: List[StrategyNode] = []
+    logger.info(
+        f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {[[c.address for c in subgraph] for subgraph in concept_subgraphs]}"
+    )
+    for graph in concept_subgraphs:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} fetching subgraph {[c.address for c in graph]}"
+        )
+
+        parent: StrategyNode | None = source_concepts(
+            mandatory_list=graph,
+            environment=environment,
+            g=g,
+            depth=depth + 1,
+            history=history,
+        )
+        if not parent:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} Unable to instantiate target subgraph"
+            )
+            return None
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} finished subgraph fetch for {[c.address for c in graph]}, have parent {type(parent)} w/ {[c.address for c in parent.output_concepts]}"
+        )
+        parents.append(parent)
+    input_c = []
+    for x in parents:
+        for y in x.output_concepts:
+            input_c.append(y)
+
+    return MergeNode(
+        input_concepts=unique(input_c, "address"),
+        output_concepts=[x for x in all_concepts],
+        environment=environment,
+        g=g,
+        parents=parents,
+        depth=depth,
+        conditions=conditions,
+        # node_joins=[]
+    )
 
 
 def gen_merge_node(
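
Note: the heart of the new resolution path is `determine_induced_minimal_nodes`, which prunes the concept/datasource graph and then asks networkx for an approximate Steiner tree spanning the required nodes. A self-contained toy run of that core call (toy node names, not the package's real graph schema):

    import networkx as nx
    from networkx.algorithms import approximation as ax

    H = nx.Graph()
    H.add_edges_from([
        ("ds~orders", "c~order_id"),
        ("ds~orders", "c~customer_id"),
        ("ds~customers", "c~customer_id"),
        ("ds~customers", "c~region"),
    ])

    # steiner_tree returns an (approximately) minimal connected subgraph
    # containing all terminal nodes; here it must pull in c~customer_id to
    # bridge the two datasources.
    tree = ax.steinertree.steiner_tree(H, ["c~order_id", "c~region"])
    print(sorted(tree.nodes))
    # ['c~customer_id', 'c~order_id', 'c~region', 'ds~customers', 'ds~orders']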
@@ -87,15 +309,32 @@ def gen_merge_node(
     conditions: Conditional | None = None,
 ) -> Optional[MergeNode]:
     join_candidates: List[PathInfo] = []
-    # anchor on datasources
-    final_all_concepts = []
-    for x in all_concepts:
-        final_all_concepts.append(x)
-    for datasource in environment.datasources.values():
-        path = identify_ds_join_paths(final_all_concepts, g, datasource, accept_partial)
-        if path and path.reduced_concepts:
-            join_candidates.append(path)
-    join_candidates.sort(key=lambda x: sum([len(v) for v in x.paths.values()]))
+
+    # inject new concepts into search, and identify if two dses can reach there
+    if not join_candidates:
+        for filter_downstream in [True, False]:
+            weak_resolve = resolve_weak_components(
+                all_concepts,
+                environment,
+                g,
+                filter_downstream=filter_downstream,
+                accept_partial=accept_partial,
+            )
+            if weak_resolve:
+                log_graph = [[y.address for y in x] for x in weak_resolve]
+                logger.info(
+                    f"{padding(depth)}{LOGGER_PREFIX} Was able to resolve graph through weak component resolution - final graph {log_graph}"
+                )
+                return subgraphs_to_merge_node(
+                    weak_resolve,
+                    depth=depth,
+                    all_concepts=all_concepts,
+                    environment=environment,
+                    g=g,
+                    source_concepts=source_concepts,
+                    history=history,
+                    conditions=conditions,
+                )
     if not join_candidates:
         return None
     join_additions: list[set[str]] = []
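
Note: `resolve_weak_components` repeats the search up to `AMBIGUITY_CHECK_LIMIT` times, deleting each round's newly injected concept nodes before searching again, and `detect_ambiguity_and_raise` then accepts the result only if some candidate set of injected concepts is a subset of every other candidate. A toy version of that acceptance rule (`is_unambiguous` is illustrative, not the package's API):

    def is_unambiguous(candidate_sets: list[set[str]]) -> bool:
        # mirrors detect_ambiguity_and_raise: at least one candidate must be
        # contained in all others, i.e. a unique cheapest injection exists
        return any(
            all(candidate.issubset(other) for other in candidate_sets)
            for candidate in candidate_sets
        )

    print(is_unambiguous([{"customer_id"}, {"customer_id", "region"}]))  # True
    print(is_unambiguous([{"store_id"}, {"customer_id"}]))               # False -> would raise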
@@ -129,41 +368,51 @@ def gen_merge_node(
         key=lambda x: len(x.reduced_concepts),
     )[0]
     logger.info(f"{padding(depth)}{LOGGER_PREFIX} final path is {shortest.paths}")
-    # logger.info(f'{padding(depth)}{LOGGER_PREFIX} final reduced concepts are {shortest.concs}')
-    parents = []
-    for graph in shortest.concept_subgraphs:
-        logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} fetching subgraph {[c.address for c in graph]}"
-        )
-        parent = source_concepts(
-            mandatory_list=graph,
-            environment=environment,
-            g=g,
-            depth=depth + 1,
-            history=history,
-        )
-        if not parent:
-            logger.info(
-                f"{padding(depth)}{LOGGER_PREFIX} Unable to instantiate target subgraph"
-            )
-            return None
-        logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} finished subgraph fetch for {[c.address for c in graph]}, have parent {type(parent)}"
-        )
-        parents.append(parent)
 
-    return MergeNode(
-        input_concepts=[
-            environment.concepts[x]
-            for x in shortest.reduced_concepts
-            if environment.concepts[x].derivation != PurposeLineage.MERGE
-        ],
-        output_concepts=[
-            x for x in all_concepts if x.derivation != PurposeLineage.MERGE
-        ],
+    return subgraphs_to_merge_node(
+        shortest.concept_subgraphs,
+        depth=depth,
+        all_concepts=all_concepts,
         environment=environment,
         g=g,
-        parents=parents,
-        depth=depth,
+        source_concepts=source_concepts,
+        history=history,
         conditions=conditions,
     )
+    # parents = []
+    # for graph in shortest.concept_subgraphs:
+    #     logger.info(
+    #         f"{padding(depth)}{LOGGER_PREFIX} fetching subgraph {[c.address for c in graph]}"
+    #     )
+    #     parent = source_concepts(
+    #         mandatory_list=graph,
+    #         environment=environment,
+    #         g=g,
+    #         depth=depth + 1,
+    #         history=history,
+    #     )
+    #     if not parent:
+    #         logger.info(
+    #             f"{padding(depth)}{LOGGER_PREFIX} Unable to instantiate target subgraph"
+    #         )
+    #         return None
+    #     logger.info(
+    #         f"{padding(depth)}{LOGGER_PREFIX} finished subgraph fetch for {[c.address for c in graph]}, have parent {type(parent)}"
+    #     )
+    #     parents.append(parent)
+
+    # return MergeNode(
+    #     input_concepts=[
+    #         environment.concepts[x]
+    #         for x in shortest.reduced_concepts
+    #         if environment.concepts[x].derivation != PurposeLineage.MERGE
+    #     ],
+    #     output_concepts=[
+    #         x for x in all_concepts if x.derivation != PurposeLineage.MERGE
+    #     ],
+    #     environment=environment,
+    #     g=g,
+    #     parents=parents,
+    #     depth=depth,
+    #     conditions=conditions,
+    # )
trilogy/core/processing/node_generators/rowset_node.py
@@ -35,7 +35,7 @@ def gen_rowset_node(
     lineage: RowsetItem = concept.lineage
     rowset: RowsetDerivationStatement = lineage.rowset
     select: SelectStatement | MultiSelectStatement = lineage.rowset.select
-    parents: List[StrategyNode] = []
+    existence_parents: List[StrategyNode] = []
     if where := select.where_clause:
         targets = select.output_components + where.conditional.row_arguments
         for sub_select in where.conditional.existence_arguments:
@@ -54,7 +54,7 @@ def gen_rowset_node(
                 f"{padding(depth)}{LOGGER_PREFIX} Cannot generate parent existence node for rowset node for {concept}"
             )
             return None
-        parents.append(parent_check)
+        existence_parents.append(parent_check)
     else:
         targets = select.output_components
     node: StrategyNode = source_concepts(
@@ -65,18 +65,22 @@ def gen_rowset_node(
         history=history,
     )
 
-    # add our existence concepts in
-    if parents:
-        node.parents += parents
-        for parent in parents:
-            for x in parent.output_concepts:
-                if x.address not in node.output_lcl:
-                    node.existence_concepts.append(x)
     if not node:
         logger.info(
             f"{padding(depth)}{LOGGER_PREFIX} Cannot generate rowset node for {concept}"
         )
         return None
+    # add our existence concepts in
+    if existence_parents:
+        node.parents += existence_parents
+        # we don't need to join to any existence parents
+        if isinstance(node, MergeNode):
+            node.node_joins = []
+        for parent in existence_parents:
+            for x in parent.output_concepts:
+                if x.address not in node.output_lcl:
+                    node.existence_concepts.append(x)
+
     node.conditions = select.where_clause.conditional if select.where_clause else None
     enrichment = set([x.address for x in local_optional])
     rowset_relevant = [x for x in rowset.derived_concepts]
@@ -91,22 +95,21 @@ def gen_rowset_node(
         x for x in select.output_components if x.address in enrichment
     ]
     # add in other other concepts
-    for item in rowset_relevant:
-        node.output_concepts.append(item)
-    for item in additional_relevant:
-        node.output_concepts.append(item)
+
+    node.add_output_concepts(rowset_relevant + additional_relevant)
     if select.where_clause:
         for item in additional_relevant:
             node.partial_concepts.append(item)
-    node.hidden_concepts = rowset_hidden + [
+
+    final_hidden = rowset_hidden + [
         x
         for x in node.output_concepts
        if x.address not in [y.address for y in local_optional + [concept]]
        and x.derivation != PurposeLineage.ROWSET
    ]
+    node.hide_output_concepts(final_hidden)
    # assume grain to be output of select
    # but don't include anything aggregate at this point
-    node.rebuild_cache()
    assert node.resolution_cache
 
    node.resolution_cache.grain = concept_list_to_grain(
@@ -143,7 +146,6 @@ def gen_rowset_node(
            f"{padding(depth)}{LOGGER_PREFIX} Cannot generate rowset enrichment node for {concept} with optional {local_optional}, returning just rowset node"
        )
        return node
-
    return MergeNode(
        input_concepts=enrich_node.output_concepts + node.output_concepts,
        output_concepts=node.output_concepts + local_optional,
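
Note: the rowset changes follow the same theme as basic_node.py: direct mutation of `output_concepts`/`hidden_concepts` followed by a manual `rebuild_cache()` is replaced by `add_output_concepts` and `hide_output_concepts` mutators, which plausibly keep the node's cached resolution consistent on their own (hence the dropped explicit `rebuild_cache()` call before the assert). A stand-in class illustrating that encapsulation (hypothetical, not the package's `StrategyNode`):

    class SketchNode:
        def __init__(self, outputs: list[str]):
            self.output_concepts = list(outputs)
            self.hidden_concepts: list[str] = []
            self.resolution_cache: dict | None = None
            self.rebuild_cache()

        def rebuild_cache(self) -> None:
            visible = [c for c in self.output_concepts if c not in self.hidden_concepts]
            self.resolution_cache = {"visible": visible}

        def add_output_concepts(self, concepts: list[str]) -> None:
            self.output_concepts += [c for c in concepts if c not in self.output_concepts]
            self.rebuild_cache()

        def hide_output_concepts(self, concepts: list[str]) -> None:
            self.hidden_concepts += [c for c in concepts if c not in self.hidden_concepts]
            self.rebuild_cache()

    node = SketchNode(["order_id"])
    node.add_output_concepts(["revenue", "row_number"])
    node.hide_output_concepts(["row_number"])
    print(node.resolution_cache)  # {'visible': ['order_id', 'revenue']}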