pytrilogy 0.3.138__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.138.dist-info/METADATA +525 -0
  5. pytrilogy-0.3.138.dist-info/RECORD +182 -0
  6. pytrilogy-0.3.138.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +9 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +87 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +143 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2672 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +494 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +748 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +517 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +106 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1359 -0
  112. trilogy/dialect/bigquery.py +256 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +177 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +91 -0
  121. trilogy/dialect/presto.py +104 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +90 -0
  124. trilogy/dialect/sql_server.py +92 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +750 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +7 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/dependency/Cargo.lock +617 -0
  148. trilogy/scripts/dependency/Cargo.toml +39 -0
  149. trilogy/scripts/dependency/README.md +131 -0
  150. trilogy/scripts/dependency/build.sh +25 -0
  151. trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
  152. trilogy/scripts/dependency/src/lib.rs +16 -0
  153. trilogy/scripts/dependency/src/main.rs +770 -0
  154. trilogy/scripts/dependency/src/parser.rs +435 -0
  155. trilogy/scripts/dependency/src/preql.pest +208 -0
  156. trilogy/scripts/dependency/src/python_bindings.rs +289 -0
  157. trilogy/scripts/dependency/src/resolver.rs +716 -0
  158. trilogy/scripts/dependency/tests/base.preql +3 -0
  159. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  160. trilogy/scripts/dependency/tests/customer.preql +6 -0
  161. trilogy/scripts/dependency/tests/main.preql +9 -0
  162. trilogy/scripts/dependency/tests/orders.preql +7 -0
  163. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  164. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  165. trilogy/scripts/dependency.py +323 -0
  166. trilogy/scripts/display.py +460 -0
  167. trilogy/scripts/environment.py +46 -0
  168. trilogy/scripts/parallel_execution.py +483 -0
  169. trilogy/scripts/single_execution.py +131 -0
  170. trilogy/scripts/trilogy.py +772 -0
  171. trilogy/std/__init__.py +0 -0
  172. trilogy/std/color.preql +3 -0
  173. trilogy/std/date.preql +13 -0
  174. trilogy/std/display.preql +18 -0
  175. trilogy/std/geography.preql +22 -0
  176. trilogy/std/metric.preql +15 -0
  177. trilogy/std/money.preql +67 -0
  178. trilogy/std/net.preql +14 -0
  179. trilogy/std/ranking.preql +7 -0
  180. trilogy/std/report.preql +5 -0
  181. trilogy/std/semantic.preql +6 -0
  182. trilogy/utility.py +34 -0
trilogy/core/processing/node_generators/node_merge_node.py
@@ -0,0 +1,653 @@
+from itertools import combinations
+from typing import Callable, List, Optional
+
+import networkx as nx
+from networkx.algorithms import approximation as ax
+
+from trilogy.constants import logger
+from trilogy.core.enums import Derivation, FunctionType
+from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
+from trilogy.core.graph_models import (
+    ReferenceGraph,
+    concept_to_node,
+    prune_sources_for_conditions,
+)
+from trilogy.core.models.build import (
+    BuildConcept,
+    BuildConditional,
+    BuildFunction,
+    BuildGrain,
+    BuildWhereClause,
+)
+from trilogy.core.models.build_environment import BuildEnvironment
+from trilogy.core.processing.nodes import History, MergeNode, StrategyNode
+from trilogy.core.processing.utility import padding
+from trilogy.utility import unique
+
+LOGGER_PREFIX = "[GEN_MERGE_NODE]"
+AMBIGUITY_CHECK_LIMIT = 20
+
+
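+# Graph nodes are namespaced strings: datasource nodes are prefixed "ds~" and
+# concept nodes "c~"; pseudonym edges pair nodes whose concepts alias each other.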
+def filter_pseudonyms_for_source(
+    ds_graph: nx.DiGraph, node: str, pseudonyms: set[tuple[str, str]]
+):
+    to_remove = set()
+    for edge in ds_graph.edges:
+        if edge in pseudonyms:
+            lengths = {}
+            for n in edge:
+                try:
+                    lengths[n] = nx.shortest_path_length(ds_graph, node, n)
+                except nx.NetworkXNoPath:
+                    lengths[n] = 999
+            # drop whichever end of the pseudonym edge is farther from the source
+            to_remove.add(max(lengths, key=lambda x: lengths.get(x, 0)))
+    for removal in to_remove:
+        ds_graph.remove_node(removal)
+
+
+def extract_address(node: str):
+    return node.split("~")[1].split("@")[0]
+
+
+def extract_concept(node: str, env: BuildEnvironment):
+    # removed as part of canonical mapping
+    # if node in env.alias_origin_lookup:
+    #     return env.alias_origin_lookup[node]
+    return env.canonical_concepts[node]
+
+
+def filter_unique_graphs(graphs: list[list[str]]) -> list[list[str]]:
+    unique_graphs: list[set[str]] = []
+
+    # sort graphs from largest to smallest
+    graphs.sort(key=lambda x: len(x), reverse=True)
+    for graph in graphs:
+        if not any(set(graph).issubset(x) for x in unique_graphs):
+            unique_graphs.append(set(graph))
+
+    return [list(x) for x in unique_graphs]
+
+
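+# Split a resolved graph into per-datasource concept groups: take the ego graph
+# around each "ds~" node (with pseudonym duplicates pruned), drop groups that are
+# subsets of others, and give any still-uncovered concept a singleton group.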
+def extract_ds_components(
+    g: nx.DiGraph, nodelist: list[str], pseudonyms: set[tuple[str, str]]
+) -> list[list[str]]:
+    graphs = []
+    for node in g.nodes:
+        if node.startswith("ds~"):
+            local = g.copy()
+            filter_pseudonyms_for_source(local, node, pseudonyms)
+            ds_graph: nx.DiGraph = nx.ego_graph(local, node, radius=10).copy()
+            graphs.append(
+                [
+                    extract_address(x)
+                    for x in ds_graph.nodes
+                    if not str(x).startswith("ds~")
+                ]
+            )
+    # if we had no ego graphs, return all concepts
+    if not graphs:
+        return [[extract_address(node) for node in nodelist]]
+    graphs = filter_unique_graphs(graphs)
+    for node in nodelist:
+        parsed = extract_address(node)
+        if not any(parsed in x for x in graphs):
+            graphs.append([parsed])
+    return graphs
+
+
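+# e.g. keeping only "ds~" nodes turns the chain ds1 - c1 - ds2 into ds1 - ds2,
+# preserving datasource adjacency while hiding intermediate concept nodes.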
+def prune_and_merge(
+    G: nx.DiGraph,
+    keep_node_lambda: Callable[[str], bool],
+) -> nx.DiGraph:
+    """
+    Prune nodes of one type and create direct connections between remaining nodes.
+
+    Args:
+        G: NetworkX graph
+        keep_node_lambda: predicate returning True for nodes that should be kept
+
+    Returns:
+        New graph with only the kept nodes and merged connections
+    """
+    # Get nodes to keep
+    nodes_to_keep = [n for n in G.nodes if keep_node_lambda(n)]
+    # Create new graph
+    new_graph = G.subgraph(nodes_to_keep).copy()
+
+    # Find paths between nodes to keep through removed nodes
+    nodes_to_remove = [n for n in G.nodes() if n not in nodes_to_keep]
+
+    for node_pair in combinations(nodes_to_keep, 2):
+        n1, n2 = node_pair
+
+        # Check if there's a path through removed nodes
+        try:
+            path = nx.shortest_path(G, n1, n2)
+            # If path exists and goes through nodes we're removing
+            if len(path) > 2 or any(node in nodes_to_remove for node in path[1:-1]):
+                new_graph.add_edge(n1, n2)
+        except nx.NetworkXNoPath:
+            continue
+
+    return new_graph
+
+
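+# After a join path is found, inspect each adjacent pair of datasource nodes and
+# re-add shared join keys (at default grain) that the minimal tree dropped, so
+# later join resolution sees the full set of common keys.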
+def reinject_common_join_keys_v2(
+    G: ReferenceGraph,
+    final: nx.DiGraph,
+    nodelist: list[str],
+    synonyms: set[str] = set(),
+) -> bool:
+    # when we've discovered a concept join, for each pair of ds nodes
+    # check if they have more keys in common,
+    # and inject those into discovery as join conditions
+    def is_ds_node(n: str) -> bool:
+        return n.startswith("ds~")
+
+    ds_graph = prune_and_merge(final, is_ds_node)
+    injected = False
+
+    for datasource in ds_graph.nodes:
+        node1 = G.datasources[datasource]
+        neighbors = nx.all_neighbors(ds_graph, datasource)
+        for neighbor in neighbors:
+            node2 = G.datasources[neighbor]
+            common_concepts = set(
+                x.concept.address for x in node1.columns
+            ).intersection(set(x.concept.address for x in node2.columns))
+            concrete_concepts = [
+                x.concept for x in node1.columns if x.concept.address in common_concepts
+            ]
+            reduced = BuildGrain.from_concepts(concrete_concepts).components
+            existing_addresses = set()
+            for concrete in concrete_concepts:
+                cnode = concept_to_node(concrete.with_default_grain())
+                if cnode in final.nodes:
+                    existing_addresses.add(concrete.address)
+                    continue
+            for concrete in concrete_concepts:
+                if concrete.address in synonyms:
+                    continue
+                if concrete.address not in reduced:
+                    continue
+                if concrete.address in existing_addresses:
+                    continue
+                # skip anything that is already in the graph pseudonyms
+                if any(x in concrete.pseudonyms for x in existing_addresses):
+                    continue
+                cnode = concept_to_node(concrete.with_default_grain())
+                final.add_edge(datasource, cnode)
+                final.add_edge(neighbor, cnode)
+                logger.debug(
+                    f"{LOGGER_PREFIX} reinjecting common join key {cnode} to list {nodelist} between {datasource} and {neighbor}, existing {existing_addresses}"
+                )
+                injected = True
+    return injected
+
+
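+# Core discovery step: weight edges so paths through derived (BASIC) concepts are
+# expensive, prune aggregate/filter/constant derivations, then approximate a
+# minimal Steiner tree over the undirected graph connecting every requested node.
+# Returns None when the targets cannot be connected into one valid graph.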
+def determine_induced_minimal_nodes(
+    G: ReferenceGraph,
+    nodelist: list[str],
+    environment: BuildEnvironment,
+    filter_downstream: bool,
+    accept_partial: bool = False,
+    synonyms: set[str] = set(),
+) -> nx.DiGraph | None:
+    H: nx.Graph = nx.to_undirected(G).copy()
+
+    # Add weights to edges based on target node's derivation type
+    for edge in G.edges():
+        _, target = edge
+        target_lookup = G.concepts.get(target)
+
+        weight = 1  # default weight
+        # If the target is a BASIC derivation (other than attribute access),
+        # set a higher weight to discourage routing through it
+        if target_lookup and target_lookup.derivation == Derivation.BASIC:
+            if (
+                isinstance(target_lookup.lineage, BuildFunction)
+                and target_lookup.lineage.operator == FunctionType.ATTR_ACCESS
+            ):
+                weight = 1
+            else:
+                weight = 50
+
+        H.edges[edge]["weight"] = weight
+
+    nodes_to_remove = []
+    for node, lookup in G.concepts.items():
+        # inclusion of aggregates can create ambiguous node relation chains;
+        # there may be a better way to handle this -
+        # can be revisited if we need to connect a derived synonym based on an aggregate
+        if lookup.derivation in (
+            Derivation.CONSTANT,
+            Derivation.AGGREGATE,
+            Derivation.FILTER,
+        ):
+            nodes_to_remove.append(node)
+        # purge a node if we're already looking for all of its parents
+        if filter_downstream and lookup.derivation not in (Derivation.ROOT,):
+            nodes_to_remove.append(node)
+    if nodes_to_remove:
+        # logger.debug(f"Removing nodes {nodes_to_remove} from graph")
+        H.remove_nodes_from(nodes_to_remove)
+    isolates = list(nx.isolates(H))
+    if isolates:
+        # logger.debug(f"Removing isolates {isolates} from graph")
+        H.remove_nodes_from(isolates)
+
+    zero_out = list(x for x in H.nodes if G.out_degree(x) == 0 and x not in nodelist)
+    while zero_out:
+        logger.debug(f"Removing zero out nodes {zero_out} from graph")
+        H.remove_nodes_from(zero_out)
+        zero_out = list(
+            x for x in H.nodes if G.out_degree(x) == 0 and x not in nodelist
+        )
+    try:
+        # Use weight attribute for Dijkstra pathfinding
+        paths = nx.multi_source_dijkstra_path(H, nodelist, weight="weight")
+        # logger.debug(f"Paths found for {nodelist} {paths}")
+    except nx.exception.NodeNotFound as e:
+        logger.debug(f"Unable to find paths for {nodelist} - {str(e)}")
+        return None
+    path_removals = list(x for x in H.nodes if x not in paths)
+    if path_removals:
+        # logger.debug(f"Removing paths {path_removals} from graph")
+        H.remove_nodes_from(path_removals)
+    # logger.debug(f"Graph after path removal {H.nodes}")
+    sG: nx.Graph = ax.steinertree.steiner_tree(H, nodelist, weight="weight").copy()
+    if not sG.nodes:
+        logger.debug(f"No Steiner tree found for nodes {nodelist}")
+        return None
+
+    logger.debug(f"Steiner tree found for nodes {nodelist} {sG.nodes}")
+    final: nx.DiGraph = nx.subgraph(G, sG.nodes).copy()
+
+    for edge in G.edges:
+        if edge[1] in final.nodes and edge[0].startswith("ds~"):
+            ds_name = extract_address(edge[0])
+            ds = environment.datasources[ds_name]
+            concept = environment.canonical_concepts[extract_address(edge[1])]
+            if concept.address in [x.address for x in ds.partial_concepts]:
+                if not accept_partial:
+                    continue
+            final.add_edge(*edge)
+
+    reinject_common_join_keys_v2(G, final, nodelist, synonyms)
+
+    # all concept nodes must have a parent
+    if not all(
+        [
+            final.in_degree(node) > 0
+            for node in final.nodes
+            if node.startswith("c~") and node in nodelist
+        ]
+    ):
+        missing = [
+            node
+            for node in final.nodes
+            if node.startswith("c~") and final.in_degree(node) == 0
+        ]
+        logger.debug(f"Skipping graph for {nodelist} as no in_degree {missing}")
+        return None
+
+    if not all([node in final.nodes for node in nodelist]):
+        missing = [node for node in nodelist if node not in final.nodes]
+        logger.debug(
+            f"Skipping graph for initial list {nodelist} as missing nodes {missing} from final graph {final.nodes}"
+        )
+        return None
+    logger.debug(f"Found final graph {final.nodes}")
+    return final
+
+
+def canonicalize_addresses(
+    reduced_concept_set: set[str], environment: BuildEnvironment
+) -> set[str]:
+    """
+    Convert a set of concept addresses to their canonical form.
+
+    This is necessary to ensure that we can compare concepts correctly,
+    especially when dealing with aliases or pseudonyms.
+    """
+    return set(
+        environment.concepts[x].address if x in environment.concepts else x
+        for x in reduced_concept_set
+    )
+
+
+def detect_ambiguity_and_raise(
+    all_concepts: list[BuildConcept],
+    reduced_concept_sets_raw: list[set[str]],
+    environment: BuildEnvironment,
+) -> None:
+    final_candidates: list[set[str]] = []
+    common: set[str] = set()
+    # find all values that show up in every set of join additions
+    reduced_concept_sets = [
+        canonicalize_addresses(x, environment) for x in reduced_concept_sets_raw
+    ]
+    for ja in reduced_concept_sets:
+        if not common:
+            common = ja
+        else:
+            common = common.intersection(ja)
+        if all(set(ja).issubset(y) for y in reduced_concept_sets):
+            final_candidates.append(ja)
+    if not final_candidates:
+        filtered_paths = [x.difference(common) for x in reduced_concept_sets]
+        raise AmbiguousRelationshipResolutionException(
+            message=f"Multiple possible concept injections found to resolve {[x.address for x in all_concepts]}, have {' or '.join([str(x) for x in reduced_concept_sets])}: {filtered_paths}",
+            parents=filtered_paths,
+        )
+
+
+def has_synonym(concept: BuildConcept, others: list[list[BuildConcept]]) -> bool:
+    return any(
+        c.address in concept.pseudonyms or concept.address in c.pseudonyms
+        for sublist in others
+        for c in sublist
+    )
+
+
+def filter_relevant_subgraphs(
+    subgraphs: list[list[BuildConcept]],
+) -> list[list[BuildConcept]]:
+    return [
+        subgraph
+        for subgraph in subgraphs
+        if len(subgraph) > 1
+        or (
+            len(subgraph) == 1
+            and not has_synonym(subgraph[0], [x for x in subgraphs if x != subgraph])
+        )
+    ]
+
+
+# 2025-11-18 - removing this as it was causing us to drop
+# partial concept required parents,
+# but leaving here for possible future use
+# def filter_duplicate_subgraphs(
+#     subgraphs: list[list[BuildConcept]], environment
+# ) -> list[list[BuildConcept]]:
+#     seen: list[set[str]] = []
+#
+#     for graph in subgraphs:
+#         seen.append(
+#             canonicalize_addresses(set([x.address for x in graph]), environment)
+#         )
+#     final = []
+#     # sometimes we can get two subcomponents that are the same
+#     # due to alias resolution;
+#     # if so, drop any that are strict subsets.
+#     for graph in subgraphs:
+#         logger.info(f"{LOGGER_PREFIX} Checking graph {graph} for duplicates in {seen}")
+#         set_x = canonicalize_addresses(set([x.address for x in graph]), environment)
+#         if any([set_x.issubset(y) and set_x != y for y in seen]):
+#             continue
+#         final.append(graph)
+#     return final
+
+
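+# Repeatedly request a minimal connecting graph, removing each round's newly
+# injected concepts from the search graph before retrying (bounded by
+# AMBIGUITY_CHECK_LIMIT). Rounds that inject incomparable concept sets mean the
+# join path is ambiguous, and detect_ambiguity_and_raise aborts resolution.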
+def resolve_weak_components(
+    all_concepts: List[BuildConcept],
+    environment: BuildEnvironment,
+    environment_graph: ReferenceGraph,
+    filter_downstream: bool = True,
+    accept_partial: bool = False,
+    search_conditions: BuildWhereClause | None = None,
+) -> list[list[BuildConcept]] | None:
+    break_flag = False
+    found = []
+    search_graph = environment_graph.copy()
+    prune_sources_for_conditions(
+        search_graph, accept_partial, conditions=search_conditions
+    )
+    reduced_concept_sets: list[set[str]] = []
+
+    # prune properties
+    # to_remove = []
+    # for node in search_graph.nodes:
+    #     if not node.startswith("c~"):
+    #         continue
+    #     try:
+    #         concept = extract_concept(extract_address(node), environment)
+    #         if concept.purpose == Purpose.PROPERTY and concept.address not in all_concepts:
+    #             to_remove.append(node)
+    #     except Exception as e:
+    #         logger.error(f"Error extracting concept from node {node}: {e}")
+    #         raise ValueError("FIX THIS TO BE MORE PRECISE")
+    # for node in to_remove:
+    #     search_graph.remove_node(node)
+
+    count = 0
+    node_list = sorted(
+        [
+            concept_to_node(c.with_default_grain())
+            for c in all_concepts
+            if "__preql_internal" not in c.address
+        ]
+    )
+    synonyms: set[str] = set()
+    for x in all_concepts:
+        synonyms = synonyms.union(x.pseudonyms)
+    # from trilogy.hooks.graph_hook import GraphHook
+    # GraphHook().query_graph_built(search_graph, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
+
+    # ensure g is bound even if the first resolution attempt raises
+    g: nx.DiGraph | None = None
+    # loop through, removing new nodes we find,
+    # to ensure there are no ambiguous discovery paths
+    # (if we did not care about raising ambiguity errors, we could just use the first one)
+    while break_flag is not True:
+        count += 1
+        if count > AMBIGUITY_CHECK_LIMIT:
+            break_flag = True
+        try:
+            g = determine_induced_minimal_nodes(
+                search_graph,
+                node_list,
+                filter_downstream=filter_downstream,
+                accept_partial=accept_partial,
+                environment=environment,
+                synonyms=synonyms,
+            )
+
+            if not g or not g.nodes:
+                break_flag = True
+                continue
+            if not nx.is_weakly_connected(g):
+                break_flag = True
+                continue
+            # from trilogy.hooks.graph_hook import GraphHook
+            # GraphHook().query_graph_built(g, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
+            all_graph_concepts = [
+                extract_concept(extract_address(node), environment)
+                for node in g.nodes
+                if node.startswith("c~")
+            ]
+            new = [x for x in all_graph_concepts if x.address not in all_concepts]
+
+            if not new:
+                break_flag = True
+            # remove our new nodes for the next search path
+            for n in new:
+                node = concept_to_node(n)
+                if node in search_graph:
+                    search_graph.remove_node(node)
+            # TODO: figure out better place for debugging
+            # from trilogy.hooks.graph_hook import GraphHook
+            # GraphHook().query_graph_built(g, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
+            found.append(g)
+            new_addresses = set([x.address for x in new if x.address not in synonyms])
+            reduced_concept_sets.append(new_addresses)
+
+        except nx.exception.NetworkXNoPath:
+            break_flag = True
+        if g and not g.nodes:
+            break_flag = True
+    if not found:
+        return None
+
+    detect_ambiguity_and_raise(all_concepts, reduced_concept_sets, environment)
+
+    # take our first one as the actual graph
+    g = found[0]
+
+    subgraphs: list[list[BuildConcept]] = []
+    # components = nx.strongly_connected_components(g)
+    node_list = [x for x in g.nodes if x.startswith("c~")]
+    components = extract_ds_components(g, node_list, environment_graph.pseudonyms)
+    logger.debug(f"Extracted components {components} from {node_list}")
+    for component in components:
+        # we need to take unique again, as different addresses may map to the same concept
+        sub_component = unique(
+            # sorting here is required for reproducibility
+            # todo: we should sort in an optimized order
+            [extract_concept(x, environment) for x in sorted(component)],
+            "address",
+        )
+        if not sub_component:
+            continue
+        subgraphs.append(sub_component)
+
+    return subgraphs
+
+
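+# Resolve each concept subgraph to a parent StrategyNode via the source_concepts
+# callback; a single parent is returned as-is, while multiple parents are wrapped
+# in a MergeNode that joins their outputs.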
+def subgraphs_to_merge_node(
+    concept_subgraphs: list[list[BuildConcept]],
+    depth: int,
+    all_concepts: List[BuildConcept],
+    environment,
+    g,
+    source_concepts,
+    history,
+    conditions,
+    output_concepts: List[BuildConcept],
+    search_conditions: BuildWhereClause | None = None,
+    enable_early_exit: bool = True,
+):
+    parents: List[StrategyNode] = []
+    logger.info(
+        f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {[[c.address for c in subgraph] for subgraph in concept_subgraphs]}"
+    )
+    for graph in concept_subgraphs:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} fetching subgraph {[c.address for c in graph]}"
+        )
+
+        parent: StrategyNode | None = source_concepts(
+            mandatory_list=graph,
+            environment=environment,
+            g=g,
+            depth=depth + 1,
+            history=history,
+            # conditions=search_conditions,
+        )
+        if not parent:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} Unable to instantiate target subgraph"
+            )
+            return None
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} finished subgraph fetch for {[c.address for c in graph]}, have parent {type(parent)} w/ {[c.address for c in parent.output_concepts]}"
+        )
+        parents.append(parent)
+    input_c = []
+    output_c = []
+    for x in parents:
+        for y in x.usable_outputs:
+            input_c.append(y)
+            if y in output_concepts:
+                output_c.append(y)
+            elif any(y.address in c.pseudonyms for c in output_concepts) or any(
+                c.address in y.pseudonyms for c in output_concepts
+            ):
+                output_c.append(y)
+
+    if len(parents) == 1 and enable_early_exit:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} only one parent node, exiting early w/ {[c.address for c in parents[0].output_concepts]}"
+        )
+        parent = parents[0]
+        return parent
+
+    rval = MergeNode(
+        input_concepts=unique(input_c, "address"),
+        output_concepts=output_concepts,
+        environment=environment,
+        parents=parents,
+        depth=depth,
+        # hidden_concepts=[]
+        # conditions=conditions,
+        # conditions=search_conditions.conditional,
+        # preexisting_conditions=search_conditions.conditional,
+        # node_joins=[]
+    )
+    return rval
+
+
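+# Entry point: extend the target concepts with row arguments from the search
+# conditions, then try resolution with filter_downstream=True and again with
+# False, returning the first merge node successfully constructed.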
+def gen_merge_node(
+    all_concepts: List[BuildConcept],
+    g: nx.DiGraph,
+    environment: BuildEnvironment,
+    depth: int,
+    source_concepts,
+    accept_partial: bool = False,
+    history: History | None = None,
+    conditions: BuildConditional | None = None,
+    search_conditions: BuildWhereClause | None = None,
+) -> Optional[MergeNode]:
+    # we do not actually APPLY these conditions anywhere,
+    # though we could look at doing that as an optimization.
+    # it's important to include them so the base discovery loop that was generating
+    # the merge node can then add them automatically,
+    # so we should not return a node with preexisting conditions
+    if search_conditions:
+        all_search_concepts = unique(
+            all_concepts + list(search_conditions.row_arguments), "address"
+        )
+    else:
+        all_search_concepts = all_concepts
+    all_search_concepts = sorted(all_search_concepts, key=lambda x: x.address)
+    break_set = set([x.address for x in all_search_concepts])
+    for filter_downstream in [True, False]:
+        weak_resolve = resolve_weak_components(
+            all_search_concepts,
+            environment,
+            g,
+            filter_downstream=filter_downstream,
+            accept_partial=accept_partial,
+            search_conditions=search_conditions,
+        )
+        if not weak_resolve:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} wasn't able to resolve graph through intermediate concept injection with accept_partial {accept_partial}, filter_downstream {filter_downstream}"
+            )
+            continue
+
+        log_graph = [[y.address for y in x] for x in weak_resolve]
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} Was able to resolve graph through weak component resolution - final graph {log_graph}"
+        )
+        for flat in log_graph:
+            if set(flat) == break_set:
+                logger.info(
+                    f"{padding(depth)}{LOGGER_PREFIX} expanded concept resolution was identical to search resolution; breaking to avoid recursion error."
+                )
+                return None
+        return subgraphs_to_merge_node(
+            weak_resolve,
+            depth=depth,
+            all_concepts=all_search_concepts,
+            environment=environment,
+            g=g,
+            source_concepts=source_concepts,
+            history=history,
+            conditions=conditions,
+            search_conditions=search_conditions,
+            output_concepts=all_concepts,
+        )
+    return None
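
For orientation, a minimal sketch of how this module's entry point is invoked. The wrapper below is illustrative only: `concepts`, `build_env`, `ref_graph`, and the `source_concepts` discovery callback are assumed inputs constructed elsewhere in the package, not part of this diff.

    from trilogy.core.processing.node_generators.node_merge_node import gen_merge_node

    def plan_merge(concepts, build_env, ref_graph, source_concepts):
        # Hypothetical wrapper: ask the generator to connect concepts that span
        # multiple datasources; returns a MergeNode (or a single parent node),
        # or None when no unambiguous join path exists.
        return gen_merge_node(
            all_concepts=concepts,
            g=ref_graph,
            environment=build_env,
            depth=0,  # controls log indentation via padding()
            source_concepts=source_concepts,
            accept_partial=False,
        )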