pytrilogy 0.3.149__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cp313-win_amd64.pyd +0 -0
  4. pytrilogy-0.3.149.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.149.dist-info/RECORD +207 -0
  6. pytrilogy-0.3.149.dist-info/WHEEL +4 -0
  7. pytrilogy-0.3.149.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.149.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2670 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +436 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +846 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1432 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +397 -0
  117. trilogy/dialect/enums.py +151 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/exceptions.py +26 -0
  130. trilogy/execution/state/file_state_store.py +0 -0
  131. trilogy/execution/state/sqllite_state_store.py +0 -0
  132. trilogy/execution/state/state_store.py +406 -0
  133. trilogy/executor.py +692 -0
  134. trilogy/hooks/__init__.py +4 -0
  135. trilogy/hooks/base_hook.py +40 -0
  136. trilogy/hooks/graph_hook.py +135 -0
  137. trilogy/hooks/query_debugger.py +166 -0
  138. trilogy/metadata/__init__.py +0 -0
  139. trilogy/parser.py +10 -0
  140. trilogy/parsing/README.md +21 -0
  141. trilogy/parsing/__init__.py +0 -0
  142. trilogy/parsing/common.py +1069 -0
  143. trilogy/parsing/config.py +5 -0
  144. trilogy/parsing/exceptions.py +8 -0
  145. trilogy/parsing/helpers.py +1 -0
  146. trilogy/parsing/parse_engine.py +2876 -0
  147. trilogy/parsing/render.py +775 -0
  148. trilogy/parsing/trilogy.lark +546 -0
  149. trilogy/py.typed +0 -0
  150. trilogy/render.py +45 -0
  151. trilogy/scripts/README.md +9 -0
  152. trilogy/scripts/__init__.py +0 -0
  153. trilogy/scripts/agent.py +41 -0
  154. trilogy/scripts/agent_info.py +306 -0
  155. trilogy/scripts/common.py +432 -0
  156. trilogy/scripts/dependency/Cargo.lock +617 -0
  157. trilogy/scripts/dependency/Cargo.toml +39 -0
  158. trilogy/scripts/dependency/README.md +131 -0
  159. trilogy/scripts/dependency/build.sh +25 -0
  160. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  161. trilogy/scripts/dependency/src/lib.rs +16 -0
  162. trilogy/scripts/dependency/src/main.rs +770 -0
  163. trilogy/scripts/dependency/src/parser.rs +435 -0
  164. trilogy/scripts/dependency/src/preql.pest +208 -0
  165. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  166. trilogy/scripts/dependency/src/resolver.rs +716 -0
  167. trilogy/scripts/dependency/tests/base.preql +3 -0
  168. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  169. trilogy/scripts/dependency/tests/customer.preql +6 -0
  170. trilogy/scripts/dependency/tests/main.preql +9 -0
  171. trilogy/scripts/dependency/tests/orders.preql +7 -0
  172. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  173. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  174. trilogy/scripts/dependency.py +323 -0
  175. trilogy/scripts/display.py +555 -0
  176. trilogy/scripts/environment.py +59 -0
  177. trilogy/scripts/fmt.py +32 -0
  178. trilogy/scripts/ingest.py +487 -0
  179. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  180. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  181. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  182. trilogy/scripts/ingest_helpers/typing.py +161 -0
  183. trilogy/scripts/init.py +105 -0
  184. trilogy/scripts/parallel_execution.py +762 -0
  185. trilogy/scripts/plan.py +189 -0
  186. trilogy/scripts/refresh.py +161 -0
  187. trilogy/scripts/run.py +79 -0
  188. trilogy/scripts/serve.py +202 -0
  189. trilogy/scripts/serve_helpers/__init__.py +41 -0
  190. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  191. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  192. trilogy/scripts/serve_helpers/models.py +38 -0
  193. trilogy/scripts/single_execution.py +131 -0
  194. trilogy/scripts/testing.py +143 -0
  195. trilogy/scripts/trilogy.py +75 -0
  196. trilogy/std/__init__.py +0 -0
  197. trilogy/std/color.preql +3 -0
  198. trilogy/std/date.preql +13 -0
  199. trilogy/std/display.preql +18 -0
  200. trilogy/std/geography.preql +22 -0
  201. trilogy/std/metric.preql +15 -0
  202. trilogy/std/money.preql +67 -0
  203. trilogy/std/net.preql +14 -0
  204. trilogy/std/ranking.preql +7 -0
  205. trilogy/std/report.preql +5 -0
  206. trilogy/std/semantic.preql +6 -0
  207. trilogy/utility.py +34 -0
@@ -0,0 +1,695 @@
+ from itertools import combinations
+ from typing import Callable, List, Optional
+
+ import networkx as nx
+ from networkx.algorithms import approximation as ax
+
+ from trilogy.constants import logger
+ from trilogy.core.enums import Derivation, FunctionType
+ from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
+ from trilogy.core.graph_models import (
+     ReferenceGraph,
+     concept_to_node,
+     prune_sources_for_conditions,
+ )
+ from trilogy.core.models.build import (
+     BuildConcept,
+     BuildConditional,
+     BuildFunction,
+     BuildGrain,
+     BuildWhereClause,
+ )
+ from trilogy.core.models.build_environment import BuildEnvironment
+ from trilogy.core.processing.nodes import History, MergeNode, StrategyNode
+ from trilogy.core.processing.utility import padding
+ from trilogy.utility import unique
+
+ LOGGER_PREFIX = "[GEN_MERGE_NODE]"
+ AMBIGUITY_CHECK_LIMIT = 20
+ EGO_RADIUS = 10
+
+
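+ # Drop the farther endpoint of each pseudonym (alias) edge relative to the
+ # given datasource node, so each source's ego graph keeps only the nearest
+ # representative of an aliased concept pair.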
+ def filter_pseudonyms_for_source(
+     ds_graph: nx.DiGraph, node: str, pseudonyms: set[tuple[str, str]]
+ ):
+     to_remove = set()
+     for edge in ds_graph.edges:
+         if edge in pseudonyms:
+             lengths = {}
+             for n in edge:
+                 try:
+                     lengths[n] = nx.shortest_path_length(ds_graph, node, n)
+                 except nx.NetworkXNoPath:
+                     lengths[n] = 999
+             to_remove.add(max(lengths, key=lambda x: lengths.get(x, 0)))
+     for node in to_remove:
+         ds_graph.remove_node(node)
+
+
+ def extract_address(node: str):
+     return node.split("~")[1].split("@")[0]
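+ # e.g. extract_address("c~local.order_id@Grain<local.order_id>") returns
+ # "local.order_id", assuming node names follow the "<kind>~<address>@<suffix>"
+ # convention implied by this parsing.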
+
+
+ def extract_concept(node: str, env: BuildEnvironment):
+     # removing this as part of canonical mapping
+     # if node in env.alias_origin_lookup:
+     #     return env.alias_origin_lookup[node]
+     return env.canonical_concepts[node]
+
+
+ def filter_unique_graphs(graphs: list[list[str]]) -> list[list[str]]:
+     unique_graphs: list[set[str]] = []
+
+     # sort graphs from largest to smallest
+     graphs.sort(key=lambda x: len(x), reverse=True)
+     for graph in graphs:
+         if not any(set(graph).issubset(x) for x in unique_graphs):
+             unique_graphs.append(set(graph))
+
+     return [list(x) for x in unique_graphs]
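+ # e.g. filter_unique_graphs([["a"], ["a", "b"], ["c"]]) keeps ["a", "b"] and
+ # ["c"]; ["a"] is dropped because it is a subset of a larger graph.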
+
+
+ def extract_ds_components(
+     g: nx.DiGraph, nodelist: list[str], pseudonyms: set[tuple[str, str]]
+ ) -> list[list[str]]:
+     graphs = []
+     for node in g.nodes:
+         if node.startswith("ds~"):
+             local = g.copy()
+             filter_pseudonyms_for_source(local, node, pseudonyms)
+             ds_graph: nx.DiGraph = nx.ego_graph(local, node, radius=EGO_RADIUS).copy()
+             graphs.append(
+                 [
+                     extract_address(x)
+                     for x in ds_graph.nodes
+                     if not str(x).startswith("ds~")
+                 ]
+             )
+     # if we had no ego graphs, return all concepts
+     if not graphs:
+         return [[extract_address(node) for node in nodelist]]
+     graphs = filter_unique_graphs(graphs)
+     for node in nodelist:
+         parsed = extract_address(node)
+         if not any(parsed in x for x in graphs):
+             graphs.append([parsed])
+     return graphs
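+ # Each datasource's ego graph (out to EGO_RADIUS hops) becomes one candidate
+ # component of concept addresses; any concept reachable from no datasource is
+ # appended as its own single-element component so it is not silently dropped.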
+
+
+ def prune_and_merge(
+     G: ReferenceGraph,
+     keep_node_lambda: Callable[[str], bool],
+ ) -> ReferenceGraph:
+     """
+     Prune nodes of one type and create direct connections between remaining nodes.
+
+     Args:
+         G: NetworkX graph
+         keep_node_lambda: predicate returning True for nodes to keep
+
+     Returns:
+         New graph containing only the kept nodes, with merged connections
+     """
+     # Get nodes to keep
+     nodes_to_keep = [n for n in G.nodes if keep_node_lambda(n)]
+     # Create new graph
+     new_graph = G.subgraph(nodes_to_keep).copy()
+
+     # Find paths between nodes to keep through removed nodes
+     nodes_to_remove = [n for n in G.nodes() if n not in nodes_to_keep]
+
+     for node_pair in combinations(nodes_to_keep, 2):
+         n1, n2 = node_pair
+
+         # Check if there's a path through removed nodes
+         try:
+             path = nx.shortest_path(G, n1, n2)
+             # If a path exists and goes through nodes we're removing
+             if len(path) > 2 or any(node in nodes_to_remove for node in path[1:-1]):
+                 new_graph.add_edge(n1, n2)
+         except nx.NetworkXNoPath:
+             continue
+
+     return new_graph
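+ # e.g. given a chain ds~a -> c~x -> ds~b and a predicate keeping only ds~
+ # nodes, the result keeps ds~a and ds~b and adds a direct ds~a -> ds~b edge,
+ # since the original path ran through a removed node.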
+
+
+ def reinject_common_join_keys_v2(
+     G: ReferenceGraph,
+     final: nx.DiGraph,
+     nodelist: list[str],
+     synonyms: set[str] = set(),
+ ) -> bool:
+     # when we've discovered a concept join, for each pair of ds nodes
+     # check if they have more keys in common
+     # and inject those into discovery as join conditions
+     def is_ds_node(n: str) -> bool:
+         return n.startswith("ds~")
+
+     ds_graph = prune_and_merge(final, is_ds_node)
+     injected = False
+
+     for datasource in ds_graph.nodes:
+         node1 = G.datasources[datasource]
+         neighbors = nx.all_neighbors(ds_graph, datasource)
+         for neighbor in neighbors:
+             node2 = G.datasources[neighbor]
+             common_concepts = set(
+                 x.concept.address for x in node1.columns
+             ).intersection(set(x.concept.address for x in node2.columns))
+             concrete_concepts = [
+                 x.concept for x in node1.columns if x.concept.address in common_concepts
+             ]
+             reduced = BuildGrain.from_concepts(concrete_concepts).components
+             existing_addresses = set()
+             for concrete in concrete_concepts:
+                 cnode = concept_to_node(concrete.with_default_grain())
+                 if cnode in final.nodes:
+                     existing_addresses.add(concrete.address)
+                     continue
+             for concrete in concrete_concepts:
+                 if concrete.address in synonyms:
+                     continue
+                 if concrete.address not in reduced:
+                     continue
+                 if concrete.address in existing_addresses:
+                     continue
+                 # skip anything that is already in the graph pseudonyms
+                 if any(x in concrete.pseudonyms for x in existing_addresses):
+                     continue
+                 cnode = concept_to_node(concrete.with_default_grain())
+                 final.add_edge(datasource, cnode)
+                 final.add_edge(neighbor, cnode)
+                 logger.debug(
+                     f"{LOGGER_PREFIX} reinjecting common join key {cnode} to list {nodelist} between {datasource} and {neighbor}, existing {existing_addresses}"
+                 )
+                 injected = True
+
+     return injected
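+ # e.g. if two joined datasources both expose order_id and customer_id but the
+ # discovered join only used order_id, customer_id is injected as an extra
+ # join-key node, provided it is part of the reduced grain and not already
+ # present or covered by a pseudonym.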
+
+
+ def reinject_basic_parents(
+     G: ReferenceGraph,
+     final: nx.DiGraph,
+ ) -> bool:
+
+     injected = False
+
+     for concept in G.concepts:
+         if concept not in final:
+             continue
+         logger.debug(
+             f"{LOGGER_PREFIX} checking concept {concept} for basic parent reinjection"
+         )
+         node1 = G.concepts[concept]
+         if node1.derivation != Derivation.BASIC:
+             continue
+         cnode = concept_to_node(node1)
+         neighbors = nx.all_neighbors(G, concept)
+         for neighbor in neighbors:
+             node2 = G.concepts.get(neighbor)
+             if not node2:
+                 continue
+             if node2.address in node1.concept_arguments:
+                 cnode2 = concept_to_node(node2)
+                 final.add_edge(cnode2, cnode)
+                 logger.debug(
+                     f"{LOGGER_PREFIX} reinjecting upstream inputs {cnode2} to basic derivation {cnode}"
+                 )
+                 injected = True
+
+     return injected
+
+
+ def determine_induced_minimal_nodes(
+     G: ReferenceGraph,
+     nodelist: list[str],
+     environment: BuildEnvironment,
+     filter_downstream: bool,
+     accept_partial: bool = False,
+     synonyms: set[str] = set(),
+ ) -> nx.DiGraph | None:
+     H: nx.Graph = nx.to_undirected(G).copy()
+     nodelist_set = set(nodelist)
+
+     # Add weights to edges based on target node's derivation type
+     g_concepts = G.concepts
+     for edge in G.edges():
+         _, target = edge
+         target_lookup = g_concepts.get(target)
+
+         weight = 1  # default weight
+         # If either node is BASIC, set higher weight
+         if target_lookup and target_lookup.derivation == Derivation.BASIC:
+             if (
+                 isinstance(target_lookup.lineage, BuildFunction)
+                 and target_lookup.lineage.operator == FunctionType.ATTR_ACCESS
+             ):
+                 weight = 1
+             else:
+                 # raise SyntaxError(target_lookup.lineage.operator)
+                 weight = 50
+
+         H.edges[edge]["weight"] = weight
+
+     nodes_to_remove = []
+     derivations_to_remove = (
+         Derivation.CONSTANT,
+         Derivation.AGGREGATE,
+         Derivation.FILTER,
+     )
+     for node, lookup in g_concepts.items():
+         # inclusion of aggregates can create ambiguous node relation chains
+         # there may be a better way to handle this
+         # can be revisited if we need to connect a derived synonym based on an aggregate
+         if lookup.derivation in derivations_to_remove:
+             nodes_to_remove.append(node)
+         # purge a node if we're already looking for all of its parents
+         elif filter_downstream and lookup.derivation != Derivation.ROOT:
+             nodes_to_remove.append(node)
+     if nodes_to_remove:
+         # logger.debug(f"Removing nodes {nodes_to_remove} from graph")
+         H.remove_nodes_from(nodes_to_remove)
+     isolates = list(nx.isolates(H))
+     if isolates:
+         # logger.debug(f"Removing isolates {isolates} from graph")
+         H.remove_nodes_from(isolates)
+
+     zero_out = [x for x in H.nodes if G.out_degree(x) == 0 and x not in nodelist_set]
+     while zero_out:
+         logger.debug(f"Removing zero out nodes {zero_out} from graph")
+         H.remove_nodes_from(zero_out)
+         zero_out = [
+             x for x in H.nodes if G.out_degree(x) == 0 and x not in nodelist_set
+         ]
+     try:
+         # Use weight attribute for Dijkstra pathfinding
+         paths = nx.multi_source_dijkstra_path(H, nodelist, weight="weight")
+         # logger.debug(f"Paths found for {nodelist} {paths}")
+     except nx.exception.NodeNotFound as e:
+         logger.debug(f"Unable to find paths for {nodelist} - {str(e)}")
+         return None
+     path_removals = list(x for x in H.nodes if x not in paths)
+     if path_removals:
+         # logger.debug(f"Removing paths {path_removals} from graph")
+         H.remove_nodes_from(path_removals)
+     # logger.debug(f"Graph after path removal {H.nodes}")
+     sG: nx.Graph = ax.steinertree.steiner_tree(H, nodelist, weight="weight").copy()
+     if not sG.nodes:
+         logger.debug(f"No Steiner tree found for nodes {nodelist}")
+         return None
+
+     logger.debug(f"Steiner tree found for nodes {nodelist} {sG.nodes}")
+     final: nx.DiGraph = nx.subgraph(G, sG.nodes).copy()
+
+     final_nodes = set(final.nodes)
+     for edge in G.edges:
+         if edge[1] in final_nodes and edge[0].startswith("ds~"):
+             ds = G.datasources[edge[0]]
+             concept = environment.canonical_concepts[extract_address(edge[1])]
+             if not accept_partial:
+                 partial_addresses = {x.address for x in ds.partial_concepts}
+                 if concept.address in partial_addresses:
+                     continue
+             final.add_edge(*edge)
+
+     # re-add concepts that need to be in the output for proper discovery
+     reinject_common_join_keys_v2(G, final, nodelist, synonyms)
+
+     reinject_basic_parents(G, final)
+
+     # all concept nodes must have a parent
+     if not all(
+         [
+             final.in_degree(node) > 0
+             for node in final.nodes
+             if node.startswith("c~") and node in nodelist
+         ]
+     ):
+         missing = [
+             node
+             for node in final.nodes
+             if node.startswith("c~") and final.in_degree(node) == 0
+         ]
+         logger.debug(f"Skipping graph for {nodelist} as no in_degree {missing}")
+         return None
+
+     if not all([node in final.nodes for node in nodelist]):
+         missing = [node for node in nodelist if node not in final.nodes]
+         logger.debug(
+             f"Skipping graph for initial list {nodelist} as missing nodes {missing} from final graph {final.nodes}"
+         )
+         return None
+     logger.debug(f"Found final graph {final.nodes}")
+     return final
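+ # Note: ax.steinertree.steiner_tree returns an approximate minimum-weight tree
+ # spanning every node in nodelist; because BASIC derivations (other than
+ # attribute access) carry edge weight 50 vs. the default 1, the tree prefers
+ # routing joins through root concepts rather than through derived ones.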
+
+
+ def canonicalize_addresses(
+     reduced_concept_set: set[str], environment: BuildEnvironment
+ ) -> set[str]:
+     """
+     Convert a set of concept addresses to their canonical form.
+     This is necessary to ensure that we can compare concepts correctly,
+     especially when dealing with aliases or pseudonyms.
+     """
+     return set(
+         environment.concepts[x].address if x in environment.concepts else x
+         for x in reduced_concept_set
+     )
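+ # e.g. if "orders.customer_id" resolves to the canonical concept
+ # "customers.id" in this environment, canonicalize_addresses returns
+ # {"customers.id"}; addresses missing from the environment pass through as-is.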
+
+
+ def detect_ambiguity_and_raise(
+     all_concepts: list[BuildConcept],
+     reduced_concept_sets_raw: list[set[str]],
+     environment: BuildEnvironment,
+ ) -> None:
+     final_candidates: list[set[str]] = []
+     common: set[str] = set()
+     # find all values that show up in every set of join additions
+     reduced_concept_sets = [
+         canonicalize_addresses(x, environment) for x in reduced_concept_sets_raw
+     ]
+     for ja in reduced_concept_sets:
+         if not common:
+             common = ja
+         else:
+             common = common.intersection(ja)
+         if all(set(ja).issubset(y) for y in reduced_concept_sets):
+             final_candidates.append(ja)
+     if not final_candidates:
+         filtered_paths = [x.difference(common) for x in reduced_concept_sets]
+         raise AmbiguousRelationshipResolutionException(
+             message=f"Multiple possible concept additions (intermediate join keys) found to resolve {[x.address for x in all_concepts]}, have {' or '.join([str(x) for x in reduced_concept_sets])}. Differentiating paths: {filtered_paths}",
+             parents=filtered_paths,
+         )
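+ # e.g. candidate join-key sets {"a", "b"} and {"a", "c"}: neither is a subset
+ # of the other, so there is no unambiguous minimal choice, and the exception
+ # reports the differentiating paths {"b"} and {"c"}.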
+
+
+ def has_synonym(concept: BuildConcept, others: list[list[BuildConcept]]) -> bool:
+     return any(
+         c.address in concept.pseudonyms or concept.address in c.pseudonyms
+         for sublist in others
+         for c in sublist
+     )
+
+
+ def filter_relevant_subgraphs(
+     subgraphs: list[list[BuildConcept]],
+ ) -> list[list[BuildConcept]]:
+     return [
+         subgraph
+         for subgraph in subgraphs
+         if len(subgraph) > 1
+         or (
+             len(subgraph) == 1
+             and not has_synonym(subgraph[0], [x for x in subgraphs if x != subgraph])
+         )
+     ]
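+ # e.g. a single-concept subgraph whose concept is a pseudonym of a concept in
+ # another subgraph is dropped, since the other subgraph already covers it;
+ # subgraphs with more than one concept are always kept.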
+
+
+ # 2025-11-18 - removing this as it was causing us to drop
+ # partial concept required parents
+ # but leaving it here for possible future use
+ # def filter_duplicate_subgraphs(
+ #     subgraphs: list[list[BuildConcept]], environment
+ # ) -> list[list[BuildConcept]]:
+ #     seen: list[set[str]] = []
+
+ #     for graph in subgraphs:
+ #         seen.append(
+ #             canonicalize_addresses(set([x.address for x in graph]), environment)
+ #         )
+ #     final = []
+ #     # sometimes we can get two subcomponents that are the same
+ #     # due to alias resolution
+ #     # if so, drop any that are strict subsets.
+ #     for graph in subgraphs:
+ #         logger.info(f"{LOGGER_PREFIX} Checking graph {graph} for duplicates in {seen}")
+ #         set_x = canonicalize_addresses(set([x.address for x in graph]), environment)
+ #         if any([set_x.issubset(y) and set_x != y for y in seen]):
+ #             continue
+ #         final.append(graph)
+ #     return final
+
+
+ def resolve_weak_components(
+     all_concepts: List[BuildConcept],
+     environment: BuildEnvironment,
+     environment_graph: ReferenceGraph,
+     filter_downstream: bool = True,
+     accept_partial: bool = False,
+     search_conditions: BuildWhereClause | None = None,
+ ) -> list[list[BuildConcept]] | None:
+     break_flag = False
+     found = []
+     search_graph = environment_graph.copy()
+     prune_sources_for_conditions(
+         search_graph, accept_partial, conditions=search_conditions
+     )
+     reduced_concept_sets: list[set[str]] = []
+
+     # prune properties
+     # to_remove = []
+     # for node in search_graph.nodes:
+     #     if not node.startswith("c~"):
+     #         continue
+     #     try:
+     #         concept = extract_concept(extract_address(node), environment)
+     #         if concept.purpose == Purpose.PROPERTY and concept.address not in all_concepts:
+     #             to_remove.append(node)
+     #     except Exception as e:
+     #         logger.error(f"Error extracting concept from node {node}: {e}")
+     #         raise ValueError('FIX THIS TO BE MORE PRECISE')
+     # for node in to_remove:
+     #     search_graph.remove_node(node)
+
+     count = 0
+     node_list = sorted(
+         [
+             concept_to_node(c.with_default_grain())
+             for c in all_concepts
+             if "__preql_internal" not in c.address
+         ]
+     )
+     synonyms: set[str] = set()
+     for x in all_concepts:
+         synonyms.update(x.pseudonyms)
+     # from trilogy.hooks.graph_hook import GraphHook
+     # GraphHook().query_graph_built(search_graph, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
+
+     # loop through, removing new nodes we find,
+     # to ensure there are no ambiguous discovery paths
+     # (if we did not care about raising ambiguity errors, we could just use the first one)
+     while break_flag is not True:
+         count += 1
+         if count > AMBIGUITY_CHECK_LIMIT:
+             break_flag = True
+         try:
+             g = determine_induced_minimal_nodes(
+                 search_graph,
+                 node_list,
+                 filter_downstream=filter_downstream,
+                 accept_partial=accept_partial,
+                 environment=environment,
+                 synonyms=synonyms,
+             )
+
+             if not g or not g.nodes:
+                 break_flag = True
+                 continue
+             if not nx.is_weakly_connected(g):
+                 break_flag = True
+                 continue
+             # from trilogy.hooks.graph_hook import GraphHook
+             # GraphHook().query_graph_built(g, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
+             all_graph_concepts = [
+                 extract_concept(extract_address(node), environment)
+                 for node in g.nodes
+                 if node.startswith("c~")
+             ]
+             new = [x for x in all_graph_concepts if x.address not in all_concepts]
+
+             if not new:
+                 break_flag = True
+             # remove our new nodes for the next search path
+             for n in new:
+                 node = concept_to_node(n)
+                 if node in search_graph:
+                     search_graph.remove_node(node)
+             # TODO: figure out a better place for debugging
+             # from trilogy.hooks.graph_hook import GraphHook
+             # GraphHook().query_graph_built(g, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
+             found.append(g)
+             new_addresses = set([x.address for x in new if x.address not in synonyms])
+             reduced_concept_sets.append(new_addresses)
+
+         except nx.exception.NetworkXNoPath:
+             break_flag = True
+         if g and not g.nodes:
+             break_flag = True
+     if not found:
+         return None
+
+     detect_ambiguity_and_raise(all_concepts, reduced_concept_sets, environment)
+
+     # take our first one as the actual graph
+     g = found[0]
+
+     subgraphs: list[list[BuildConcept]] = []
+     # components = nx.strongly_connected_components(g)
+     node_list = [x for x in g.nodes if x.startswith("c~")]
+     components = extract_ds_components(g, node_list, environment_graph.pseudonyms)
+     logger.debug(f"Extracted components {components} from {node_list}")
+     for component in components:
+         # we need to take unique again, as different addresses may map to the same concept
+         sub_component = unique(
+             # sorting here is required for reproducibility
+             # todo: we should sort in an optimized order
+             [extract_concept(x, environment) for x in sorted(component)],
+             "address",
+         )
+         if not sub_component:
+             continue
+         subgraphs.append(sub_component)
+
+     return subgraphs
+
+
+ def subgraphs_to_merge_node(
+     concept_subgraphs: list[list[BuildConcept]],
+     depth: int,
+     all_concepts: List[BuildConcept],
+     environment,
+     g,
+     source_concepts,
+     history,
+     conditions,
+     output_concepts: List[BuildConcept],
+     search_conditions: BuildWhereClause | None = None,
+     enable_early_exit: bool = True,
+ ):
+
+     parents: List[StrategyNode] = []
+     logger.info(
+         f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {[[c.address for c in subgraph] for subgraph in concept_subgraphs]}"
+     )
+     for graph in concept_subgraphs:
+         logger.info(
+             f"{padding(depth)}{LOGGER_PREFIX} fetching subgraph {[c.address for c in graph]}"
+         )
+
+         parent: StrategyNode | None = source_concepts(
+             mandatory_list=graph,
+             environment=environment,
+             g=g,
+             depth=depth + 1,
+             history=history,
+             # conditions=search_conditions,
+         )
+         if not parent:
+             logger.info(
+                 f"{padding(depth)}{LOGGER_PREFIX} Unable to instantiate target subgraph"
+             )
+             return None
+         logger.info(
+             f"{padding(depth)}{LOGGER_PREFIX} finished subgraph fetch for {[c.address for c in graph]}, have parent {type(parent)} w/ {[c.address for c in parent.output_concepts]}"
+         )
+         parents.append(parent)
+     input_c = []
+     output_c = []
+     for x in parents:
+         for y in x.usable_outputs:
+             input_c.append(y)
+             if y in output_concepts:
+                 output_c.append(y)
+             elif any(y.address in c.pseudonyms for c in output_concepts) or any(
+                 c.address in y.pseudonyms for c in output_concepts
+             ):
+                 output_c.append(y)
+
+     if len(parents) == 1 and enable_early_exit:
+         logger.info(
+             f"{padding(depth)}{LOGGER_PREFIX} only one parent node, exiting early w/ {[c.address for c in parents[0].output_concepts]}"
+         )
+         parent = parents[0]
+         return parent
+
+     rval = MergeNode(
+         input_concepts=unique(input_c, "address"),
+         output_concepts=output_concepts,
+         environment=environment,
+         parents=parents,
+         depth=depth,
+         # hidden_concepts=[]
+         # conditions=conditions,
+         # conditions=search_conditions.conditional,
+         # preexisting_conditions=search_conditions.conditional,
+         # node_joins=[]
+     )
+     return rval
+
+
+ def gen_merge_node(
+     all_concepts: List[BuildConcept],
+     g: nx.DiGraph,
+     environment: BuildEnvironment,
+     depth: int,
+     source_concepts,
+     accept_partial: bool = False,
+     history: History | None = None,
+     conditions: BuildConditional | None = None,
+     search_conditions: BuildWhereClause | None = None,
+ ) -> Optional[MergeNode]:
+
+     # we do not actually APPLY these conditions anywhere,
+     # though we could look at doing that as an optimization;
+     # it's important to include them so the base discovery loop that was generating
+     # the merge node can then add them automatically,
+     # so we should not return a node with preexisting conditions
+     if search_conditions:
+         all_search_concepts = unique(
+             all_concepts + list(search_conditions.row_arguments), "address"
+         )
+     else:
+         all_search_concepts = all_concepts
+     all_search_concepts = sorted(all_search_concepts, key=lambda x: x.address)
+     break_set = set([x.address for x in all_search_concepts])
+     for filter_downstream in [True, False]:
+         weak_resolve = resolve_weak_components(
+             all_search_concepts,
+             environment,
+             g,
+             filter_downstream=filter_downstream,
+             accept_partial=accept_partial,
+             search_conditions=search_conditions,
+         )
+         if not weak_resolve:
+             logger.info(
+                 f"{padding(depth)}{LOGGER_PREFIX} wasn't able to resolve graph through intermediate concept injection with accept_partial {accept_partial}, filter_downstream {filter_downstream}"
+             )
+             continue
+
+         log_graph = [[y.address for y in x] for x in weak_resolve]
+         logger.info(
+             f"{padding(depth)}{LOGGER_PREFIX} was able to resolve graph through weak component resolution - final graph {log_graph}"
+         )
+         for flat in log_graph:
+             if set(flat) == break_set:
+                 logger.info(
+                     f"{padding(depth)}{LOGGER_PREFIX} expanded concept resolution was identical to search resolution; breaking to avoid recursion error."
+                 )
+                 return None
+         return subgraphs_to_merge_node(
+             weak_resolve,
+             depth=depth,
+             all_concepts=all_search_concepts,
+             environment=environment,
+             g=g,
+             source_concepts=source_concepts,
+             history=history,
+             conditions=conditions,
+             search_conditions=search_conditions,
+             output_concepts=all_concepts,
+         )
+     return None