pytrilogy 0.0.2.17__py3-none-any.whl → 0.0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (40)
  1. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.19.dist-info}/METADATA +12 -8
  2. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.19.dist-info}/RECORD +40 -39
  3. trilogy/__init__.py +1 -1
  4. trilogy/constants.py +1 -1
  5. trilogy/core/enums.py +1 -0
  6. trilogy/core/functions.py +11 -0
  7. trilogy/core/models.py +89 -47
  8. trilogy/core/optimization.py +15 -9
  9. trilogy/core/processing/concept_strategies_v3.py +372 -145
  10. trilogy/core/processing/node_generators/basic_node.py +27 -55
  11. trilogy/core/processing/node_generators/common.py +6 -7
  12. trilogy/core/processing/node_generators/filter_node.py +28 -31
  13. trilogy/core/processing/node_generators/group_node.py +27 -5
  14. trilogy/core/processing/node_generators/group_to_node.py +3 -1
  15. trilogy/core/processing/node_generators/multiselect_node.py +3 -0
  16. trilogy/core/processing/node_generators/node_merge_node.py +14 -10
  17. trilogy/core/processing/node_generators/rowset_node.py +12 -12
  18. trilogy/core/processing/node_generators/select_merge_node.py +317 -0
  19. trilogy/core/processing/node_generators/select_node.py +7 -511
  20. trilogy/core/processing/node_generators/unnest_node.py +4 -3
  21. trilogy/core/processing/node_generators/window_node.py +12 -37
  22. trilogy/core/processing/nodes/__init__.py +0 -2
  23. trilogy/core/processing/nodes/base_node.py +69 -20
  24. trilogy/core/processing/nodes/filter_node.py +3 -0
  25. trilogy/core/processing/nodes/group_node.py +18 -17
  26. trilogy/core/processing/nodes/merge_node.py +4 -10
  27. trilogy/core/processing/nodes/select_node_v2.py +28 -14
  28. trilogy/core/processing/nodes/window_node.py +1 -2
  29. trilogy/core/processing/utility.py +51 -4
  30. trilogy/core/query_processor.py +17 -73
  31. trilogy/dialect/base.py +7 -3
  32. trilogy/dialect/duckdb.py +4 -1
  33. trilogy/dialect/sql_server.py +3 -3
  34. trilogy/hooks/query_debugger.py +5 -3
  35. trilogy/parsing/parse_engine.py +66 -38
  36. trilogy/parsing/trilogy.lark +2 -1
  37. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.19.dist-info}/LICENSE.md +0 -0
  38. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.19.dist-info}/WHEEL +0 -0
  39. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.19.dist-info}/entry_points.txt +0 -0
  40. {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.19.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,317 @@
1
+ from typing import List, Optional
2
+
3
+ from trilogy.core.models import (
4
+ Concept,
5
+ Environment,
6
+ Grain,
7
+ Datasource,
8
+ WhereClause,
9
+ LooseConceptList,
10
+ )
11
+ from trilogy.core.processing.nodes import (
12
+ MergeNode,
13
+ StrategyNode,
14
+ GroupNode,
15
+ ConstantNode,
16
+ SelectNode,
17
+ )
18
+ import networkx as nx
19
+ from trilogy.core.graph_models import concept_to_node
20
+ from trilogy.constants import logger
21
+ from trilogy.core.processing.utility import padding
22
+ from trilogy.core.enums import PurposeLineage
23
+
24
+ LOGGER_PREFIX = "[GEN_ROOT_MERGE_NODE]"
25
+
26
+
27
def extract_address(node: str):
    """Return the concept address embedded in a graph node name.

    Graph node names follow the pattern ``<prefix>~<address>@<grain>``;
    this drops the type prefix (before ``~``) and the grain qualifier
    (after ``@``).
    """
    without_prefix = node.split("~")[1]
    return without_prefix.split("@")[0]
29
+
30
+
31
def get_graph_partial_nodes(g: nx.DiGraph) -> dict[str, list[str]]:
    """Map every datasource node in *g* to the graph-node names of the
    concepts that datasource only partially covers."""
    ds_attrs: dict[str, Datasource] = nx.get_node_attributes(g, "datasource")
    # get_node_attributes already restricts to nodes carrying the attribute,
    # so no explicit membership check against g.nodes is needed
    return {
        node: [concept_to_node(c) for c in datasource.partial_concepts]
        for node, datasource in ds_attrs.items()
    }
40
+
41
+
42
def get_graph_grain_length(g: nx.DiGraph) -> dict[str, int]:
    """Map every datasource node in *g* to the number of components in
    that datasource's grain (used to rank competing datasources)."""
    ds_attrs: dict[str, Datasource] = nx.get_node_attributes(g, "datasource")
    return {node: len(ds.grain.components) for node, ds in ds_attrs.items()}
49
+
50
+
51
def create_pruned_concept_graph(
    g: nx.DiGraph, all_concepts: List[Concept], accept_partial: bool = False
) -> Optional[nx.DiGraph]:
    """Prune *g* down to the datasource and concept nodes needed to cover
    ``all_concepts``.

    Returns the pruned graph when a single connected covering subgraph
    exists, or None when the concepts cannot all be served (no nodes, or
    the candidates split into disconnected components).

    When ``accept_partial`` is False, edges from a datasource to concepts
    it only partially covers are removed before coverage is evaluated.
    """
    orig_g = g
    g = g.copy()
    target_addresses = set([c.address for c in all_concepts])
    concepts: dict[str, Concept] = nx.get_node_attributes(orig_g, "concept")
    datasources: dict[str, Datasource] = nx.get_node_attributes(orig_g, "datasource")
    relevant_concepts_pre = {
        n: x.address
        for n in g.nodes()
        # filter out synonyms
        if (x := concepts.get(n, None)) and x.address in target_addresses
    }
    relevant_concepts: list[str] = list(relevant_concepts_pre.keys())
    relevent_datasets: list[str] = []
    if not accept_partial:
        # drop datasource<->concept edges where coverage is only partial;
        # reuse the shared helper rather than recomputing the partial map inline
        partial = get_graph_partial_nodes(g)
        to_remove = []
        for edge in g.edges:
            if (
                edge[0] in datasources
                and (pnodes := partial.get(edge[0], []))
                and edge[1] in pnodes
            ):
                to_remove.append(edge)
            if (
                edge[1] in datasources
                and (pnodes := partial.get(edge[1], []))
                and edge[0] in pnodes
            ):
                to_remove.append(edge)
        for edge in to_remove:
            g.remove_edge(*edge)
    for n in g.nodes():
        if not n.startswith("ds~"):
            continue
        # hoist the neighbor set so membership checks are O(1) per concept
        # instead of re-walking the adjacency for every candidate
        neighbor_set = set(nx.all_neighbors(g, n))
        actual_neighbors = [x for x in relevant_concepts if x in neighbor_set]
        if actual_neighbors:
            relevent_datasets.append(n)

    # for injecting extra join concepts that are shared between datasets
    # use the original graph, pre-partial pruning
    for n in orig_g.nodes:
        # readd ignoring grain
        # we want to join inclusive of all concepts
        # NOTE(review): `roots` is re-initialized on every iteration, so the
        # per-root accumulation never spans more than one node; behavior
        # preserved as-is, but this looks like it may have been intended to
        # live outside the loop — confirm upstream intent before changing.
        roots: dict[str, set[str]] = {}
        if n.startswith("c~") and n not in relevant_concepts:
            root = n.split("@")[0]
            neighbors = roots.get(root, set())
            for neighbor in nx.all_neighbors(orig_g, n):
                if neighbor in relevent_datasets:
                    neighbors.add(neighbor)
            # shared between two or more relevant datasources: keep it as a
            # potential join key even though it was not requested
            if len(neighbors) > 1:
                relevant_concepts.append(n)
            roots[root] = set()
    g.remove_nodes_from(
        [
            n
            for n in g.nodes()
            if n not in relevent_datasets and n not in relevant_concepts
        ]
    )

    subgraphs = list(nx.connected_components(g.to_undirected()))
    if not subgraphs:
        return None
    if len(subgraphs) != 1:
        # disconnected components: cannot serve everything from one join tree
        return None
    # add back any relevant edges that might have been partially filtered
    relevant = set(relevant_concepts + relevent_datasets)
    for edge in orig_g.edges():
        if edge[0] in relevant and edge[1] in relevant:
            g.add_edge(edge[0], edge[1])

    return g
134
+
135
+
136
def resolve_subgraphs(g: nx.DiGraph) -> dict[str, list[str]]:
    """Assign each datasource node in *g* its neighboring concept nodes,
    then prune datasources whose concept set is redundant.

    A datasource is dropped when its concepts are a strict subset of
    another datasource's non-partial concepts; exact ties are broken by
    keeping the datasource with the smallest grain (then lexicographically
    smallest name).
    """
    datasources = [n for n in g.nodes if n.startswith("ds~")]
    subgraphs = {ds: list(set(nx.all_neighbors(g, ds))) for ds in datasources}
    partial_map = get_graph_partial_nodes(g)
    grain_length = get_graph_grain_length(g)
    non_partial = {
        # .get guards against a ds~ node missing the datasource attribute,
        # which would otherwise raise KeyError here
        ds: [c for c in subgraphs[ds] if c not in partial_map.get(ds, [])]
        for ds in datasources
    }
    pruned_subgraphs = {}
    for key, value in subgraphs.items():
        is_subset = False
        matches = set()
        # Compare current list with other lists
        for other_key, other_value in non_partial.items():
            if key != other_key and set(value).issubset(set(other_value)):
                if len(value) < len(other_value):
                    # strictly smaller coverage: this datasource is redundant
                    is_subset = True
                    break
                elif len(value) == len(other_value):
                    matches.add(other_key)
                    matches.add(key)
        if matches:
            # tie-break on (grain size, name); use != rather than `is not` —
            # node names must be compared by value, not object identity
            is_subset = key != min(matches, key=lambda x: (grain_length[x], x))
        if not is_subset:
            pruned_subgraphs[key] = value
    return pruned_subgraphs
162
+
163
+
164
def create_select_node(
    ds_name: str,
    subgraph: list[str],
    accept_partial: bool,
    g,
    environment: Environment,
    depth: int,
) -> StrategyNode:
    """Build a node that reads the concepts of *subgraph* from the named
    datasource: a ConstantNode when every concept is constant, a plain
    SelectNode when the datasource grain already matches, or a SelectNode
    wrapped in a GroupNode when aggregation to the target grain is needed.
    """
    ds_name = ds_name.split("~")[1]
    all_concepts = [
        environment.concepts[extract_address(c)]
        for c in subgraph
        if c.startswith("c~")
    ]

    all_lcl = LooseConceptList(concepts=all_concepts)
    if all(c.derivation == PurposeLineage.CONSTANT for c in all_concepts):
        # nothing to scan — every requested concept is a constant
        logger.info(
            f"{padding(depth)}{LOGGER_PREFIX} All concepts {[x.address for x in all_concepts]} are constants, returning constant node"
        )
        return ConstantNode(
            output_concepts=all_concepts,
            input_concepts=[],
            environment=environment,
            g=g,
            parents=[],
            depth=depth,
            # no partial for constants
            partial_concepts=[],
            force_group=False,
        )

    datasource = environment.datasources[ds_name]
    target_grain = Grain(components=all_concepts)
    # a datasource at a finer grain than the target must be aggregated up
    force_group = not datasource.grain.issubset(target_grain)
    partial_lcl = LooseConceptList(
        concepts=[
            c.concept
            for c in datasource.columns
            if not c.is_complete and c.concept in all_lcl
        ]
    )
    nullable_lcl = LooseConceptList(
        concepts=[
            c.concept
            for c in datasource.columns
            if c.is_nullable and c.concept in all_lcl
        ]
    )

    base_node: StrategyNode = SelectNode(
        input_concepts=[c.concept for c in datasource.columns],
        output_concepts=all_concepts,
        environment=environment,
        g=g,
        parents=[],
        depth=depth,
        partial_concepts=[c for c in all_concepts if c in partial_lcl],
        nullable_concepts=[c for c in all_concepts if c in nullable_lcl],
        accept_partial=accept_partial,
        datasource=datasource,
        grain=Grain(components=all_concepts),
        conditions=datasource.where.conditional if datasource.where else None,
    )

    if not force_group:
        return base_node
    # we need to nest the group node one further
    return GroupNode(
        output_concepts=all_concepts,
        input_concepts=all_concepts,
        environment=environment,
        g=g,
        parents=[base_node],
        depth=depth,
        partial_concepts=base_node.partial_concepts,
        nullable_concepts=base_node.nullable_concepts,
    )
240
+
241
+
242
def gen_select_merge_node(
    all_concepts: List[Concept],
    g: nx.DiGraph,
    environment: Environment,
    depth: int,
    accept_partial: bool = False,
    conditions: WhereClause | None = None,
) -> Optional[StrategyNode]:
    """Attempt to satisfy *all_concepts* directly from datasources.

    Tries a covering graph without partial datasources first, then with
    them; resolves the covering graph into per-datasource select nodes and
    merges them (plus a ConstantNode for any constant concepts). Returns
    None when no single covering graph exists.

    `conditions` is accepted for interface compatibility but is not
    applied by this generator.
    """
    constants = [c for c in all_concepts if c.derivation == PurposeLineage.CONSTANT]
    non_constant = [c for c in all_concepts if c.derivation != PurposeLineage.CONSTANT]
    if constants and not non_constant:
        # pure-constant request: no datasource resolution required
        return ConstantNode(
            output_concepts=constants,
            input_concepts=[],
            environment=environment,
            g=g,
            parents=[],
            depth=depth,
            partial_concepts=[],
            force_group=False,
        )
    for attempt in (False, True):
        pruned_concept_graph = create_pruned_concept_graph(g, non_constant, attempt)
        if pruned_concept_graph:
            logger.info(
                f"{padding(depth)}{LOGGER_PREFIX} found covering graph w/ partial flag {attempt}"
            )
            break

    if not pruned_concept_graph:
        logger.info(
            f"{padding(depth)}{LOGGER_PREFIX} no covering graph found {attempt}"
        )
        return None

    sub_nodes = resolve_subgraphs(pruned_concept_graph)

    logger.info(f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {sub_nodes}")
    parents: list[StrategyNode] = []
    for ds_node, subgraph in sub_nodes.items():
        parents.append(
            create_select_node(
                ds_node,
                subgraph,
                g=g,
                accept_partial=accept_partial,
                environment=environment,
                depth=depth,
            )
        )
    if not parents:
        return None

    if constants:
        parents.append(
            ConstantNode(
                output_concepts=constants,
                input_concepts=[],
                environment=environment,
                g=g,
                parents=[],
                depth=depth,
                partial_concepts=[],
                force_group=False,
            )
        )

    if len(parents) == 1:
        return parents[0]
    return MergeNode(
        output_concepts=all_concepts,
        input_concepts=non_constant,
        environment=environment,
        g=g,
        depth=depth,
        parents=parents,
    )