pytrilogy 0.0.2.15__py3-none-any.whl → 0.0.2.18__py3-none-any.whl

This diff compares the contents of publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.

This version of pytrilogy has been flagged as potentially problematic.
Files changed (44)
  1. {pytrilogy-0.0.2.15.dist-info → pytrilogy-0.0.2.18.dist-info}/METADATA +12 -8
  2. pytrilogy-0.0.2.18.dist-info/RECORD +83 -0
  3. trilogy/__init__.py +1 -1
  4. trilogy/constants.py +1 -1
  5. trilogy/core/enums.py +1 -0
  6. trilogy/core/functions.py +11 -0
  7. trilogy/core/models.py +105 -59
  8. trilogy/core/optimization.py +15 -9
  9. trilogy/core/processing/concept_strategies_v3.py +372 -145
  10. trilogy/core/processing/node_generators/basic_node.py +27 -55
  11. trilogy/core/processing/node_generators/common.py +6 -7
  12. trilogy/core/processing/node_generators/filter_node.py +28 -31
  13. trilogy/core/processing/node_generators/group_node.py +14 -2
  14. trilogy/core/processing/node_generators/group_to_node.py +3 -1
  15. trilogy/core/processing/node_generators/multiselect_node.py +3 -0
  16. trilogy/core/processing/node_generators/node_merge_node.py +14 -9
  17. trilogy/core/processing/node_generators/rowset_node.py +12 -12
  18. trilogy/core/processing/node_generators/select_merge_node.py +302 -0
  19. trilogy/core/processing/node_generators/select_node.py +7 -511
  20. trilogy/core/processing/node_generators/unnest_node.py +4 -3
  21. trilogy/core/processing/node_generators/window_node.py +12 -37
  22. trilogy/core/processing/nodes/__init__.py +0 -2
  23. trilogy/core/processing/nodes/base_node.py +69 -20
  24. trilogy/core/processing/nodes/filter_node.py +3 -0
  25. trilogy/core/processing/nodes/group_node.py +18 -17
  26. trilogy/core/processing/nodes/merge_node.py +4 -10
  27. trilogy/core/processing/nodes/select_node_v2.py +28 -14
  28. trilogy/core/processing/nodes/window_node.py +1 -2
  29. trilogy/core/processing/utility.py +51 -3
  30. trilogy/core/query_processor.py +17 -73
  31. trilogy/dialect/base.py +8 -3
  32. trilogy/dialect/common.py +65 -10
  33. trilogy/dialect/duckdb.py +4 -1
  34. trilogy/dialect/sql_server.py +3 -3
  35. trilogy/executor.py +5 -0
  36. trilogy/hooks/query_debugger.py +5 -3
  37. trilogy/parsing/parse_engine.py +67 -39
  38. trilogy/parsing/render.py +2 -0
  39. trilogy/parsing/trilogy.lark +6 -3
  40. pytrilogy-0.0.2.15.dist-info/RECORD +0 -82
  41. {pytrilogy-0.0.2.15.dist-info → pytrilogy-0.0.2.18.dist-info}/LICENSE.md +0 -0
  42. {pytrilogy-0.0.2.15.dist-info → pytrilogy-0.0.2.18.dist-info}/WHEEL +0 -0
  43. {pytrilogy-0.0.2.15.dist-info → pytrilogy-0.0.2.18.dist-info}/entry_points.txt +0 -0
  44. {pytrilogy-0.0.2.15.dist-info → pytrilogy-0.0.2.18.dist-info}/top_level.txt +0 -0
trilogy/core/processing/node_generators/select_merge_node.py (new file)
@@ -0,0 +1,302 @@
+from typing import List, Optional
+
+from trilogy.core.models import (
+    Concept,
+    Environment,
+    Grain,
+    Datasource,
+    WhereClause,
+    LooseConceptList,
+)
+from trilogy.core.processing.nodes import (
+    MergeNode,
+    StrategyNode,
+    GroupNode,
+    ConstantNode,
+    SelectNode,
+)
+import networkx as nx
+from trilogy.core.graph_models import concept_to_node
+from trilogy.constants import logger
+from trilogy.core.processing.utility import padding
+from trilogy.core.enums import PurposeLineage
+
+
+LOGGER_PREFIX = "[GEN_ROOT_MERGE_NODE]"
+
+
+def extract_address(node: str):
+    return node.split("~")[1].split("@")[0]
+
+
+def get_graph_partial_nodes(g: nx.DiGraph) -> dict[str, list[str]]:
+    datasources: dict[str, Datasource] = nx.get_node_attributes(g, "datasource")
+    partial: dict[str, list[str]] = {}
+    for node in g.nodes:
+        if node in datasources:
+            partial[node] = [
+                concept_to_node(c) for c in datasources[node].partial_concepts
+            ]
+    return partial
+
+
+def get_graph_grain_length(g: nx.DiGraph) -> dict[str, int]:
+    datasources: dict[str, Datasource] = nx.get_node_attributes(g, "datasource")
+    partial: dict[str, int] = {}
+    for node in g.nodes:
+        if node in datasources:
+            partial[node] = len(datasources[node].grain.components)
+    return partial
+
+
+def create_pruned_concept_graph(
+    g: nx.DiGraph, all_concepts: List[Concept], accept_partial: bool = False
+) -> nx.DiGraph:
+    g = g.copy()
+    target_addresses = set([c.address for c in all_concepts])
+    concepts: dict[str, Concept] = nx.get_node_attributes(g, "concept")
+    datasources: dict[str, Datasource] = nx.get_node_attributes(g, "datasource")
+    relevant_concepts_pre = {
+        n: x.address
+        for n in g.nodes()
+        # filter out synonyms
+        if (x := concepts.get(n, None)) and x.address in target_addresses
+    }
+    relevant_concepts = list(relevant_concepts_pre.keys())
+    relevent_datasets = []
+    if not accept_partial:
+        partial = {}
+        for node in g.nodes:
+            if node in datasources:
+                partial[node] = [
+                    concept_to_node(c) for c in datasources[node].partial_concepts
+                ]
+        to_remove = []
+        for edge in g.edges:
+            if (
+                edge[0] in datasources
+                and (pnodes := partial.get(edge[0], []))
+                and edge[1] in pnodes
+            ):
+                to_remove.append(edge)
+            if (
+                edge[1] in datasources
+                and (pnodes := partial.get(edge[1], []))
+                and edge[0] in pnodes
+            ):
+                to_remove.append(edge)
+        for edge in to_remove:
+            g.remove_edge(*edge)
+    for n in g.nodes():
+        if not n.startswith("ds~"):
+            continue
+        actual_neighbors = [
+            x for x in relevant_concepts if x in (nx.all_neighbors(g, n))
+        ]
+        if actual_neighbors:
+            relevent_datasets.append(n)
+    for n in g.nodes():
+        if n.startswith("c~") and n not in relevant_concepts:
+            neighbor_count = 0
+            for x in nx.all_neighbors(g, n):
+                if x in relevent_datasets:
+                    neighbor_count += 1
+            if neighbor_count > 1:
+                relevant_concepts.append(concepts.get(n))
+
+    g.remove_nodes_from(
+        [
+            n
+            for n in g.nodes()
+            if n not in relevent_datasets and n not in relevant_concepts
+        ]
+    )
+    return g
+
+
+def resolve_subgraphs(g: nx.DiGraph) -> dict[str, list[str]]:
+    datasources = [n for n in g.nodes if n.startswith("ds~")]
+    subgraphs = {ds: list(set(list(nx.all_neighbors(g, ds)))) for ds in datasources}
+    partial_map = get_graph_partial_nodes(g)
+    grain_length = get_graph_grain_length(g)
+    non_partial = {
+        ds: [c for c in subgraphs[ds] if c not in partial_map[ds]] for ds in datasources
+    }
+    pruned_subgraphs = {}
+    for key, value in subgraphs.items():
+        is_subset = False
+        matches = set()
+        # Compare current list with other lists
+        for other_key, other_value in non_partial.items():
+            if key != other_key and set(value).issubset(set(other_value)):
+                if len(value) < len(other_value):
+                    is_subset = True
+                    break
+                elif len(value) == len(other_value):
+                    matches.add(other_key)
+                    matches.add(key)
+        if matches:
+            is_subset = key is not min(matches, key=lambda x: (grain_length[x], x))
+        if not is_subset:
+            pruned_subgraphs[key] = value
+    return pruned_subgraphs
+
+
+def create_select_node(
+    ds_name: str,
+    subgraph: list[str],
+    accept_partial: bool,
+    g,
+    environment: Environment,
+    depth: int,
+) -> StrategyNode:
+    ds_name = ds_name.split("~")[1]
+    all_concepts = [
+        environment.concepts[extract_address(c)] for c in subgraph if c.startswith("c~")
+    ]
+
+    all_lcl = LooseConceptList(concepts=all_concepts)
+    if all([c.derivation == PurposeLineage.CONSTANT for c in all_concepts]):
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} All concepts {[x.address for x in all_concepts]} are constants, returning constant node"
+        )
+        return ConstantNode(
+            output_concepts=all_concepts,
+            input_concepts=[],
+            environment=environment,
+            g=g,
+            parents=[],
+            depth=depth,
+            # no partial for constants
+            partial_concepts=[],
+            force_group=False,
+        )
+
+    datasource = environment.datasources[ds_name]
+    target_grain = Grain(components=all_concepts)
+    force_group = False
+    if not datasource.grain.issubset(target_grain):
+        force_group = True
+    partial_concepts = [
+        c.concept
+        for c in datasource.columns
+        if not c.is_complete and c.concept in all_lcl
+    ]
+    partial_lcl = LooseConceptList(concepts=partial_concepts)
+    nullable_concepts = [
+        c.concept for c in datasource.columns if c.is_nullable and c.concept in all_lcl
+    ]
+    nullable_lcl = LooseConceptList(concepts=nullable_concepts)
+
+    bcandidate: StrategyNode = SelectNode(
+        input_concepts=[c.concept for c in datasource.columns],
+        output_concepts=all_concepts,
+        environment=environment,
+        g=g,
+        parents=[],
+        depth=depth,
+        partial_concepts=[c for c in all_concepts if c in partial_lcl],
+        nullable_concepts=[c for c in all_concepts if c in nullable_lcl],
+        accept_partial=accept_partial,
+        datasource=datasource,
+        grain=Grain(components=all_concepts),
+        conditions=datasource.where.conditional if datasource.where else None,
+    )
+
+    # we need to nest the group node one further
+    if force_group is True:
+        candidate: StrategyNode = GroupNode(
+            output_concepts=all_concepts,
+            input_concepts=all_concepts,
+            environment=environment,
+            g=g,
+            parents=[bcandidate],
+            depth=depth,
+            partial_concepts=bcandidate.partial_concepts,
+            nullable_concepts=bcandidate.nullable_concepts,
+        )
+    else:
+        candidate = bcandidate
+    return candidate
+
+
+def gen_select_merge_node(
+    all_concepts: List[Concept],
+    g: nx.DiGraph,
+    environment: Environment,
+    depth: int,
+    accept_partial: bool = False,
+    conditions: WhereClause | None = None,
+) -> Optional[StrategyNode]:
+    non_constant = [c for c in all_concepts if c.derivation != PurposeLineage.CONSTANT]
+    constants = [c for c in all_concepts if c.derivation == PurposeLineage.CONSTANT]
+    if not non_constant and constants:
+        return ConstantNode(
+            output_concepts=constants,
+            input_concepts=[],
+            environment=environment,
+            g=g,
+            parents=[],
+            depth=depth,
+            partial_concepts=[],
+            force_group=False,
+        )
+    for attempt in [False, True]:
+        pruned_concept_graph = create_pruned_concept_graph(g, non_constant, attempt)
+        subgraphs = list(nx.connected_components(pruned_concept_graph.to_undirected()))
+
+        if subgraphs and len(subgraphs) == 1:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} found covering graph w/ partial flag {attempt}"
+            )
+            break
+    if len(subgraphs) > 1:
+        # from trilogy.hooks.graph_hook import GraphHook
+        # GraphHook().query_graph_built(pruned_concept_graph.to_undirected(), highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
+        # raise SyntaxError(f'Too many subgraphs found for {[c.address for c in all_concepts]}: got {subgraphs}')
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} Too many subgraphs found for {[c.address for c in non_constant]}: got {subgraphs}'"
+        )
+        return None
+
+    sub_nodes = resolve_subgraphs(pruned_concept_graph)
+
+    logger.info(f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {sub_nodes}")
+    parents = [
+        create_select_node(
+            k,
+            subgraph,
+            g=g,
+            accept_partial=accept_partial,
+            environment=environment,
+            depth=depth,
+        )
+        for k, subgraph in sub_nodes.items()
+    ]
+    if not parents:
+        return None
+
+    if constants:
+        parents.append(
+            ConstantNode(
+                output_concepts=constants,
+                input_concepts=[],
+                environment=environment,
+                g=g,
+                parents=[],
+                depth=depth,
+                partial_concepts=[],
+                force_group=False,
+            )
+        )
+
+    if len(parents) == 1:
+        return parents[0]
+    return MergeNode(
+        output_concepts=all_concepts,
+        input_concepts=non_constant,
+        environment=environment,
+        g=g,
+        depth=depth,
+        parents=parents,
+    )
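
For context on how the new module is consumed: gen_select_merge_node takes the target concepts, the environment's concept/datasource graph, and a depth counter, prunes the graph to the relevant nodes (retrying with partial-concept edges allowed if the first pass is disconnected), and returns a Select/Group/Constant/Merge strategy node covering the request, or None when no single connected graph can satisfy it. The sketch below is illustrative only and not part of the package: it assumes you already hold an Environment and the "ds~"/"c~" networkx graph the strategy layer builds (graph construction is outside this diff), and the wrapper function is a hypothetical caller.

# Minimal caller sketch (hypothetical wrapper, not shipped in pytrilogy).
from typing import List, Optional

import networkx as nx

from trilogy.core.models import Concept, Environment
from trilogy.core.processing.nodes import StrategyNode
from trilogy.core.processing.node_generators.select_merge_node import (
    gen_select_merge_node,
)


def resolve_root_concepts(
    environment: Environment,
    graph: nx.DiGraph,  # nodes named "ds~..." / "c~..." as assumed by the module above
    concepts: List[Concept],
) -> Optional[StrategyNode]:
    # Ask the generator for a single strategy node covering every concept;
    # it returns None if the pruned graph splits into multiple components.
    node = gen_select_merge_node(
        all_concepts=concepts,
        g=graph,
        environment=environment,
        depth=0,
        accept_partial=False,
    )
    if node is None:
        # A real caller would fall back to other node-generation strategies here.
        return None
    return node

The file list above (select_node.py at -511 lines, select_merge_node.py at +302) suggests this module takes over root-select resolution that previously lived in select_node.py, which is consistent with the accompanying changes in concept_strategies_v3.py.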