pytrilogy 0.0.2.17__py3-none-any.whl → 0.0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic.
- {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.19.dist-info}/METADATA +12 -8
- {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.19.dist-info}/RECORD +40 -39
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -1
- trilogy/core/enums.py +1 -0
- trilogy/core/functions.py +11 -0
- trilogy/core/models.py +89 -47
- trilogy/core/optimization.py +15 -9
- trilogy/core/processing/concept_strategies_v3.py +372 -145
- trilogy/core/processing/node_generators/basic_node.py +27 -55
- trilogy/core/processing/node_generators/common.py +6 -7
- trilogy/core/processing/node_generators/filter_node.py +28 -31
- trilogy/core/processing/node_generators/group_node.py +27 -5
- trilogy/core/processing/node_generators/group_to_node.py +3 -1
- trilogy/core/processing/node_generators/multiselect_node.py +3 -0
- trilogy/core/processing/node_generators/node_merge_node.py +14 -10
- trilogy/core/processing/node_generators/rowset_node.py +12 -12
- trilogy/core/processing/node_generators/select_merge_node.py +317 -0
- trilogy/core/processing/node_generators/select_node.py +7 -511
- trilogy/core/processing/node_generators/unnest_node.py +4 -3
- trilogy/core/processing/node_generators/window_node.py +12 -37
- trilogy/core/processing/nodes/__init__.py +0 -2
- trilogy/core/processing/nodes/base_node.py +69 -20
- trilogy/core/processing/nodes/filter_node.py +3 -0
- trilogy/core/processing/nodes/group_node.py +18 -17
- trilogy/core/processing/nodes/merge_node.py +4 -10
- trilogy/core/processing/nodes/select_node_v2.py +28 -14
- trilogy/core/processing/nodes/window_node.py +1 -2
- trilogy/core/processing/utility.py +51 -4
- trilogy/core/query_processor.py +17 -73
- trilogy/dialect/base.py +7 -3
- trilogy/dialect/duckdb.py +4 -1
- trilogy/dialect/sql_server.py +3 -3
- trilogy/hooks/query_debugger.py +5 -3
- trilogy/parsing/parse_engine.py +66 -38
- trilogy/parsing/trilogy.lark +2 -1
- {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.19.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.19.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.19.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.17.dist-info → pytrilogy-0.0.2.19.dist-info}/top_level.txt +0 -0
trilogy/core/processing/node_generators/select_merge_node.py
@@ -0,0 +1,317 @@
+from typing import List, Optional
+
+from trilogy.core.models import (
+    Concept,
+    Environment,
+    Grain,
+    Datasource,
+    WhereClause,
+    LooseConceptList,
+)
+from trilogy.core.processing.nodes import (
+    MergeNode,
+    StrategyNode,
+    GroupNode,
+    ConstantNode,
+    SelectNode,
+)
+import networkx as nx
+from trilogy.core.graph_models import concept_to_node
+from trilogy.constants import logger
+from trilogy.core.processing.utility import padding
+from trilogy.core.enums import PurposeLineage
+
+LOGGER_PREFIX = "[GEN_ROOT_MERGE_NODE]"
+
+
+def extract_address(node: str):
+    return node.split("~")[1].split("@")[0]
+
+
+def get_graph_partial_nodes(g: nx.DiGraph) -> dict[str, list[str]]:
+    datasources: dict[str, Datasource] = nx.get_node_attributes(g, "datasource")
+    partial: dict[str, list[str]] = {}
+    for node in g.nodes:
+        if node in datasources:
+            partial[node] = [
+                concept_to_node(c) for c in datasources[node].partial_concepts
+            ]
+    return partial
+
+
+def get_graph_grain_length(g: nx.DiGraph) -> dict[str, int]:
+    datasources: dict[str, Datasource] = nx.get_node_attributes(g, "datasource")
+    partial: dict[str, int] = {}
+    for node in g.nodes:
+        if node in datasources:
+            partial[node] = len(datasources[node].grain.components)
+    return partial
+
+
+def create_pruned_concept_graph(
+    g: nx.DiGraph, all_concepts: List[Concept], accept_partial: bool = False
+) -> nx.DiGraph:
+    orig_g = g
+    g = g.copy()
+    target_addresses = set([c.address for c in all_concepts])
+    concepts: dict[str, Concept] = nx.get_node_attributes(orig_g, "concept")
+    datasources: dict[str, Datasource] = nx.get_node_attributes(orig_g, "datasource")
+    relevant_concepts_pre = {
+        n: x.address
+        for n in g.nodes()
+        # filter out synonyms
+        if (x := concepts.get(n, None)) and x.address in target_addresses
+    }
+    relevant_concepts: list[str] = list(relevant_concepts_pre.keys())
+    relevent_datasets: list[str] = []
+    if not accept_partial:
+        partial = {}
+        for node in g.nodes:
+            if node in datasources:
+                partial[node] = [
+                    concept_to_node(c) for c in datasources[node].partial_concepts
+                ]
+        to_remove = []
+        for edge in g.edges:
+            if (
+                edge[0] in datasources
+                and (pnodes := partial.get(edge[0], []))
+                and edge[1] in pnodes
+            ):
+                to_remove.append(edge)
+            if (
+                edge[1] in datasources
+                and (pnodes := partial.get(edge[1], []))
+                and edge[0] in pnodes
+            ):
+                to_remove.append(edge)
+        for edge in to_remove:
+            g.remove_edge(*edge)
+    for n in g.nodes():
+        if not n.startswith("ds~"):
+            continue
+        actual_neighbors = [
+            x for x in relevant_concepts if x in (nx.all_neighbors(g, n))
+        ]
+        if actual_neighbors:
+            relevent_datasets.append(n)
+
+    # for injecting extra join concepts that are shared between datasets
+    # use the original graph, pre-partial pruning
+    for n in orig_g.nodes:
+        # readd ignoring grain
+        # we want to join inclusive of all concepts
+        roots: dict[str, set[str]] = {}
+        if n.startswith("c~") and n not in relevant_concepts:
+            root = n.split("@")[0]
+            neighbors = roots.get(root, set())
+            for neighbor in nx.all_neighbors(orig_g, n):
+                if neighbor in relevent_datasets:
+                    neighbors.add(neighbor)
+            if len(neighbors) > 1:
+                relevant_concepts.append(n)
+            roots[root] = set()
+    g.remove_nodes_from(
+        [
+            n
+            for n in g.nodes()
+            if n not in relevent_datasets and n not in relevant_concepts
+        ]
+    )
+
+    subgraphs = list(nx.connected_components(g.to_undirected()))
+    if not subgraphs:
+        return None
+    if subgraphs and len(subgraphs) != 1:
+        return None
+    # add back any relevant edges that might have been partially filtered
+    relevant = set(relevant_concepts + relevent_datasets)
+    for edge in orig_g.edges():
+        if edge[0] in relevant and edge[1] in relevant:
+            g.add_edge(edge[0], edge[1])
+
+    return g
+
+
+def resolve_subgraphs(g: nx.DiGraph) -> dict[str, list[str]]:
+    datasources = [n for n in g.nodes if n.startswith("ds~")]
+    subgraphs = {ds: list(set(list(nx.all_neighbors(g, ds)))) for ds in datasources}
+    partial_map = get_graph_partial_nodes(g)
+    grain_length = get_graph_grain_length(g)
+    non_partial = {
+        ds: [c for c in subgraphs[ds] if c not in partial_map[ds]] for ds in datasources
+    }
+    pruned_subgraphs = {}
+    for key, value in subgraphs.items():
+        is_subset = False
+        matches = set()
+        # Compare current list with other lists
+        for other_key, other_value in non_partial.items():
+            if key != other_key and set(value).issubset(set(other_value)):
+                if len(value) < len(other_value):
+                    is_subset = True
+                    break
+                elif len(value) == len(other_value):
+                    matches.add(other_key)
+                    matches.add(key)
+        if matches:
+            is_subset = key is not min(matches, key=lambda x: (grain_length[x], x))
+        if not is_subset:
+            pruned_subgraphs[key] = value
+    return pruned_subgraphs
+
+
+def create_select_node(
+    ds_name: str,
+    subgraph: list[str],
+    accept_partial: bool,
+    g,
+    environment: Environment,
+    depth: int,
+) -> StrategyNode:
+    ds_name = ds_name.split("~")[1]
+    all_concepts = [
+        environment.concepts[extract_address(c)] for c in subgraph if c.startswith("c~")
+    ]
+
+    all_lcl = LooseConceptList(concepts=all_concepts)
+    if all([c.derivation == PurposeLineage.CONSTANT for c in all_concepts]):
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} All concepts {[x.address for x in all_concepts]} are constants, returning constant node"
+        )
+        return ConstantNode(
+            output_concepts=all_concepts,
+            input_concepts=[],
+            environment=environment,
+            g=g,
+            parents=[],
+            depth=depth,
+            # no partial for constants
+            partial_concepts=[],
+            force_group=False,
+        )
+
+    datasource = environment.datasources[ds_name]
+    target_grain = Grain(components=all_concepts)
+    force_group = False
+    if not datasource.grain.issubset(target_grain):
+        force_group = True
+    partial_concepts = [
+        c.concept
+        for c in datasource.columns
+        if not c.is_complete and c.concept in all_lcl
+    ]
+    partial_lcl = LooseConceptList(concepts=partial_concepts)
+    nullable_concepts = [
+        c.concept for c in datasource.columns if c.is_nullable and c.concept in all_lcl
+    ]
+    nullable_lcl = LooseConceptList(concepts=nullable_concepts)
+
+    bcandidate: StrategyNode = SelectNode(
+        input_concepts=[c.concept for c in datasource.columns],
+        output_concepts=all_concepts,
+        environment=environment,
+        g=g,
+        parents=[],
+        depth=depth,
+        partial_concepts=[c for c in all_concepts if c in partial_lcl],
+        nullable_concepts=[c for c in all_concepts if c in nullable_lcl],
+        accept_partial=accept_partial,
+        datasource=datasource,
+        grain=Grain(components=all_concepts),
+        conditions=datasource.where.conditional if datasource.where else None,
+    )
+
+    # we need to nest the group node one further
+    if force_group is True:
+        candidate: StrategyNode = GroupNode(
+            output_concepts=all_concepts,
+            input_concepts=all_concepts,
+            environment=environment,
+            g=g,
+            parents=[bcandidate],
+            depth=depth,
+            partial_concepts=bcandidate.partial_concepts,
+            nullable_concepts=bcandidate.nullable_concepts,
+        )
+    else:
+        candidate = bcandidate
+    return candidate
+
+
+def gen_select_merge_node(
+    all_concepts: List[Concept],
+    g: nx.DiGraph,
+    environment: Environment,
+    depth: int,
+    accept_partial: bool = False,
+    conditions: WhereClause | None = None,
+) -> Optional[StrategyNode]:
+    non_constant = [c for c in all_concepts if c.derivation != PurposeLineage.CONSTANT]
+    constants = [c for c in all_concepts if c.derivation == PurposeLineage.CONSTANT]
+    if not non_constant and constants:
+        return ConstantNode(
+            output_concepts=constants,
+            input_concepts=[],
+            environment=environment,
+            g=g,
+            parents=[],
+            depth=depth,
+            partial_concepts=[],
+            force_group=False,
+        )
+    for attempt in [False, True]:
+        pruned_concept_graph = create_pruned_concept_graph(g, non_constant, attempt)
+        if pruned_concept_graph:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} found covering graph w/ partial flag {attempt}"
+            )
+            break
+
+    if not pruned_concept_graph:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} no covering graph found {attempt}"
+        )
+        return None
+
+    sub_nodes = resolve_subgraphs(pruned_concept_graph)
+
+    logger.info(f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {sub_nodes}")
+    parents = [
+        create_select_node(
+            k,
+            subgraph,
+            g=g,
+            accept_partial=accept_partial,
+            environment=environment,
+            depth=depth,
+        )
+        for k, subgraph in sub_nodes.items()
+    ]
+    if not parents:
+        return None
+
+    if constants:
+        parents.append(
+            ConstantNode(
+                output_concepts=constants,
+                input_concepts=[],
+                environment=environment,
+                g=g,
+                parents=[],
+                depth=depth,
+                partial_concepts=[],
+                force_group=False,
+            )
+        )
+
+    if len(parents) == 1:
+        return parents[0]
+    return MergeNode(
+        output_concepts=all_concepts,
+        input_concepts=non_constant,
+        environment=environment,
+        g=g,
+        depth=depth,
+        parents=parents,
+    )
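
The new gen_select_merge_node path above resolves all requested concepts against raw datasources in one pass: it prunes the concept/datasource graph (retrying with partial datasources allowed), splits the result into per-datasource subgraphs, drops datasources whose coverage is redundant, builds a SelectNode (wrapped in a GroupNode when the datasource grain is wider than the target grain) for each surviving datasource, and merges the results. The subset elimination inside resolve_subgraphs is what keeps redundant sources out of the join. Below is a minimal standalone sketch of that pruning rule, using plain dicts instead of the trilogy graph and Datasource objects; the function and sample names are illustrative only and are not part of the pytrilogy API.

# Illustrative sketch of the subset-pruning rule used in resolve_subgraphs above.
# Inputs are plain dicts rather than a networkx graph annotated with trilogy objects.

def prune_covered_sources(
    subgraphs: dict[str, list[str]],    # datasource node -> concept nodes it can supply
    non_partial: dict[str, list[str]],  # datasource node -> concepts it supplies completely
    grain_length: dict[str, int],       # datasource node -> number of grain components
) -> dict[str, list[str]]:
    pruned: dict[str, list[str]] = {}
    for key, value in subgraphs.items():
        is_subset = False
        matches: set[str] = set()
        for other_key, other_value in non_partial.items():
            if key != other_key and set(value).issubset(set(other_value)):
                if len(value) < len(other_value):
                    # strictly covered by a richer datasource: drop this one
                    is_subset = True
                    break
                # identical coverage: remember the tie and resolve it below
                matches.add(other_key)
                matches.add(key)
        if matches:
            # keep only the tied datasource with the fewest grain components
            is_subset = key != min(matches, key=lambda x: (grain_length[x], x))
        if not is_subset:
            pruned[key] = value
    return pruned


# Example: "ds~orders" is fully covered by "ds~order_details", so it is pruned.
coverage = {
    "ds~orders": ["c~order_id", "c~customer_id"],
    "ds~order_details": ["c~order_id", "c~customer_id", "c~amount"],
}
grains = {"ds~orders": 1, "ds~order_details": 2}
print(prune_covered_sources(coverage, coverage, grains))
# -> {'ds~order_details': ['c~order_id', 'c~customer_id', 'c~amount']}

Note that the real implementation compares each datasource's full reachable concept set against the other datasources' non-partial sets, so a source that only partially supplies a concept cannot shadow one that supplies it completely.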