pytrilogy 0.0.2.25__py3-none-any.whl → 0.0.2.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.27.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.27.dist-info}/RECORD +26 -26
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.27.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -1
- trilogy/core/graph_models.py +2 -2
- trilogy/core/models.py +205 -140
- trilogy/core/optimizations/inline_datasource.py +4 -4
- trilogy/core/processing/node_generators/common.py +0 -1
- trilogy/core/processing/node_generators/select_merge_node.py +56 -23
- trilogy/core/processing/nodes/base_node.py +3 -0
- trilogy/core/processing/nodes/merge_node.py +12 -12
- trilogy/core/processing/nodes/select_node_v2.py +6 -2
- trilogy/core/processing/utility.py +237 -258
- trilogy/core/query_processor.py +65 -53
- trilogy/dialect/base.py +1 -0
- trilogy/dialect/common.py +4 -25
- trilogy/executor.py +12 -3
- trilogy/hooks/query_debugger.py +5 -1
- trilogy/parsing/common.py +4 -6
- trilogy/parsing/parse_engine.py +20 -16
- trilogy/parsing/render.py +63 -21
- trilogy/parsing/trilogy.lark +6 -4
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.27.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.27.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.27.dist-info}/top_level.txt +0 -0
|
@@ -63,14 +63,14 @@ class InlineDatasource(OptimizationRule):
|
|
|
63
63
|
for replaceable in to_inline:
|
|
64
64
|
if replaceable.name not in self.candidates[cte.name]:
|
|
65
65
|
self.candidates[cte.name].add(replaceable.name)
|
|
66
|
-
self.count[replaceable.source.
|
|
66
|
+
self.count[replaceable.source.identifier] += 1
|
|
67
67
|
return True
|
|
68
68
|
if (
|
|
69
|
-
self.count[replaceable.source.
|
|
69
|
+
self.count[replaceable.source.identifier]
|
|
70
70
|
> CONFIG.optimizations.constant_inline_cutoff
|
|
71
71
|
):
|
|
72
72
|
self.log(
|
|
73
|
-
f"Skipping inlining raw datasource {replaceable.source.
|
|
73
|
+
f"Skipping inlining raw datasource {replaceable.source.identifier} ({replaceable.name}) due to multiple references"
|
|
74
74
|
)
|
|
75
75
|
continue
|
|
76
76
|
if not replaceable.source.datasources[0].grain.issubset(replaceable.grain):
|
|
@@ -81,7 +81,7 @@ class InlineDatasource(OptimizationRule):
|
|
|
81
81
|
result = cte.inline_parent_datasource(replaceable, force_group=force_group)
|
|
82
82
|
if result:
|
|
83
83
|
self.log(
|
|
84
|
-
f"Inlined parent {replaceable.name} with {replaceable.source.
|
|
84
|
+
f"Inlined parent {replaceable.name} with {replaceable.source.identifier}"
|
|
85
85
|
)
|
|
86
86
|
optimized = True
|
|
87
87
|
else:
|
|
@@ -28,14 +28,18 @@ def extract_address(node: str):
|
|
|
28
28
|
return node.split("~")[1].split("@")[0]
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
def get_graph_partial_nodes(
|
|
31
|
+
def get_graph_partial_nodes(
|
|
32
|
+
g: nx.DiGraph, conditions: WhereClause | None
|
|
33
|
+
) -> dict[str, list[str]]:
|
|
32
34
|
datasources: dict[str, Datasource] = nx.get_node_attributes(g, "datasource")
|
|
33
35
|
partial: dict[str, list[str]] = {}
|
|
34
36
|
for node in g.nodes:
|
|
35
37
|
if node in datasources:
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
38
|
+
ds = datasources[node]
|
|
39
|
+
partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
|
|
40
|
+
if ds.non_partial_for and conditions == ds.non_partial_for:
|
|
41
|
+
partial[node] = []
|
|
42
|
+
|
|
39
43
|
return partial
|
|
40
44
|
|
|
41
45
|
|
|
@@ -49,7 +53,10 @@ def get_graph_grain_length(g: nx.DiGraph) -> dict[str, int]:
|
|
|
49
53
|
|
|
50
54
|
|
|
51
55
|
def create_pruned_concept_graph(
|
|
52
|
-
g: nx.DiGraph,
|
|
56
|
+
g: nx.DiGraph,
|
|
57
|
+
all_concepts: List[Concept],
|
|
58
|
+
accept_partial: bool = False,
|
|
59
|
+
conditions: WhereClause | None = None,
|
|
53
60
|
) -> nx.DiGraph:
|
|
54
61
|
orig_g = g
|
|
55
62
|
g = g.copy()
|
|
@@ -66,11 +73,7 @@ def create_pruned_concept_graph(
|
|
|
66
73
|
relevent_datasets: list[str] = []
|
|
67
74
|
if not accept_partial:
|
|
68
75
|
partial = {}
|
|
69
|
-
|
|
70
|
-
if node in datasources:
|
|
71
|
-
partial[node] = [
|
|
72
|
-
concept_to_node(c) for c in datasources[node].partial_concepts
|
|
73
|
-
]
|
|
76
|
+
partial = get_graph_partial_nodes(g, conditions)
|
|
74
77
|
to_remove = []
|
|
75
78
|
for edge in g.edges:
|
|
76
79
|
if (
|
|
@@ -133,31 +136,53 @@ def create_pruned_concept_graph(
|
|
|
133
136
|
return g
|
|
134
137
|
|
|
135
138
|
|
|
136
|
-
def resolve_subgraphs(
|
|
139
|
+
def resolve_subgraphs(
|
|
140
|
+
g: nx.DiGraph, conditions: WhereClause | None
|
|
141
|
+
) -> dict[str, list[str]]:
|
|
137
142
|
datasources = [n for n in g.nodes if n.startswith("ds~")]
|
|
138
|
-
subgraphs
|
|
139
|
-
|
|
143
|
+
subgraphs: dict[str, list[str]] = {
|
|
144
|
+
ds: list(set(list(nx.all_neighbors(g, ds)))) for ds in datasources
|
|
145
|
+
}
|
|
146
|
+
partial_map = get_graph_partial_nodes(g, conditions)
|
|
140
147
|
grain_length = get_graph_grain_length(g)
|
|
141
|
-
|
|
142
|
-
|
|
148
|
+
concepts: dict[str, Concept] = nx.get_node_attributes(g, "concept")
|
|
149
|
+
non_partial_map = {
|
|
150
|
+
ds: [concepts[c].address for c in subgraphs[ds] if c not in partial_map[ds]]
|
|
151
|
+
for ds in datasources
|
|
152
|
+
}
|
|
153
|
+
concept_map = {
|
|
154
|
+
ds: [concepts[c].address for c in subgraphs[ds]] for ds in datasources
|
|
143
155
|
}
|
|
144
156
|
pruned_subgraphs = {}
|
|
145
|
-
for key,
|
|
157
|
+
for key, nodes in subgraphs.items():
|
|
158
|
+
value = non_partial_map[key]
|
|
159
|
+
all_concepts = concept_map[key]
|
|
146
160
|
is_subset = False
|
|
147
161
|
matches = set()
|
|
148
162
|
# Compare current list with other lists
|
|
149
|
-
for other_key,
|
|
150
|
-
|
|
163
|
+
for other_key, other_all_concepts in concept_map.items():
|
|
164
|
+
other_value = non_partial_map[other_key]
|
|
165
|
+
# needs to be a subset of non partial and a subset of all
|
|
166
|
+
if (
|
|
167
|
+
key != other_key
|
|
168
|
+
and set(value).issubset(set(other_value))
|
|
169
|
+
and set(all_concepts).issubset(set(other_all_concepts))
|
|
170
|
+
):
|
|
151
171
|
if len(value) < len(other_value):
|
|
152
172
|
is_subset = True
|
|
173
|
+
logger.debug(
|
|
174
|
+
f"Dropping subgraph {key} with {value} as it is a subset of {other_key} with {other_value}"
|
|
175
|
+
)
|
|
153
176
|
break
|
|
154
|
-
elif len(value) == len(other_value)
|
|
177
|
+
elif len(value) == len(other_value) and len(all_concepts) == len(
|
|
178
|
+
other_all_concepts
|
|
179
|
+
):
|
|
155
180
|
matches.add(other_key)
|
|
156
181
|
matches.add(key)
|
|
157
182
|
if matches:
|
|
158
183
|
is_subset = key is not min(matches, key=lambda x: (grain_length[x], x))
|
|
159
184
|
if not is_subset:
|
|
160
|
-
pruned_subgraphs[key] =
|
|
185
|
+
pruned_subgraphs[key] = nodes
|
|
161
186
|
return pruned_subgraphs
|
|
162
187
|
|
|
163
188
|
|
|
@@ -168,6 +193,7 @@ def create_select_node(
|
|
|
168
193
|
g,
|
|
169
194
|
environment: Environment,
|
|
170
195
|
depth: int,
|
|
196
|
+
conditions: WhereClause | None = None,
|
|
171
197
|
) -> StrategyNode:
|
|
172
198
|
ds_name = ds_name.split("~")[1]
|
|
173
199
|
all_concepts = [
|
|
@@ -206,6 +232,7 @@ def create_select_node(
|
|
|
206
232
|
c.concept for c in datasource.columns if c.is_nullable and c.concept in all_lcl
|
|
207
233
|
]
|
|
208
234
|
nullable_lcl = LooseConceptList(concepts=nullable_concepts)
|
|
235
|
+
partial_is_full = conditions and (conditions == datasource.non_partial_for)
|
|
209
236
|
|
|
210
237
|
bcandidate: StrategyNode = SelectNode(
|
|
211
238
|
input_concepts=[c.concept for c in datasource.columns],
|
|
@@ -214,12 +241,15 @@ def create_select_node(
|
|
|
214
241
|
g=g,
|
|
215
242
|
parents=[],
|
|
216
243
|
depth=depth,
|
|
217
|
-
partial_concepts=
|
|
244
|
+
partial_concepts=(
|
|
245
|
+
[] if partial_is_full else [c for c in all_concepts if c in partial_lcl]
|
|
246
|
+
),
|
|
218
247
|
nullable_concepts=[c for c in all_concepts if c in nullable_lcl],
|
|
219
248
|
accept_partial=accept_partial,
|
|
220
249
|
datasource=datasource,
|
|
221
250
|
grain=Grain(components=all_concepts),
|
|
222
251
|
conditions=datasource.where.conditional if datasource.where else None,
|
|
252
|
+
render_condition=not partial_is_full,
|
|
223
253
|
)
|
|
224
254
|
|
|
225
255
|
# we need to nest the group node one further
|
|
@@ -261,7 +291,9 @@ def gen_select_merge_node(
|
|
|
261
291
|
force_group=False,
|
|
262
292
|
)
|
|
263
293
|
for attempt in [False, True]:
|
|
264
|
-
pruned_concept_graph = create_pruned_concept_graph(
|
|
294
|
+
pruned_concept_graph = create_pruned_concept_graph(
|
|
295
|
+
g, non_constant, attempt, conditions
|
|
296
|
+
)
|
|
265
297
|
if pruned_concept_graph:
|
|
266
298
|
logger.info(
|
|
267
299
|
f"{padding(depth)}{LOGGER_PREFIX} found covering graph w/ partial flag {attempt}"
|
|
@@ -274,7 +306,7 @@ def gen_select_merge_node(
|
|
|
274
306
|
)
|
|
275
307
|
return None
|
|
276
308
|
|
|
277
|
-
sub_nodes = resolve_subgraphs(pruned_concept_graph)
|
|
309
|
+
sub_nodes = resolve_subgraphs(pruned_concept_graph, conditions)
|
|
278
310
|
|
|
279
311
|
logger.info(f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {sub_nodes}")
|
|
280
312
|
parents = [
|
|
@@ -285,6 +317,7 @@ def gen_select_merge_node(
|
|
|
285
317
|
accept_partial=accept_partial,
|
|
286
318
|
environment=environment,
|
|
287
319
|
depth=depth,
|
|
320
|
+
conditions=conditions,
|
|
288
321
|
)
|
|
289
322
|
for k, subgraph in sub_nodes.items()
|
|
290
323
|
]
|
|
@@ -165,6 +165,7 @@ class StrategyNode:
|
|
|
165
165
|
hidden_concepts: List[Concept] | None = None,
|
|
166
166
|
existence_concepts: List[Concept] | None = None,
|
|
167
167
|
virtual_output_concepts: List[Concept] | None = None,
|
|
168
|
+
render_condition: bool = True,
|
|
168
169
|
):
|
|
169
170
|
self.input_concepts: List[Concept] = (
|
|
170
171
|
unique(input_concepts, "address") if input_concepts else []
|
|
@@ -208,6 +209,7 @@ class StrategyNode:
|
|
|
208
209
|
)
|
|
209
210
|
self.validate_parents()
|
|
210
211
|
self.log = True
|
|
212
|
+
self.render_condition = render_condition
|
|
211
213
|
|
|
212
214
|
def add_parents(self, parents: list["StrategyNode"]):
|
|
213
215
|
self.parents += parents
|
|
@@ -380,6 +382,7 @@ class StrategyNode:
|
|
|
380
382
|
hidden_concepts=list(self.hidden_concepts),
|
|
381
383
|
existence_concepts=list(self.existence_concepts),
|
|
382
384
|
virtual_output_concepts=list(self.virtual_output_concepts),
|
|
385
|
+
render_condition=self.render_condition,
|
|
383
386
|
)
|
|
384
387
|
|
|
385
388
|
|
|
@@ -89,8 +89,8 @@ def deduplicate_nodes_and_joins(
|
|
|
89
89
|
joins = [
|
|
90
90
|
j
|
|
91
91
|
for j in joins
|
|
92
|
-
if j.left_node.resolve().
|
|
93
|
-
and j.right_node.resolve().
|
|
92
|
+
if j.left_node.resolve().identifier not in removed
|
|
93
|
+
and j.right_node.resolve().identifier not in removed
|
|
94
94
|
]
|
|
95
95
|
return joins, merged
|
|
96
96
|
|
|
@@ -155,8 +155,8 @@ class MergeNode(StrategyNode):
|
|
|
155
155
|
for join in node_joins:
|
|
156
156
|
left = join.left_node.resolve()
|
|
157
157
|
right = join.right_node.resolve()
|
|
158
|
-
if left.
|
|
159
|
-
raise SyntaxError(f"Cannot join node {left.
|
|
158
|
+
if left.identifier == right.identifier:
|
|
159
|
+
raise SyntaxError(f"Cannot join node {left.identifier} to itself")
|
|
160
160
|
joins.append(
|
|
161
161
|
BaseJoin(
|
|
162
162
|
left_datasource=left,
|
|
@@ -168,7 +168,7 @@ class MergeNode(StrategyNode):
|
|
|
168
168
|
)
|
|
169
169
|
return joins
|
|
170
170
|
|
|
171
|
-
def create_full_joins(self, dataset_list: List[QueryDatasource]):
|
|
171
|
+
def create_full_joins(self, dataset_list: List[QueryDatasource | Datasource]):
|
|
172
172
|
joins = []
|
|
173
173
|
seen = set()
|
|
174
174
|
for left_value in dataset_list:
|
|
@@ -198,7 +198,7 @@ class MergeNode(StrategyNode):
|
|
|
198
198
|
environment: Environment,
|
|
199
199
|
) -> List[BaseJoin | UnnestJoin]:
|
|
200
200
|
# only finally, join between them for unique values
|
|
201
|
-
dataset_list: List[QueryDatasource] = sorted(
|
|
201
|
+
dataset_list: List[QueryDatasource | Datasource] = sorted(
|
|
202
202
|
final_datasets, key=lambda x: -len(x.grain.components_copy)
|
|
203
203
|
)
|
|
204
204
|
|
|
@@ -215,7 +215,7 @@ class MergeNode(StrategyNode):
|
|
|
215
215
|
logger.info(
|
|
216
216
|
f"{self.logging_prefix}{LOGGER_PREFIX} inferring node joins to target grain {str(grain)}"
|
|
217
217
|
)
|
|
218
|
-
joins = get_node_joins(dataset_list,
|
|
218
|
+
joins = get_node_joins(dataset_list, environment=environment)
|
|
219
219
|
elif final_joins:
|
|
220
220
|
logger.info(
|
|
221
221
|
f"{self.logging_prefix}{LOGGER_PREFIX} translating provided node joins {len(final_joins)}"
|
|
@@ -238,13 +238,13 @@ class MergeNode(StrategyNode):
|
|
|
238
238
|
merged: dict[str, QueryDatasource | Datasource] = {}
|
|
239
239
|
final_joins: List[NodeJoin] | None = self.node_joins
|
|
240
240
|
for source in parent_sources:
|
|
241
|
-
if source.
|
|
241
|
+
if source.identifier in merged:
|
|
242
242
|
logger.info(
|
|
243
|
-
f"{self.logging_prefix}{LOGGER_PREFIX} merging parent node with {source.
|
|
243
|
+
f"{self.logging_prefix}{LOGGER_PREFIX} merging parent node with {source.identifier} into existing"
|
|
244
244
|
)
|
|
245
|
-
merged[source.
|
|
245
|
+
merged[source.identifier] = merged[source.identifier] + source
|
|
246
246
|
else:
|
|
247
|
-
merged[source.
|
|
247
|
+
merged[source.identifier] = source
|
|
248
248
|
|
|
249
249
|
# it's possible that we have more sources than we need
|
|
250
250
|
final_joins, merged = deduplicate_nodes_and_joins(
|
|
@@ -314,7 +314,7 @@ class MergeNode(StrategyNode):
|
|
|
314
314
|
full_join_concepts = []
|
|
315
315
|
for join in joins:
|
|
316
316
|
if isinstance(join, BaseJoin) and join.join_type == JoinType.FULL:
|
|
317
|
-
full_join_concepts += join.
|
|
317
|
+
full_join_concepts += join.input_concepts
|
|
318
318
|
if self.whole_grain:
|
|
319
319
|
force_group = False
|
|
320
320
|
elif self.force_group is False:
|
|
@@ -49,6 +49,7 @@ class SelectNode(StrategyNode):
|
|
|
49
49
|
conditions: Conditional | Comparison | Parenthetical | None = None,
|
|
50
50
|
preexisting_conditions: Conditional | Comparison | Parenthetical | None = None,
|
|
51
51
|
hidden_concepts: List[Concept] | None = None,
|
|
52
|
+
render_condition: bool = True,
|
|
52
53
|
):
|
|
53
54
|
super().__init__(
|
|
54
55
|
input_concepts=input_concepts,
|
|
@@ -65,6 +66,7 @@ class SelectNode(StrategyNode):
|
|
|
65
66
|
conditions=conditions,
|
|
66
67
|
preexisting_conditions=preexisting_conditions,
|
|
67
68
|
hidden_concepts=hidden_concepts,
|
|
69
|
+
render_condition=render_condition,
|
|
68
70
|
)
|
|
69
71
|
self.accept_partial = accept_partial
|
|
70
72
|
self.datasource = datasource
|
|
@@ -120,7 +122,8 @@ class SelectNode(StrategyNode):
|
|
|
120
122
|
],
|
|
121
123
|
nullable_concepts=[c.concept for c in datasource.columns if c.is_nullable],
|
|
122
124
|
source_type=SourceType.DIRECT_SELECT,
|
|
123
|
-
|
|
125
|
+
# we can skip rendering conditions
|
|
126
|
+
condition=self.conditions if self.render_condition else None,
|
|
124
127
|
# select nodes should never group
|
|
125
128
|
force_group=self.force_group,
|
|
126
129
|
hidden_concepts=self.hidden_concepts,
|
|
@@ -128,7 +131,7 @@ class SelectNode(StrategyNode):
|
|
|
128
131
|
|
|
129
132
|
def resolve_from_constant_datasources(self) -> QueryDatasource:
|
|
130
133
|
datasource = Datasource(
|
|
131
|
-
|
|
134
|
+
name=CONSTANT_DATASET, address=CONSTANT_DATASET, columns=[]
|
|
132
135
|
)
|
|
133
136
|
return QueryDatasource(
|
|
134
137
|
input_concepts=[],
|
|
@@ -205,6 +208,7 @@ class SelectNode(StrategyNode):
|
|
|
205
208
|
conditions=self.conditions,
|
|
206
209
|
preexisting_conditions=self.preexisting_conditions,
|
|
207
210
|
hidden_concepts=self.hidden_concepts,
|
|
211
|
+
render_condition=self.render_condition,
|
|
208
212
|
)
|
|
209
213
|
|
|
210
214
|
|