pytrilogy 0.0.1.110__py3-none-any.whl → 0.0.1.111__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/RECORD +33 -33
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -1
- trilogy/core/models.py +85 -67
- trilogy/core/optimization.py +23 -8
- trilogy/core/processing/concept_strategies_v3.py +44 -19
- trilogy/core/processing/node_generators/basic_node.py +2 -0
- trilogy/core/processing/node_generators/common.py +3 -1
- trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
- trilogy/core/processing/node_generators/filter_node.py +36 -6
- trilogy/core/processing/node_generators/node_merge_node.py +34 -23
- trilogy/core/processing/node_generators/rowset_node.py +30 -6
- trilogy/core/processing/node_generators/select_node.py +23 -9
- trilogy/core/processing/node_generators/unnest_node.py +24 -3
- trilogy/core/processing/node_generators/window_node.py +4 -2
- trilogy/core/processing/nodes/__init__.py +7 -6
- trilogy/core/processing/nodes/base_node.py +40 -6
- trilogy/core/processing/nodes/filter_node.py +15 -1
- trilogy/core/processing/nodes/group_node.py +20 -1
- trilogy/core/processing/nodes/merge_node.py +36 -7
- trilogy/core/processing/nodes/select_node_v2.py +34 -39
- trilogy/core/processing/nodes/unnest_node.py +12 -0
- trilogy/core/processing/nodes/window_node.py +11 -0
- trilogy/core/processing/utility.py +0 -14
- trilogy/core/query_processor.py +125 -29
- trilogy/dialect/base.py +45 -40
- trilogy/executor.py +31 -3
- trilogy/parsing/parse_engine.py +49 -17
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.111.dist-info}/top_level.txt +0 -0
trilogy/core/query_processor.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List, Optional, Set, Union, Dict
|
|
1
|
+
from typing import List, Optional, Set, Union, Dict, Tuple
|
|
2
2
|
|
|
3
3
|
from trilogy.core.env_processor import generate_graph
|
|
4
4
|
from trilogy.core.graph_models import ReferenceGraph
|
|
@@ -84,19 +84,31 @@ def base_join_to_join(
|
|
|
84
84
|
|
|
85
85
|
def generate_source_map(
|
|
86
86
|
query_datasource: QueryDatasource, all_new_ctes: List[CTE]
|
|
87
|
-
) -> Dict[str, str
|
|
87
|
+
) -> Tuple[Dict[str, list[str]], Dict[str, list[str]]]:
|
|
88
88
|
source_map: Dict[str, list[str]] = defaultdict(list)
|
|
89
89
|
# now populate anything derived in this level
|
|
90
90
|
for qdk, qdv in query_datasource.source_map.items():
|
|
91
|
+
unnest = [x for x in qdv if isinstance(x, UnnestJoin)]
|
|
92
|
+
for x in unnest:
|
|
93
|
+
source_map[qdk] = []
|
|
91
94
|
if (
|
|
92
95
|
qdk not in source_map
|
|
93
96
|
and len(qdv) == 1
|
|
94
97
|
and isinstance(list(qdv)[0], UnnestJoin)
|
|
95
98
|
):
|
|
96
99
|
source_map[qdk] = []
|
|
100
|
+
basic = [x for x in qdv if isinstance(x, Datasource)]
|
|
101
|
+
for base in basic:
|
|
102
|
+
source_map[qdk].append(base.name)
|
|
97
103
|
|
|
98
|
-
|
|
99
|
-
|
|
104
|
+
ctes = [x for x in qdv if isinstance(x, QueryDatasource)]
|
|
105
|
+
if ctes:
|
|
106
|
+
names = set([x.name for x in ctes])
|
|
107
|
+
matches = [cte for cte in all_new_ctes if cte.source.name in names]
|
|
108
|
+
|
|
109
|
+
if not matches and names:
|
|
110
|
+
raise SyntaxError(query_datasource.source_map)
|
|
111
|
+
for cte in matches:
|
|
100
112
|
output_address = [
|
|
101
113
|
x.address
|
|
102
114
|
for x in cte.output_columns
|
|
@@ -105,21 +117,27 @@ def generate_source_map(
|
|
|
105
117
|
if qdk in output_address:
|
|
106
118
|
source_map[qdk].append(cte.name)
|
|
107
119
|
# now do a pass that accepts partials
|
|
108
|
-
# TODO: move this into a second loop by first creationg all sub
|
|
120
|
+
# TODO: move this into a second loop by first creating all sub sources
|
|
109
121
|
# then loop through this
|
|
110
|
-
for cte in
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
if qdk not in source_map:
|
|
114
|
-
source_map[qdk] = [cte.name]
|
|
115
|
-
if qdk not in source_map and not qdv:
|
|
116
|
-
# set source to empty, as it must be derived in this element
|
|
117
|
-
source_map[qdk] = []
|
|
122
|
+
for cte in matches:
|
|
123
|
+
if qdk not in source_map:
|
|
124
|
+
source_map[qdk] = [cte.name]
|
|
118
125
|
if qdk not in source_map:
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
126
|
+
if not qdv:
|
|
127
|
+
source_map[qdk] = []
|
|
128
|
+
elif CONFIG.validate_missing:
|
|
129
|
+
raise ValueError(
|
|
130
|
+
f"Missing {qdk} in {source_map}, source map {query_datasource.source_map} "
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
# existence lookups use a separate map
|
|
134
|
+
# as they cannot be referenced in row resolution
|
|
135
|
+
existence_source_map: Dict[str, list[str]] = defaultdict(list)
|
|
136
|
+
for ek, ev in query_datasource.existence_source_map.items():
|
|
137
|
+
names = set([x.name for x in ev])
|
|
138
|
+
ematches = [cte.name for cte in all_new_ctes if cte.source.name in names]
|
|
139
|
+
existence_source_map[ek] = ematches
|
|
140
|
+
return {k: [] if not v else v for k, v in source_map.items()}, existence_source_map
|
|
123
141
|
|
|
124
142
|
|
|
125
143
|
def datasource_to_query_datasource(datasource: Datasource) -> QueryDatasource:
|
|
@@ -158,6 +176,52 @@ def generate_cte_name(full_name: str, name_map: dict[str, str]) -> str:
|
|
|
158
176
|
return full_name.replace("<", "").replace(">", "").replace(",", "_")
|
|
159
177
|
|
|
160
178
|
|
|
179
|
+
def resolve_cte_base_name_and_alias(
|
|
180
|
+
name: str,
|
|
181
|
+
source: QueryDatasource,
|
|
182
|
+
parents: List[CTE],
|
|
183
|
+
joins: List[Join | InstantiatedUnnestJoin],
|
|
184
|
+
) -> Tuple[str | None, str | None]:
|
|
185
|
+
|
|
186
|
+
valid_joins: List[Join] = [join for join in joins if isinstance(join, Join)]
|
|
187
|
+
relevant_parent_sources = set()
|
|
188
|
+
for k, v in source.source_map.items():
|
|
189
|
+
if v:
|
|
190
|
+
relevant_parent_sources.update(v)
|
|
191
|
+
eligible = [x for x in source.datasources if x in relevant_parent_sources]
|
|
192
|
+
if (
|
|
193
|
+
len(eligible) == 1
|
|
194
|
+
and isinstance(eligible[0], Datasource)
|
|
195
|
+
and not eligible[0].name == CONSTANT_DATASET
|
|
196
|
+
):
|
|
197
|
+
ds = eligible[0]
|
|
198
|
+
return ds.safe_location, ds.identifier
|
|
199
|
+
|
|
200
|
+
# if we have multiple joined CTEs, pick the base
|
|
201
|
+
# as the root
|
|
202
|
+
elif len(eligible) == 1 and len(parents) == 1:
|
|
203
|
+
return parents[0].name, parents[0].name
|
|
204
|
+
elif valid_joins and len(valid_joins) > 0:
|
|
205
|
+
candidates = [x.left_cte.name for x in valid_joins]
|
|
206
|
+
disallowed = [x.right_cte.name for x in valid_joins]
|
|
207
|
+
try:
|
|
208
|
+
cte = [y for y in candidates if y not in disallowed][0]
|
|
209
|
+
return cte, cte
|
|
210
|
+
except IndexError:
|
|
211
|
+
raise SyntaxError(
|
|
212
|
+
f"Invalid join configuration {candidates} {disallowed} with all parents {[x.base_name for x in parents]}"
|
|
213
|
+
)
|
|
214
|
+
elif eligible:
|
|
215
|
+
matched = [x for x in parents if x.source.name == eligible[0].name]
|
|
216
|
+
if matched:
|
|
217
|
+
return matched[0].name, matched[0].name
|
|
218
|
+
|
|
219
|
+
logger.info(
|
|
220
|
+
f"Could not determine CTE base name for {name} with relevant sources {relevant_parent_sources}"
|
|
221
|
+
)
|
|
222
|
+
return None, None
|
|
223
|
+
|
|
224
|
+
|
|
161
225
|
def datasource_to_ctes(
|
|
162
226
|
query_datasource: QueryDatasource, name_map: dict[str, str]
|
|
163
227
|
) -> List[CTE]:
|
|
@@ -176,7 +240,8 @@ def datasource_to_ctes(
|
|
|
176
240
|
sub_cte = datasource_to_ctes(sub_datasource, name_map)
|
|
177
241
|
parents += sub_cte
|
|
178
242
|
all_new_ctes += sub_cte
|
|
179
|
-
source_map = generate_source_map(query_datasource, all_new_ctes)
|
|
243
|
+
source_map, existence_map = generate_source_map(query_datasource, all_new_ctes)
|
|
244
|
+
|
|
180
245
|
else:
|
|
181
246
|
# source is the first datasource of the query datasource
|
|
182
247
|
source = query_datasource.datasources[0]
|
|
@@ -184,13 +249,27 @@ def datasource_to_ctes(
|
|
|
184
249
|
# render properly on initial access; since they have
|
|
185
250
|
# no actual source
|
|
186
251
|
if source.full_name == DEFAULT_NAMESPACE + "_" + CONSTANT_DATASET:
|
|
187
|
-
source_map = {k:
|
|
252
|
+
source_map = {k: [] for k in query_datasource.source_map}
|
|
253
|
+
existence_map = source_map
|
|
188
254
|
else:
|
|
189
255
|
source_map = {
|
|
190
|
-
k:
|
|
256
|
+
k: [] if not v else [source.identifier]
|
|
191
257
|
for k, v in query_datasource.source_map.items()
|
|
192
258
|
}
|
|
259
|
+
existence_map = source_map
|
|
260
|
+
|
|
193
261
|
human_id = generate_cte_name(query_datasource.full_name, name_map)
|
|
262
|
+
logger.info(
|
|
263
|
+
f"Finished building source map for {human_id} with {len(parents)} parents, have {source_map}, parent had non-empty keys {[k for k, v in query_datasource.source_map.items() if v]} "
|
|
264
|
+
)
|
|
265
|
+
final_joins = [
|
|
266
|
+
x
|
|
267
|
+
for x in [base_join_to_join(join, parents) for join in query_datasource.joins]
|
|
268
|
+
if x
|
|
269
|
+
]
|
|
270
|
+
base_name, base_alias = resolve_cte_base_name_and_alias(
|
|
271
|
+
human_id, query_datasource, parents, final_joins
|
|
272
|
+
)
|
|
194
273
|
cte = CTE(
|
|
195
274
|
name=human_id,
|
|
196
275
|
source=query_datasource,
|
|
@@ -200,14 +279,9 @@ def datasource_to_ctes(
|
|
|
200
279
|
for c in query_datasource.output_concepts
|
|
201
280
|
],
|
|
202
281
|
source_map=source_map,
|
|
282
|
+
existence_source_map=existence_map,
|
|
203
283
|
# related columns include all referenced columns, such as filtering
|
|
204
|
-
joins=
|
|
205
|
-
x
|
|
206
|
-
for x in [
|
|
207
|
-
base_join_to_join(join, parents) for join in query_datasource.joins
|
|
208
|
-
]
|
|
209
|
-
if x
|
|
210
|
-
],
|
|
284
|
+
joins=final_joins,
|
|
211
285
|
grain=query_datasource.grain,
|
|
212
286
|
group_to_grain=query_datasource.group_required,
|
|
213
287
|
# we restrict parent_ctes to one level
|
|
@@ -217,11 +291,13 @@ def datasource_to_ctes(
|
|
|
217
291
|
partial_concepts=query_datasource.partial_concepts,
|
|
218
292
|
join_derived_concepts=query_datasource.join_derived_concepts,
|
|
219
293
|
hidden_concepts=query_datasource.hidden_concepts,
|
|
294
|
+
base_name_override=base_name,
|
|
295
|
+
base_alias_override=base_alias,
|
|
220
296
|
)
|
|
221
297
|
if cte.grain != query_datasource.grain:
|
|
222
298
|
raise ValueError("Grain was corrupted in CTE generation")
|
|
223
299
|
for x in cte.output_columns:
|
|
224
|
-
if x.address not in cte.source_map:
|
|
300
|
+
if x.address not in cte.source_map and CONFIG.validate_missing:
|
|
225
301
|
raise ValueError(
|
|
226
302
|
f"Missing {x.address} in {cte.source_map}, source map {cte.source.source_map.keys()} "
|
|
227
303
|
)
|
|
@@ -243,12 +319,32 @@ def get_query_datasources(
|
|
|
243
319
|
if not statement.output_components:
|
|
244
320
|
raise ValueError(f"Statement has no output components {statement}")
|
|
245
321
|
ds = source_query_concepts(
|
|
246
|
-
statement.output_components,
|
|
322
|
+
statement.output_components,
|
|
323
|
+
environment=environment,
|
|
324
|
+
g=graph,
|
|
247
325
|
)
|
|
248
326
|
if hooks:
|
|
249
327
|
for hook in hooks:
|
|
250
328
|
hook.process_root_strategy_node(ds)
|
|
251
329
|
final_qds = ds.resolve()
|
|
330
|
+
|
|
331
|
+
# if we have a where clause doing an existence check
|
|
332
|
+
# treat that as separate subquery
|
|
333
|
+
if (where := statement.where_clause) and where.existence_arguments:
|
|
334
|
+
for subselect in where.existence_arguments:
|
|
335
|
+
if not subselect:
|
|
336
|
+
continue
|
|
337
|
+
logger.info(
|
|
338
|
+
f"{LOGGER_PREFIX} fetching existance clause inputs {[str(c) for c in subselect]}"
|
|
339
|
+
)
|
|
340
|
+
eds = source_query_concepts([*subselect], environment=environment, g=graph)
|
|
341
|
+
|
|
342
|
+
final_eds = eds.resolve()
|
|
343
|
+
first_parent = final_qds
|
|
344
|
+
first_parent.datasources.append(final_eds)
|
|
345
|
+
for x in final_eds.output_concepts:
|
|
346
|
+
if x.address not in first_parent.existence_source_map:
|
|
347
|
+
first_parent.existence_source_map[x.address] = {final_eds}
|
|
252
348
|
return final_qds
|
|
253
349
|
|
|
254
350
|
|
trilogy/dialect/base.py
CHANGED
|
@@ -193,27 +193,6 @@ ORDER BY {% for order in order_by %}
|
|
|
193
193
|
)
|
|
194
194
|
|
|
195
195
|
|
|
196
|
-
def check_lineage(c: Concept, cte: CTE) -> bool:
|
|
197
|
-
checks = []
|
|
198
|
-
if not c.lineage:
|
|
199
|
-
return True
|
|
200
|
-
for sub_c in c.lineage.concept_arguments:
|
|
201
|
-
if not isinstance(sub_c, Concept):
|
|
202
|
-
continue
|
|
203
|
-
if sub_c.address in cte.source_map or (
|
|
204
|
-
sub_c.lineage and check_lineage(sub_c, cte)
|
|
205
|
-
):
|
|
206
|
-
checks.append(True)
|
|
207
|
-
else:
|
|
208
|
-
logger.debug(
|
|
209
|
-
f"{LOGGER_PREFIX} [{sub_c.address}] not found in source map for"
|
|
210
|
-
f" {cte.name}, have cte keys {[c for c in cte.source_map.keys()]} and"
|
|
211
|
-
f" datasource keys {[c for c in cte.source.source_map.keys()]}"
|
|
212
|
-
)
|
|
213
|
-
checks.append(False)
|
|
214
|
-
return all(checks)
|
|
215
|
-
|
|
216
|
-
|
|
217
196
|
def safe_quote(string: str, quote_char: str):
|
|
218
197
|
# split dotted identifiers
|
|
219
198
|
# TODO: evaluate if we need smarter parsing for strings that could actually include .
|
|
@@ -259,7 +238,7 @@ class BaseDialect:
|
|
|
259
238
|
f"{LOGGER_PREFIX} [{c.address}] Starting rendering loop on cte: {cte.name}"
|
|
260
239
|
)
|
|
261
240
|
|
|
262
|
-
if c.lineage and cte.source_map.get(c.address,
|
|
241
|
+
if c.lineage and cte.source_map.get(c.address, []) == []:
|
|
263
242
|
logger.debug(
|
|
264
243
|
f"{LOGGER_PREFIX} [{c.address}] rendering concept with lineage that is not already existing"
|
|
265
244
|
)
|
|
@@ -273,7 +252,11 @@ class BaseDialect:
|
|
|
273
252
|
]
|
|
274
253
|
rval = f"{self.WINDOW_FUNCTION_MAP[c.lineage.type](concept = self.render_concept_sql(c.lineage.content, cte=cte, alias=False), window=','.join(rendered_over_components), sort=','.join(rendered_order_components))}" # noqa: E501
|
|
275
254
|
elif isinstance(c.lineage, FilterItem):
|
|
276
|
-
|
|
255
|
+
# for cases when we've optimized this
|
|
256
|
+
if len(cte.output_columns) == 1:
|
|
257
|
+
rval = self.render_expr(c.lineage.content, cte=cte)
|
|
258
|
+
else:
|
|
259
|
+
rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional, cte=cte)} THEN {self.render_concept_sql(c.lineage.content, cte=cte, alias=False)} ELSE NULL END"
|
|
277
260
|
elif isinstance(c.lineage, RowsetItem):
|
|
278
261
|
rval = f"{self.render_concept_sql(c.lineage.content, cte=cte, alias=False)}"
|
|
279
262
|
elif isinstance(c.lineage, MultiSelectStatement):
|
|
@@ -356,17 +339,28 @@ class BaseDialect:
|
|
|
356
339
|
cte: Optional[CTE] = None,
|
|
357
340
|
cte_map: Optional[Dict[str, CTE]] = None,
|
|
358
341
|
) -> str:
|
|
359
|
-
# if isinstance(e, Concept):
|
|
360
|
-
# cte = cte or cte_map.get(e.address, None)
|
|
361
342
|
|
|
362
343
|
if isinstance(e, SubselectComparison):
|
|
363
|
-
|
|
344
|
+
|
|
364
345
|
if isinstance(e.right, Concept):
|
|
365
|
-
|
|
346
|
+
# we won't always have an existence map
|
|
347
|
+
# so fall back to the normal map
|
|
348
|
+
lookup_cte = cte
|
|
349
|
+
if cte_map and not lookup_cte:
|
|
350
|
+
lookup_cte = cte_map.get(e.right.address)
|
|
351
|
+
assert lookup_cte, "Subselects must be rendered with a CTE in context"
|
|
352
|
+
if e.right.address not in lookup_cte.existence_source_map:
|
|
353
|
+
lookup = lookup_cte.source_map[e.right.address]
|
|
354
|
+
else:
|
|
355
|
+
lookup = lookup_cte.existence_source_map[e.right.address]
|
|
356
|
+
|
|
357
|
+
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {lookup[0]}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {lookup[0]})"
|
|
358
|
+
elif isinstance(e.right, (ListWrapper, Parenthetical)):
|
|
359
|
+
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
|
|
360
|
+
elif isinstance(e.right, (str, int, bool, float, list)):
|
|
361
|
+
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} ({self.render_expr(e.right, cte=cte, cte_map=cte_map)})"
|
|
366
362
|
else:
|
|
367
|
-
|
|
368
|
-
f"Subselects must be a concept, got {e.right}"
|
|
369
|
-
)
|
|
363
|
+
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} ({self.render_expr(e.right, cte=cte, cte_map=cte_map)})"
|
|
370
364
|
elif isinstance(e, Comparison):
|
|
371
365
|
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
|
|
372
366
|
elif isinstance(e, Conditional):
|
|
@@ -449,15 +443,15 @@ class BaseDialect:
|
|
|
449
443
|
for c in cte.output_columns
|
|
450
444
|
if c.address not in [y.address for y in cte.hidden_concepts]
|
|
451
445
|
]
|
|
446
|
+
if cte.base_name == cte.base_alias:
|
|
447
|
+
source = cte.base_name
|
|
448
|
+
else:
|
|
449
|
+
source = f"{cte.base_name} as {cte.base_alias}"
|
|
452
450
|
return CompiledCTE(
|
|
453
451
|
name=cte.name,
|
|
454
452
|
statement=self.SQL_TEMPLATE.render(
|
|
455
453
|
select_columns=select_columns,
|
|
456
|
-
base=(
|
|
457
|
-
f"{cte.base_name} as {cte.base_alias}"
|
|
458
|
-
if cte.render_from_clause
|
|
459
|
-
else None
|
|
460
|
-
),
|
|
454
|
+
base=(f"{source}" if cte.render_from_clause else None),
|
|
461
455
|
grain=cte.grain,
|
|
462
456
|
limit=cte.limit,
|
|
463
457
|
# some joins may not need to be rendered
|
|
@@ -513,7 +507,7 @@ class BaseDialect:
|
|
|
513
507
|
c
|
|
514
508
|
for c in cte.output_columns
|
|
515
509
|
if c.purpose == Purpose.CONSTANT
|
|
516
|
-
and cte.source_map[c.address] !=
|
|
510
|
+
and cte.source_map[c.address] != []
|
|
517
511
|
],
|
|
518
512
|
"address",
|
|
519
513
|
)
|
|
@@ -639,7 +633,7 @@ class BaseDialect:
|
|
|
639
633
|
filter = set(
|
|
640
634
|
[
|
|
641
635
|
str(x.address)
|
|
642
|
-
for x in query.where_clause.
|
|
636
|
+
for x in query.where_clause.row_arguments
|
|
643
637
|
if not x.derivation == PurposeLineage.CONSTANT
|
|
644
638
|
]
|
|
645
639
|
)
|
|
@@ -650,10 +644,21 @@ class BaseDialect:
|
|
|
650
644
|
|
|
651
645
|
if not found:
|
|
652
646
|
raise NotImplementedError(
|
|
653
|
-
f"Cannot generate query with filtering on
|
|
654
|
-
f" not a subset of the query output grain {query_output}.
|
|
655
|
-
" filtered concept instead
|
|
647
|
+
f"Cannot generate query with filtering on row arguments {filter} that is"
|
|
648
|
+
f" not a subset of the query output grain {query_output}. Try a"
|
|
649
|
+
" filtered concept instead, or include it in the select clause"
|
|
656
650
|
)
|
|
651
|
+
for ex_set in query.where_clause.existence_arguments:
|
|
652
|
+
for c in ex_set:
|
|
653
|
+
if c.address not in cte_output_map:
|
|
654
|
+
cts = [
|
|
655
|
+
ct
|
|
656
|
+
for ct in query.ctes
|
|
657
|
+
if ct.name in query.base.existence_source_map[c.address]
|
|
658
|
+
]
|
|
659
|
+
if not cts:
|
|
660
|
+
raise ValueError(query.base.existence_source_map[c.address])
|
|
661
|
+
cte_output_map[c.address] = cts[0]
|
|
657
662
|
|
|
658
663
|
compiled_ctes = self.generate_ctes(query)
|
|
659
664
|
|
trilogy/executor.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List, Optional, Any
|
|
1
|
+
from typing import List, Optional, Any, Generator
|
|
2
2
|
from functools import singledispatchmethod
|
|
3
3
|
from sqlalchemy import text
|
|
4
4
|
from sqlalchemy.engine import Engine, CursorResult
|
|
@@ -222,6 +222,35 @@ class Executor(object):
|
|
|
222
222
|
sql.append(x)
|
|
223
223
|
return sql
|
|
224
224
|
|
|
225
|
+
def parse_text_generator(
|
|
226
|
+
self, command: str, persist: bool = False
|
|
227
|
+
) -> Generator[
|
|
228
|
+
ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement, None, None
|
|
229
|
+
]:
|
|
230
|
+
"""Process a preql text command"""
|
|
231
|
+
_, parsed = parse_text(command, self.environment)
|
|
232
|
+
generatable = [
|
|
233
|
+
x
|
|
234
|
+
for x in parsed
|
|
235
|
+
if isinstance(
|
|
236
|
+
x,
|
|
237
|
+
(
|
|
238
|
+
SelectStatement,
|
|
239
|
+
PersistStatement,
|
|
240
|
+
MultiSelectStatement,
|
|
241
|
+
ShowStatement,
|
|
242
|
+
),
|
|
243
|
+
)
|
|
244
|
+
]
|
|
245
|
+
while generatable:
|
|
246
|
+
t = generatable.pop(0)
|
|
247
|
+
x = self.generator.generate_queries(
|
|
248
|
+
self.environment, [t], hooks=self.hooks
|
|
249
|
+
)[0]
|
|
250
|
+
if persist and isinstance(x, ProcessedQueryPersist):
|
|
251
|
+
self.environment.add_datasource(x.datasource)
|
|
252
|
+
yield x
|
|
253
|
+
|
|
225
254
|
def execute_raw_sql(self, command: str) -> CursorResult:
|
|
226
255
|
"""Run a command against the raw underlying
|
|
227
256
|
execution engine"""
|
|
@@ -229,10 +258,9 @@ class Executor(object):
|
|
|
229
258
|
|
|
230
259
|
def execute_text(self, command: str) -> List[CursorResult]:
|
|
231
260
|
"""Run a preql text command"""
|
|
232
|
-
sql = self.parse_text(command)
|
|
233
261
|
output = []
|
|
234
262
|
# connection = self.engine.connect()
|
|
235
|
-
for statement in
|
|
263
|
+
for statement in self.parse_text_generator(command):
|
|
236
264
|
if isinstance(statement, ProcessedShowStatement):
|
|
237
265
|
output.append(
|
|
238
266
|
generate_result_set(
|
trilogy/parsing/parse_engine.py
CHANGED
|
@@ -101,6 +101,7 @@ from trilogy.core.models import (
|
|
|
101
101
|
ConceptDerivation,
|
|
102
102
|
RowsetDerivationStatement,
|
|
103
103
|
LooseConceptList,
|
|
104
|
+
list_to_wrapper,
|
|
104
105
|
)
|
|
105
106
|
from trilogy.parsing.exceptions import ParseError
|
|
106
107
|
from trilogy.utility import string_to_hash
|
|
@@ -113,6 +114,7 @@ from trilogy.parsing.common import (
|
|
|
113
114
|
arbitrary_to_concept,
|
|
114
115
|
)
|
|
115
116
|
|
|
117
|
+
|
|
116
118
|
CONSTANT_TYPES = (int, float, str, bool, ListWrapper)
|
|
117
119
|
|
|
118
120
|
grammar = r"""
|
|
@@ -138,7 +140,7 @@ grammar = r"""
|
|
|
138
140
|
//<customer_id,country>.property local_alias STRING
|
|
139
141
|
concept_property_declaration: PROPERTY (prop_ident | IDENTIFIER) data_type concept_nullable_modifier? metadata?
|
|
140
142
|
//metric post_length <- len(post_text);
|
|
141
|
-
concept_derivation: (PURPOSE | AUTO | PROPERTY ) IDENTIFIER "<" "-" expr
|
|
143
|
+
concept_derivation: (PURPOSE | AUTO | PROPERTY ) (prop_ident | IDENTIFIER) "<" "-" expr
|
|
142
144
|
|
|
143
145
|
rowset_derivation_statement: ("rowset"i IDENTIFIER "<" "-" (multi_select_statement | select_statement)) | ("with"i IDENTIFIER "as"i (multi_select_statement | select_statement))
|
|
144
146
|
|
|
@@ -179,13 +181,11 @@ grammar = r"""
|
|
|
179
181
|
// multiple_selects
|
|
180
182
|
multi_select_statement: select_statement ("merge" select_statement)+ "align"i align_clause where? comment* order_by? comment* limit? comment*
|
|
181
183
|
|
|
182
|
-
|
|
183
184
|
align_item: IDENTIFIER ":" IDENTIFIER ("," IDENTIFIER)* ","?
|
|
184
185
|
|
|
185
186
|
align_clause: align_item ("," align_item)* ","?
|
|
186
187
|
|
|
187
188
|
// merge statemment
|
|
188
|
-
|
|
189
189
|
merge_statement: "merge" IDENTIFIER ("," IDENTIFIER)* ","? comment*
|
|
190
190
|
|
|
191
191
|
// FUNCTION blocks
|
|
@@ -193,7 +193,6 @@ grammar = r"""
|
|
|
193
193
|
function_binding_item: IDENTIFIER ":" data_type
|
|
194
194
|
function_binding_list: (function_binding_item ",")* function_binding_item ","?
|
|
195
195
|
raw_function: "bind" "sql" IDENTIFIER "(" function_binding_list ")" "-" ">" data_type "as"i MULTILINE_STRING
|
|
196
|
-
|
|
197
196
|
|
|
198
197
|
// user_id where state = Mexico
|
|
199
198
|
filter_item: "filter"i IDENTIFIER where
|
|
@@ -249,9 +248,9 @@ grammar = r"""
|
|
|
249
248
|
|
|
250
249
|
COMPARISON_OPERATOR: (/is[\s]+not/ | "is" |"=" | ">" | "<" | ">=" | "<=" | "!=" )
|
|
251
250
|
|
|
252
|
-
comparison: (expr COMPARISON_OPERATOR expr)
|
|
251
|
+
comparison: (expr COMPARISON_OPERATOR expr)
|
|
253
252
|
|
|
254
|
-
subselect_comparison: expr array_comparison expr
|
|
253
|
+
subselect_comparison: expr array_comparison expr | (expr array_comparison expr_tuple)
|
|
255
254
|
|
|
256
255
|
expr_tuple: "(" (expr ",")* expr ","? ")"
|
|
257
256
|
|
|
@@ -267,7 +266,6 @@ grammar = r"""
|
|
|
267
266
|
|
|
268
267
|
// functions
|
|
269
268
|
|
|
270
|
-
//math TODO: add syntactic sugar
|
|
271
269
|
fadd: ("add"i "(" expr "," expr ")" ) | ( expr "+" expr )
|
|
272
270
|
fsub: ("subtract"i "(" expr "," expr ")" ) | ( expr "-" expr )
|
|
273
271
|
fmul: ("multiply"i "(" expr "," expr ")" ) | ( expr "*" expr )
|
|
@@ -738,10 +736,17 @@ class ParseToObjects(Transformer):
|
|
|
738
736
|
purpose = args[0]
|
|
739
737
|
if purpose == Purpose.AUTO:
|
|
740
738
|
purpose = None
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
739
|
+
raw_name = args[1]
|
|
740
|
+
if isinstance(raw_name, str):
|
|
741
|
+
lookup, namespace, name, parent_concept = parse_concept_reference(
|
|
742
|
+
raw_name, self.environment, purpose
|
|
743
|
+
)
|
|
744
|
+
else:
|
|
745
|
+
keys, name = raw_name
|
|
746
|
+
if "." in name:
|
|
747
|
+
namespace, name = name.rsplit(".", 1)
|
|
748
|
+
else:
|
|
749
|
+
namespace = self.environment.namespace or DEFAULT_NAMESPACE
|
|
745
750
|
source_value = args[2]
|
|
746
751
|
# we need to strip off every parenthetical to see what is being assigned.
|
|
747
752
|
while isinstance(source_value, Parenthetical):
|
|
@@ -971,7 +976,26 @@ class ParseToObjects(Transformer):
|
|
|
971
976
|
return Ordering(args.lower())
|
|
972
977
|
|
|
973
978
|
def order_list(self, args):
|
|
974
|
-
|
|
979
|
+
|
|
980
|
+
def handle_order_item(x, namespace: str):
|
|
981
|
+
if not isinstance(x, Concept):
|
|
982
|
+
x = arbitrary_to_concept(
|
|
983
|
+
x,
|
|
984
|
+
namespace=namespace,
|
|
985
|
+
name=f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(x))}",
|
|
986
|
+
)
|
|
987
|
+
return x
|
|
988
|
+
|
|
989
|
+
return [
|
|
990
|
+
OrderItem(
|
|
991
|
+
expr=handle_order_item(
|
|
992
|
+
x,
|
|
993
|
+
self.environment.namespace,
|
|
994
|
+
),
|
|
995
|
+
order=y,
|
|
996
|
+
)
|
|
997
|
+
for x, y in zip(args[::2], args[1::2])
|
|
998
|
+
]
|
|
975
999
|
|
|
976
1000
|
def order_by(self, args):
|
|
977
1001
|
return OrderBy(items=args[0])
|
|
@@ -1207,26 +1231,34 @@ class ParseToObjects(Transformer):
|
|
|
1207
1231
|
return float(args[0])
|
|
1208
1232
|
|
|
1209
1233
|
def array_lit(self, args):
|
|
1210
|
-
|
|
1211
|
-
assert len(set(types)) == 1
|
|
1212
|
-
return ListWrapper(args, type=types[0])
|
|
1234
|
+
return list_to_wrapper(args)
|
|
1213
1235
|
|
|
1214
1236
|
def literal(self, args):
|
|
1215
1237
|
return args[0]
|
|
1216
1238
|
|
|
1217
1239
|
def comparison(self, args) -> Comparison:
|
|
1240
|
+
if args[1] == ComparisonOperator.IN:
|
|
1241
|
+
raise SyntaxError
|
|
1218
1242
|
return Comparison(left=args[0], right=args[2], operator=args[1])
|
|
1219
1243
|
|
|
1220
1244
|
@v_args(meta=True)
|
|
1221
1245
|
def subselect_comparison(self, meta: Meta, args) -> SubselectComparison:
|
|
1222
1246
|
right = args[2]
|
|
1223
|
-
|
|
1247
|
+
|
|
1248
|
+
while isinstance(right, Parenthetical) and isinstance(
|
|
1249
|
+
right.content,
|
|
1250
|
+
(Concept, Function, FilterItem, WindowItem, AggregateWrapper, ListWrapper),
|
|
1251
|
+
):
|
|
1252
|
+
right = right.content
|
|
1253
|
+
if isinstance(
|
|
1254
|
+
right, (Function, FilterItem, WindowItem, AggregateWrapper, ListWrapper)
|
|
1255
|
+
):
|
|
1224
1256
|
right = arbitrary_to_concept(
|
|
1225
1257
|
right,
|
|
1226
1258
|
namespace=self.environment.namespace,
|
|
1227
1259
|
name=f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(right))}",
|
|
1228
1260
|
)
|
|
1229
|
-
self.environment.add_concept(right)
|
|
1261
|
+
self.environment.add_concept(right, meta=meta)
|
|
1230
1262
|
return SubselectComparison(
|
|
1231
1263
|
left=args[0],
|
|
1232
1264
|
right=right,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|