pytrilogy 0.0.2.12__py3-none-any.whl → 0.0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic.
- {pytrilogy-0.0.2.12.dist-info → pytrilogy-0.0.2.14.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.12.dist-info → pytrilogy-0.0.2.14.dist-info}/RECORD +31 -31
- {pytrilogy-0.0.2.12.dist-info → pytrilogy-0.0.2.14.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +16 -1
- trilogy/core/enums.py +3 -0
- trilogy/core/models.py +150 -17
- trilogy/core/optimizations/predicate_pushdown.py +1 -1
- trilogy/core/processing/node_generators/basic_node.py +8 -1
- trilogy/core/processing/node_generators/common.py +13 -36
- trilogy/core/processing/node_generators/filter_node.py +1 -15
- trilogy/core/processing/node_generators/group_node.py +19 -1
- trilogy/core/processing/node_generators/group_to_node.py +0 -12
- trilogy/core/processing/node_generators/multiselect_node.py +1 -10
- trilogy/core/processing/node_generators/rowset_node.py +3 -14
- trilogy/core/processing/node_generators/select_node.py +26 -0
- trilogy/core/processing/node_generators/window_node.py +1 -1
- trilogy/core/processing/nodes/base_node.py +40 -11
- trilogy/core/processing/nodes/group_node.py +31 -18
- trilogy/core/processing/nodes/merge_node.py +14 -5
- trilogy/core/processing/nodes/select_node_v2.py +4 -0
- trilogy/core/processing/utility.py +91 -3
- trilogy/core/query_processor.py +6 -12
- trilogy/dialect/common.py +10 -8
- trilogy/executor.py +8 -2
- trilogy/parsing/common.py +34 -4
- trilogy/parsing/parse_engine.py +31 -19
- trilogy/parsing/trilogy.lark +5 -5
- {pytrilogy-0.0.2.12.dist-info → pytrilogy-0.0.2.14.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.12.dist-info → pytrilogy-0.0.2.14.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.12.dist-info → pytrilogy-0.0.2.14.dist-info}/top_level.txt +0 -0
@@ -43,6 +43,7 @@ class SelectNode(StrategyNode):
         parents: List["StrategyNode"] | None = None,
         depth: int = 0,
         partial_concepts: List[Concept] | None = None,
+        nullable_concepts: List[Concept] | None = None,
         accept_partial: bool = False,
         grain: Optional[Grain] = None,
         force_group: bool | None = False,
@@ -58,6 +59,7 @@ class SelectNode(StrategyNode):
             parents=parents,
             depth=depth,
             partial_concepts=partial_concepts,
+            nullable_concepts=nullable_concepts,
             force_group=force_group,
             grain=grain,
             conditions=conditions,
@@ -115,6 +117,7 @@ class SelectNode(StrategyNode):
                 partial_concepts=[
                     c.concept for c in datasource.columns if not c.is_complete
                 ],
+                nullable_concepts=[c.concept for c in datasource.columns if c.is_nullable],
                 source_type=SourceType.DIRECT_SELECT,
                 condition=self.conditions,
                 # select nodes should never group
@@ -183,6 +186,7 @@ class SelectNode(StrategyNode):
             parents=self.parents,
             whole_grain=self.whole_grain,
             partial_concepts=list(self.partial_concepts),
+            nullable_concepts=list(self.nullable_concepts),
             accept_partial=self.accept_partial,
             grain=self.grain,
             force_group=self.force_group,
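These hunks thread a new `nullable_concepts` field through the `SelectNode` constructor, the call into the parent class, the resolution from datasource columns, and the node copy. A minimal sketch of the new derivation step, using a hypothetical simplified `Column` stand-in rather than Trilogy's real column objects:

```python
from dataclasses import dataclass

@dataclass
class Column:  # hypothetical stand-in for a datasource column binding
    concept: str
    is_complete: bool = True
    is_nullable: bool = False

columns = [
    Column("order.id"),
    Column("order.discount_code", is_nullable=True),
]

# Mirrors the new derivation: nullable columns feed nullable_concepts,
# alongside the existing partial_concepts derivation.
partial_concepts = [c.concept for c in columns if not c.is_complete]
nullable_concepts = [c.concept for c in columns if c.is_nullable]
print(nullable_concepts)  # ['order.discount_code']
```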
trilogy/core/processing/utility.py CHANGED

@@ -18,9 +18,11 @@ from trilogy.core.models import (
     WindowItem,
     AggregateWrapper,
     DataType,
+    ConceptPair,
+    UnnestJoin,
 )
 
-from trilogy.core.enums import Purpose, Granularity, BooleanOperator
+from trilogy.core.enums import Purpose, Granularity, BooleanOperator, Modifier
 from trilogy.core.constants import CONSTANT_DATASET
 from enum import Enum
 from trilogy.utility import unique
@@ -243,8 +245,23 @@ def get_node_joins(
         local_concepts = [
             c for c in local_concepts if c.granularity != Granularity.SINGLE_ROW
         ]
-
+    elif any(
+        [
+            c.address in [x.address for x in identifier_map[right].partial_concepts]
+            for c in local_concepts
+        ]
+    ) or any(
+        [
+            c.address in [x.address for x in identifier_map[left].nullable_concepts]
+            for c in local_concepts
+        ]
+    ):
         join_type = JoinType.LEFT_OUTER
+        local_concepts = [
+            c for c in local_concepts if c.granularity != Granularity.SINGLE_ROW
+        ]
+    else:
+        join_type = JoinType.INNER
     # remove any constants if other join keys exist
     local_concepts = [
         c for c in local_concepts if c.granularity != Granularity.SINGLE_ROW
@@ -287,7 +304,18 @@ def get_node_joins(
         )
         narg = (left_arg, right_arg)
         if narg not in join_tuples:
-
+            modifiers = set()
+            if left_arg.address in [
+                x.address for x in left_datasource.nullable_concepts
+            ] and right_arg.address in [
+                x.address for x in right_datasource.nullable_concepts
+            ]:
+                modifiers.add(Modifier.NULLABLE)
+            join_tuples.append(
+                ConceptPair(
+                    left=left_arg, right=right_arg, modifiers=list(modifiers)
+                )
+            )
         final_joins_pre.append(
             BaseJoin(
                 left_datasource=identifier_map[left],
@@ -412,3 +440,63 @@ def decompose_condition(
     else:
         chunks.append(conditional)
     return chunks
+
+
+def find_nullable_concepts(
+    source_map: Dict[str, set[Datasource | QueryDatasource | UnnestJoin]],
+    datasources: List[Datasource | QueryDatasource],
+    joins: List[BaseJoin | UnnestJoin],
+) -> List[str]:
+    """give a set of datasources and joins, find the concepts
+    that may contain nulls in the output set
+    """
+    nullable_datasources = set()
+    datasource_map = {
+        x.identifier: x
+        for x in datasources
+        if isinstance(x, (Datasource, QueryDatasource))
+    }
+    for join in joins:
+        is_on_nullable_condition = False
+        if not isinstance(join, BaseJoin):
+            continue
+        if not join.concept_pairs:
+            continue
+        for pair in join.concept_pairs:
+            if pair.right.address in [
+                y.address
+                for y in datasource_map[
+                    join.right_datasource.identifier
+                ].nullable_concepts
+            ]:
+                is_on_nullable_condition = True
+                break
+            if pair.left.address in [
+                y.address
+                for y in datasource_map[
+                    join.left_datasource.identifier
+                ].nullable_concepts
+            ]:
+                is_on_nullable_condition = True
+                break
+        if is_on_nullable_condition:
+            nullable_datasources.add(datasource_map[join.right_datasource.identifier])
+    final_nullable = set()
+
+    for k, v in source_map.items():
+        local_nullable = [
+            x for x in datasources if k in [v.address for v in x.nullable_concepts]
+        ]
+        if all(
+            [
+                k in [v.address for v in x.nullable_concepts]
+                for x in datasources
+                if k in [z.address for z in x.output_concepts]
+            ]
+        ):
+            final_nullable.add(k)
+        all_ds = set([ds for ds in local_nullable]).union(nullable_datasources)
+        if nullable_datasources:
+            if set(v).issubset(all_ds):
+                final_nullable.add(k)
+    return list(sorted(final_nullable))
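The new `find_nullable_concepts` helper walks joins and datasources to decide which output concepts can carry nulls. A minimal sketch of the source-map half of that rule (a concept stays nullable only if every source that outputs it marks it nullable), using plain dicts instead of Trilogy's Datasource/QueryDatasource objects and omitting the join-driven branch:

```python
# Simplified illustration of the rule the new helper encodes; the field names
# "outputs" and "nullable" are stand-ins, not Trilogy attributes.
def nullable_in_output(concept: str, sources: list[dict]) -> bool:
    """A concept stays nullable in the output if every source that outputs it
    marks it nullable."""
    providers = [s for s in sources if concept in s["outputs"]]
    return bool(providers) and all(concept in s["nullable"] for s in providers)

sources = [
    {"outputs": {"customer.id", "customer.email"}, "nullable": {"customer.email"}},
    {"outputs": {"customer.email"}, "nullable": {"customer.email"}},
]
print(nullable_in_output("customer.email", sources))  # True
print(nullable_in_output("customer.id", sources))     # False
```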
trilogy/core/query_processor.py CHANGED

@@ -35,7 +35,6 @@ from trilogy.core.ergonomics import CTE_NAMES
 from trilogy.core.optimization import optimize_ctes
 from math import ceil
 from collections import defaultdict
-from random import shuffle
 
 LOGGER_PREFIX = "[QUERY BUILD]"
 
@@ -128,8 +127,6 @@ def generate_source_map(
         if qdk in output_address:
             source_map[qdk].append(cte.name)
     # now do a pass that accepts partials
-    # TODO: move this into a second loop by first creationg all sub sources
-    # then loop through this
     for cte in matches:
         if qdk not in source_map:
             source_map[qdk] = [cte.name]
@@ -180,7 +177,6 @@ def generate_cte_name(full_name: str, name_map: dict[str, str]) -> str:
     int = ceil(idx / len(CTE_NAMES))
     suffix = f"_{int}"
     valid = [x for x in CTE_NAMES if x + suffix not in name_map.values()]
-    shuffle(valid)
     lookup = valid[0]
     new_name = f"{lookup}{suffix}"
     name_map[full_name] = new_name
@@ -196,8 +192,6 @@ def resolve_cte_base_name_and_alias_v2(
     raw_joins: List[Join | InstantiatedUnnestJoin],
 ) -> Tuple[str | None, str | None]:
     joins: List[Join] = [join for join in raw_joins if isinstance(join, Join)]
-    # INFO trilogy:query_processor.py:263 Finished building source map for civet with 3 parents, have {'local.relevant_customers': ['fowl', 'fowl'],
-    # 'customer.demographics.gender': ['mandrill'], 'customer.id': ['mandrill'], 'customer.demographics.id': ['mandrill'], 'customer.id_9268029262289908': [], 'customer.demographics.gender_1513806568509111': []}, query_datasource had non-empty keys ['local.relevant_customers', 'customer.demographics.gender', 'customer.id', 'customer.demographics.id'] and existence had non-empty keys []
     if (
         len(source.datasources) == 1
         and isinstance(source.datasources[0], Datasource)
@@ -301,6 +295,7 @@ def datasource_to_ctes(
         parent_ctes=parents,
         condition=query_datasource.condition,
         partial_concepts=query_datasource.partial_concepts,
+        nullable_concepts=query_datasource.nullable_concepts,
         join_derived_concepts=query_datasource.join_derived_concepts,
         hidden_concepts=query_datasource.hidden_concepts,
         base_name_override=base_name,
@@ -334,12 +329,13 @@ def append_existence_check(
     for subselect in where.existence_arguments:
         if not subselect:
             continue
-
-            f"{LOGGER_PREFIX} fetching existance clause inputs {[str(c) for c in subselect]}"
-        )
+
         eds = source_query_concepts(
             [*subselect], environment=environment, g=graph, history=history
         )
+        logger.info(
+            f"{LOGGER_PREFIX} fetching existence clause inputs {[str(c) for c in subselect]}"
+        )
         node.add_parents([eds])
         node.add_existence_concepts([*subselect])
 
@@ -384,9 +380,7 @@ def get_query_node(
     if nest_where and statement.where_clause:
         if not all_aggregate:
             ods.conditions = statement.where_clause.conditional
-            ods.
-            # ods.hidden_concepts = where_delta
-            ods.rebuild_cache()
+            ods.set_output_concepts(statement.output_components)
     append_existence_check(ods, environment, graph, history)
     ds = GroupNode(
         output_concepts=statement.output_components,
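With `shuffle(valid)` removed, `generate_cte_name` now takes the first free ergonomic name, so generated CTE aliases are stable across runs. A rough sketch of the selection step with hypothetical inputs (the real `CTE_NAMES` list lives in `trilogy.core.ergonomics`, and `idx`/`name_map` are maintained by the surrounding function):

```python
from math import ceil

# Hypothetical values for illustration only.
CTE_NAMES = ["fowl", "mandrill", "civet"]
name_map: dict[str, str] = {"ds_one": "fowl_1"}
idx = 4

suffix = f"_{ceil(idx / len(CTE_NAMES))}"
valid = [x for x in CTE_NAMES if x + suffix not in name_map.values()]
# Previously `shuffle(valid)` ran here, so the alias differed run to run;
# taking valid[0] now yields the same alias for the same inputs.
lookup = valid[0]
new_name = f"{lookup}{suffix}"
print(new_name)  # fowl_2
```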
trilogy/dialect/common.py CHANGED

@@ -3,9 +3,9 @@ from trilogy.core.enums import UnnestMode, Modifier
 from typing import Optional, Callable
 
 
-def null_wrapper(lval: str, rval: str,
-    if
-    return f"(
+def null_wrapper(lval: str, rval: str, modifiers: list[Modifier]) -> str:
+    if Modifier.NULLABLE in modifiers:
+        return f"({lval} = {rval} or ({lval} is null and {rval} is null))"
     return f"{lval} = {rval}"
 
 
@@ -48,7 +48,7 @@ def render_join(
             null_wrapper(
                 f"{left_name}.{quote_character}{join.left_cte.get_alias(key.concept) if isinstance(join.left_cte, Datasource) else key.concept.safe_address}{quote_character}",
                 f"{right_name}.{quote_character}{join.right_cte.get_alias(key.concept) if isinstance(join.right_cte, Datasource) else key.concept.safe_address}{quote_character}",
-                key.concept,
+                modifiers=key.concept.modifiers or [],
             )
             for key in join.joinkeys
         ]
@@ -56,11 +56,13 @@ def render_join(
         base_joinkeys.extend(
             [
                 null_wrapper(
-                    f"{left_name}.{quote_character}{join.left_cte.get_alias(
-                    f"{right_name}.{quote_character}{join.right_cte.get_alias(
-
+                    f"{left_name}.{quote_character}{join.left_cte.get_alias(pair.left) if isinstance(join.left_cte, Datasource) else pair.left.safe_address}{quote_character}",
+                    f"{right_name}.{quote_character}{join.right_cte.get_alias(pair.right) if isinstance(join.right_cte, Datasource) else pair.right.safe_address}{quote_character}",
+                    modifiers=pair.modifiers
+                    + (pair.left.modifiers or [])
+                    + (pair.right.modifiers or []),
                 )
-                for
+                for pair in join.joinkey_pairs
             ]
         )
         if not base_joinkeys:
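The reworked `null_wrapper` emits a null-safe equality whenever `Modifier.NULLABLE` is attached to a join pair, and `render_join` now passes per-pair modifiers into it. A small sketch of the rendered predicates, with a stand-in `Modifier` enum so the snippet runs without pytrilogy installed:

```python
from enum import Enum

class Modifier(Enum):  # stand-in for trilogy.core.enums.Modifier, for illustration
    NULLABLE = "nullable"

def null_wrapper(lval: str, rval: str, modifiers: list[Modifier]) -> str:
    # Same shape as the updated helper in trilogy/dialect/common.py.
    if Modifier.NULLABLE in modifiers:
        return f"({lval} = {rval} or ({lval} is null and {rval} is null))"
    return f"{lval} = {rval}"

print(null_wrapper('"a"."x"', '"b"."x"', []))
# "a"."x" = "b"."x"
print(null_wrapper('"a"."x"', '"b"."x"', [Modifier.NULLABLE]))
# ("a"."x" = "b"."x" or ("a"."x" is null and "b"."x" is null))
```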
trilogy/executor.py CHANGED

@@ -300,10 +300,16 @@ class Executor(object):
             self.environment.add_datasource(x.datasource)
             yield x
 
-    def execute_raw_sql(
+    def execute_raw_sql(
+        self, command: str, variables: dict | None = None
+    ) -> CursorResult:
         """Run a command against the raw underlying
         execution engine"""
-
+        if variables:
+            return self.connection.execute(text(command), variables)
+        return self.connection.execute(
+            text(command),
+        )
 
     def execute_text(self, command: str) -> List[CursorResult]:
         """Run a preql text command"""
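`Executor.execute_raw_sql` now accepts an optional `variables` mapping that is passed straight to SQLAlchemy's `text()` execution, enabling bound parameters. A usage sketch; only the `execute_raw_sql` signature comes from this diff, while the import path and `default_executor()` setup are assumptions about how an Executor is typically obtained:

```python
from trilogy import Dialects  # assumed import path, for illustration

# Hypothetical setup; how you construct the Executor depends on your engine.
executor = Dialects.DUCK_DB.default_executor()

# Bound parameters are handed to SQLAlchemy's text() execution.
rows = executor.execute_raw_sql(
    "select :answer as answer",
    variables={"answer": 42},
).fetchall()
print(rows)  # [(42,)]
```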
trilogy/parsing/common.py CHANGED

@@ -25,6 +25,15 @@ from trilogy.core.enums import PurposeLineage
 from trilogy.constants import (
     VIRTUAL_CONCEPT_PREFIX,
 )
+from trilogy.core.enums import Modifier
+
+
+def get_upstream_modifiers(keys: List[Concept]) -> list[Modifier]:
+    modifiers = set()
+    for pkey in keys:
+        if pkey.modifiers:
+            modifiers.update(pkey.modifiers)
+    return list(modifiers)
 
 
 def process_function_args(
@@ -50,7 +59,7 @@ def process_function_args(
         id_hash = string_to_hash(str(arg))
         concept = function_to_concept(
             arg,
-            name=f"{VIRTUAL_CONCEPT_PREFIX}_{id_hash}",
+            name=f"{VIRTUAL_CONCEPT_PREFIX}_{arg.operator.value}_{id_hash}",
             namespace=environment.namespace,
         )
         # to satisfy mypy, concept will always have metadata
@@ -125,7 +134,7 @@ def constant_to_concept(
 
 
 def function_to_concept(parent: Function, name: str, namespace: str) -> Concept:
-    pkeys = []
+    pkeys: List[Concept] = []
     for x in parent.arguments:
         pkeys += [
             x
@@ -135,7 +144,7 @@ def function_to_concept(parent: Function, name: str, namespace: str) -> Concept:
     grain = Grain()
     for x in pkeys:
         grain += x.grain
-
+    modifiers = get_upstream_modifiers(pkeys)
     key_grain = []
     for x in pkeys:
         if x.keys:
@@ -155,6 +164,7 @@ def function_to_concept(parent: Function, name: str, namespace: str) -> Concept:
         namespace=namespace,
         grain=grain,
         keys=keys,
+        modifiers=modifiers,
     )
 
 
@@ -166,6 +176,7 @@ def filter_item_to_concept(
     metadata: Metadata | None = None,
 ) -> Concept:
     fmetadata = metadata or Metadata()
+    modifiers = get_upstream_modifiers(parent.content.concept_arguments)
     return Concept(
         name=name,
         datatype=parent.content.datatype,
@@ -184,6 +195,7 @@ def filter_item_to_concept(
             if parent.content.purpose == Purpose.PROPERTY
             else Grain()
         ),
+        modifiers=modifiers,
     )
 
 
@@ -202,6 +214,7 @@ def window_item_to_concept(
         grain += [item.expr.output]
     else:
         grain = parent.over + [parent.content.output]
+    modifiers = get_upstream_modifiers(parent.content.concept_arguments)
     return Concept(
         name=name,
         datatype=parent.content.datatype,
@@ -212,6 +225,7 @@ def window_item_to_concept(
         grain=Grain(components=grain),
         namespace=namespace,
         keys=keys,
+        modifiers=modifiers,
     )
 
 
@@ -229,6 +243,7 @@ def agg_wrapper_to_concept(
     # at that grain
     fmetadata = metadata or Metadata()
    aggfunction = parent.function
+    modifiers = get_upstream_modifiers(parent.concept_arguments)
     out = Concept(
         name=name,
         datatype=aggfunction.output_datatype,
@@ -238,6 +253,7 @@ def agg_wrapper_to_concept(
         grain=Grain(components=parent.by) if parent.by else Grain(),
         namespace=namespace,
         keys=tuple(parent.by) if parent.by else keys,
+        modifiers=modifiers,
     )
     return out
 
@@ -255,20 +271,34 @@ def arbitrary_to_concept(
         | str
     ),
     namespace: str,
-    name: str,
+    name: str | None = None,
     metadata: Metadata | None = None,
     purpose: Purpose | None = None,
 ) -> Concept:
 
     if isinstance(parent, AggregateWrapper):
+        if not name:
+            name = (
+                f"_agg_{parent.function.operator.value}_{string_to_hash(str(parent))}"
+            )
         return agg_wrapper_to_concept(parent, namespace, name, metadata, purpose)
     elif isinstance(parent, WindowItem):
+        if not name:
+            name = f"_window_{parent.type.value}_{string_to_hash(str(parent))}"
         return window_item_to_concept(parent, name, namespace, purpose, metadata)
     elif isinstance(parent, FilterItem):
+        if not name:
+            name = f"_filter_{parent.content.name}_{string_to_hash(str(parent))}"
         return filter_item_to_concept(parent, name, namespace, purpose, metadata)
     elif isinstance(parent, Function):
+        if not name:
+            name = f"_func_{parent.operator.value}_{string_to_hash(str(parent))}"
         return function_to_concept(parent, name, namespace)
     elif isinstance(parent, ListWrapper):
+        if not name:
+            name = f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(parent))}"
         return constant_to_concept(parent, name, namespace, purpose, metadata)
     else:
+        if not name:
+            name = f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(parent))}"
         return constant_to_concept(parent, name, namespace, purpose, metadata)
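The new `get_upstream_modifiers` helper unions the modifiers of a derived concept's inputs, so filter, window, aggregate, and function outputs inherit markers such as nullable; `arbitrary_to_concept` also gains readable default names (`_agg_*`, `_window_*`, `_filter_*`, `_func_*`). A self-contained sketch of the helper's behaviour with stand-in `Concept`/`Modifier` types rather than Trilogy's models:

```python
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional

class Modifier(Enum):  # stand-in for trilogy.core.enums.Modifier
    PARTIAL = "partial"
    NULLABLE = "nullable"

@dataclass
class Concept:  # minimal stand-in with just the field the helper reads
    name: str
    modifiers: Optional[List[Modifier]] = None

def get_upstream_modifiers(keys: List[Concept]) -> list[Modifier]:
    # Same logic as the new helper in trilogy/parsing/common.py:
    # union the modifiers of every upstream concept.
    modifiers = set()
    for pkey in keys:
        if pkey.modifiers:
            modifiers.update(pkey.modifiers)
    return list(modifiers)

parents = [Concept("order.id"), Concept("order.discount_code", [Modifier.NULLABLE])]
print(get_upstream_modifiers(parents))  # [<Modifier.NULLABLE: 'nullable'>]
```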
trilogy/parsing/parse_engine.py CHANGED

@@ -16,7 +16,6 @@ from trilogy.core.internal import INTERNAL_NAMESPACE, ALL_ROWS_CONCEPT
 from trilogy.constants import (
     DEFAULT_NAMESPACE,
     NULL_VALUE,
-    VIRTUAL_CONCEPT_PREFIX,
     MagicConstants,
 )
 from trilogy.core.enums import (
@@ -93,6 +92,7 @@ from trilogy.core.models import (
     WindowItemOver,
     RawColumnExpr,
     arg_to_datatype,
+    merge_datatypes,
     ListWrapper,
     MapWrapper,
     MapType,
@@ -109,7 +109,6 @@ from trilogy.core.models import (
     HavingClause,
 )
 from trilogy.parsing.exceptions import ParseError
-from trilogy.utility import string_to_hash
 from trilogy.parsing.common import (
     agg_wrapper_to_concept,
     window_item_to_concept,
@@ -739,8 +738,8 @@ class ParseToObjects(Transformer):
             x = arbitrary_to_concept(
                 x,
                 namespace=namespace,
-                name=f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(x))}",
             )
+            self.environment.add_concept(x)
             return x
 
         return [
@@ -781,6 +780,11 @@ class ParseToObjects(Transformer):
     def rawsql_statement(self, meta: Meta, args) -> RawSQLStatement:
         return RawSQLStatement(meta=Metadata(line_number=meta.line), text=args[0])
 
+    def resolve_import_address(self, address) -> str:
+        with open(address, "r", encoding="utf-8") as f:
+            text = f.read()
+        return text
+
     def import_statement(self, args: list[str]) -> ImportStatement:
         alias = args[-1]
         path = args[0].split(".")
@@ -790,8 +794,7 @@ class ParseToObjects(Transformer):
             nparser = self.parsed[target]
         else:
             try:
-
-                text = f.read()
+                text = self.resolve_import_address(target)
                 nparser = ParseToObjects(
                     visit_tokens=True,
                     text=text,
@@ -1093,7 +1096,6 @@ class ParseToObjects(Transformer):
             left = arbitrary_to_concept(
                 args[0],
                 namespace=self.environment.namespace,
-                name=f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(args[0]))}",
             )
             self.environment.add_concept(left)
         else:
@@ -1102,7 +1104,6 @@ class ParseToObjects(Transformer):
             right = arbitrary_to_concept(
                 args[2],
                 namespace=self.environment.namespace,
-                name=f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(args[2]))}",
             )
             self.environment.add_concept(right)
         else:
@@ -1137,7 +1138,6 @@ class ParseToObjects(Transformer):
             right = arbitrary_to_concept(
                 right,
                 namespace=self.environment.namespace,
-                name=f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(right))}",
             )
             self.environment.add_concept(right, meta=meta)
             return SubselectComparison(
@@ -1186,8 +1186,9 @@ class ParseToObjects(Transformer):
     def window_item_order(self, args):
         return WindowItemOrder(contents=args[0])
 
-
-
+    @v_args(meta=True)
+    def window_item(self, meta, args) -> WindowItem:
+        type: WindowType = args[0]
         order_by = []
         over = []
         index = None
@@ -1203,6 +1204,14 @@ class ParseToObjects(Transformer):
                 concept = self.environment.concepts[item]
             elif isinstance(item, Concept):
                 concept = item
+            elif isinstance(item, WindowType):
+                type = item
+            else:
+                concept = arbitrary_to_concept(
+                    item,
+                    namespace=self.environment.namespace,
+                )
+                self.environment.add_concept(concept, meta=meta)
         assert concept
         return WindowItem(
             type=type, content=concept, over=over, order_by=order_by, index=index
@@ -1697,8 +1706,7 @@ class ParseToObjects(Transformer):
     @v_args(meta=True)
     def fadd(self, meta, args) -> Function:
         args = process_function_args(args, meta=meta, environment=self.environment)
-        output_datatype = arg_to_datatype(args
-        # TODO: check for valid transforms?
+        output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
         return Function(
             operator=FunctionType.ADD,
             arguments=args,
@@ -1711,7 +1719,7 @@ class ParseToObjects(Transformer):
     @v_args(meta=True)
     def fsub(self, meta, args) -> Function:
         args = process_function_args(args, meta=meta, environment=self.environment)
-        output_datatype = arg_to_datatype(args
+        output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
         return Function(
             operator=FunctionType.SUBTRACT,
             arguments=args,
@@ -1724,7 +1732,7 @@ class ParseToObjects(Transformer):
     @v_args(meta=True)
     def fmul(self, meta, args) -> Function:
         args = process_function_args(args, meta=meta, environment=self.environment)
-        output_datatype = arg_to_datatype(args
+        output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
         return Function(
             operator=FunctionType.MULTIPLY,
             arguments=args,
@@ -1736,8 +1744,8 @@ class ParseToObjects(Transformer):
 
     @v_args(meta=True)
     def fdiv(self, meta: Meta, args):
-        output_datatype = arg_to_datatype(args[0])
         args = process_function_args(args, meta=meta, environment=self.environment)
+        output_datatype = merge_datatypes([arg_to_datatype(x) for x in args])
         return Function(
             operator=FunctionType.DIVIDE,
             arguments=args,
@@ -1749,12 +1757,11 @@ class ParseToObjects(Transformer):
 
     @v_args(meta=True)
     def fmod(self, meta: Meta, args):
-        output_datatype = arg_to_datatype(args[0])
         args = process_function_args(args, meta=meta, environment=self.environment)
         return Function(
             operator=FunctionType.MOD,
             arguments=args,
-            output_datatype=
+            output_datatype=DataType.INTEGER,
             output_purpose=function_args_to_output_purpose(args),
             valid_inputs=[
                 {DataType.INTEGER, DataType.FLOAT, DataType.NUMBER},
@@ -1781,12 +1788,15 @@ class ParseToObjects(Transformer):
 
     def fcase(self, args: List[Union[CaseWhen, CaseElse]]):
         datatypes = set()
+        mapz = dict()
         for arg in args:
             output_datatype = arg_to_datatype(arg.expr)
-
+            if output_datatype != DataType.NULL:
+                datatypes.add(output_datatype)
+            mapz[str(arg.expr)] = output_datatype
         if not len(datatypes) == 1:
             raise SyntaxError(
-                f"All case expressions must have the same output datatype, got {datatypes}"
+                f"All case expressions must have the same output datatype, got {datatypes} from {mapz}"
             )
         return Function(
             operator=FunctionType.CASE,
@@ -1830,6 +1840,8 @@ def unpack_visit_error(e: VisitError):
         unpack_visit_error(e.orig_exc)
     elif isinstance(e.orig_exc, (UndefinedConceptException, ImportError)):
         raise e.orig_exc
+    elif isinstance(e.orig_exc, SyntaxError):
+        raise InvalidSyntaxException(str(e.orig_exc) + str(e.rule) + str(e.obj))
     elif isinstance(e.orig_exc, (ValidationError, TypeError)):
         raise InvalidSyntaxException(str(e.orig_exc) + str(e.rule) + str(e.obj))
     raise e
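Two parser behaviours change here: arithmetic functions now derive their output datatype with `merge_datatypes` over all arguments instead of only the first, and `fcase` ignores NULL branches when checking that all CASE arms share a datatype (and reports the per-expression mapping on failure). A minimal sketch of the relaxed CASE check, using stand-in `DataType` values rather than Trilogy's real model classes:

```python
from enum import Enum

class DataType(Enum):  # stand-in for trilogy.core.models.DataType
    NULL = "null"
    INTEGER = "int"
    STRING = "string"

# Hypothetical branch expressions mapped to their resolved datatypes.
branches = {"when x > 1 then 1": DataType.INTEGER, "else null": DataType.NULL}

datatypes = set()
mapz = dict()
for expr, output_datatype in branches.items():
    # NULL branches no longer count against the single-datatype rule.
    if output_datatype != DataType.NULL:
        datatypes.add(output_datatype)
    mapz[expr] = output_datatype
if not len(datatypes) == 1:
    raise SyntaxError(
        f"All case expressions must have the same output datatype, got {datatypes} from {mapz}"
    )
print(datatypes)  # {<DataType.INTEGER: 'int'>}
```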
trilogy/parsing/trilogy.lark CHANGED

@@ -43,7 +43,7 @@
 
 query: "query" MULTILINE_STRING
 
-concept_assignment: SHORTHAND_MODIFIER
+concept_assignment: SHORTHAND_MODIFIER* IDENTIFIER
 
 //column_assignment
 //figure out if we want static
@@ -90,9 +90,9 @@
 
 
 // rank/lag/lead
-WINDOW_TYPE: ("row_number"i|"rank"i|"lag"i|"lead"i | "sum"i) /[\s]+/
+WINDOW_TYPE: ("row_number"i|"rank"i|"lag"i|"lead"i | "sum"i | "avg"i | "max"i | "min"i ) /[\s]+/
 
-window_item: WINDOW_TYPE int_lit?
+window_item: WINDOW_TYPE int_lit? expr window_item_over? window_item_order?
 
 window_item_over: ("OVER"i over_list)
 
@@ -150,7 +150,7 @@
 
 subselect_comparison: expr array_comparison (literal | _constant_functions | _string_functions | concept_lit | filter_item | window_item | unnest | fgroup | expr_tuple | parenthetical )
 
-expr_tuple: "(" expr ("," expr)+ ","? ")"
+expr_tuple: ("(" expr ("," expr)+ ","? ")") | ("(" expr "," ")")
 
 parenthetical: "(" expr ")"
 
@@ -296,7 +296,7 @@
 
 MODIFIER: "Optional"i | "Partial"i | "Nullable"i
 
-SHORTHAND_MODIFIER: "~"
+SHORTHAND_MODIFIER: "~" | "?"
 
 struct_type: "struct"i "<" ((data_type | IDENTIFIER) ",")* (data_type | IDENTIFIER) ","? ">"
 
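The grammar changes widen `WINDOW_TYPE` to include avg/max/min, let `window_item` take an expression plus optional OVER and ORDER clauses, accept `?` as a shorthand modifier alongside `~`, and allow single-element tuples. A tiny Lark sketch exercising just the widened `WINDOW_TYPE` terminal (an excerpt for illustration, not the full trilogy.lark grammar):

```python
from lark import Lark

# Grammar excerpt; the real rule lives in trilogy/parsing/trilogy.lark.
grammar = r'''
start: WINDOW_TYPE
WINDOW_TYPE: ("row_number"i|"rank"i|"lag"i|"lead"i|"sum"i|"avg"i|"max"i|"min"i) /[\s]+/
'''
parser = Lark(grammar)
print(parser.parse("avg ").children)  # [Token('WINDOW_TYPE', 'avg ')]
```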
File without changes
File without changes
File without changes