pytrilogy 0.0.3.112__py3-none-any.whl → 0.0.3.115__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pytrilogy might be problematic.
- {pytrilogy-0.0.3.112.dist-info → pytrilogy-0.0.3.115.dist-info}/METADATA +14 -34
- {pytrilogy-0.0.3.112.dist-info → pytrilogy-0.0.3.115.dist-info}/RECORD +25 -25
- trilogy/__init__.py +1 -1
- trilogy/constants.py +28 -0
- trilogy/core/enums.py +7 -1
- trilogy/core/functions.py +51 -0
- trilogy/core/models/author.py +10 -2
- trilogy/core/models/build.py +17 -2
- trilogy/core/models/environment.py +1 -0
- trilogy/core/processing/concept_strategies_v3.py +24 -5
- trilogy/core/processing/discovery_node_factory.py +2 -2
- trilogy/core/processing/discovery_utility.py +11 -4
- trilogy/core/processing/node_generators/filter_node.py +7 -0
- trilogy/core/processing/node_generators/unnest_node.py +77 -6
- trilogy/core/statements/author.py +18 -4
- trilogy/dialect/base.py +14 -0
- trilogy/dialect/duckdb.py +12 -1
- trilogy/parsing/common.py +52 -17
- trilogy/parsing/parse_engine.py +76 -7
- trilogy/parsing/render.py +2 -1
- trilogy/parsing/trilogy.lark +15 -5
- {pytrilogy-0.0.3.112.dist-info → pytrilogy-0.0.3.115.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.3.112.dist-info → pytrilogy-0.0.3.115.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.112.dist-info → pytrilogy-0.0.3.115.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.112.dist-info → pytrilogy-0.0.3.115.dist-info}/top_level.txt +0 -0
trilogy/core/processing/node_generators/filter_node.py CHANGED

@@ -96,6 +96,8 @@ def build_parent_concepts(
             continue
         elif global_filter_is_local_filter:
             same_filter_optional.append(x)
+            # also append it to the parent row concepts
+            parent_row_concepts.append(x)

     # sometimes, it's okay to include other local optional above the filter
     # in case it is, prep our list
@@ -204,11 +206,16 @@ def gen_filter_node(
             f"{padding(depth)}{LOGGER_PREFIX} filter node row parents {[x.address for x in parent_row_concepts]} could not be found"
         )
         return None
+    else:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} filter node has row parents {[x.address for x in parent_row_concepts]} from node with output [{[x.address for x in row_parent.output_concepts]}] partial {row_parent.partial_concepts}"
+        )
     if global_filter_is_local_filter:
         logger.info(
             f"{padding(depth)}{LOGGER_PREFIX} filter node conditions match global conditions adding row parent {row_parent.output_concepts} with condition {where.conditional}"
         )
         row_parent.add_parents(core_parent_nodes)
+        # all local optional will be in the parent already, so we can set outputs
        row_parent.set_output_concepts([concept] + local_optional)
        return row_parent
    if optimized_pushdown:
trilogy/core/processing/node_generators/unnest_node.py CHANGED

@@ -9,6 +9,7 @@ from trilogy.core.models.build import (
 from trilogy.core.models.build_environment import BuildEnvironment
 from trilogy.core.processing.nodes import (
     History,
+    MergeNode,
     StrategyNode,
     UnnestNode,
     WhereSafetyNode,
@@ -18,6 +19,32 @@ from trilogy.core.processing.utility import padding
 LOGGER_PREFIX = "[GEN_UNNEST_NODE]"


+def get_pseudonym_parents(
+    concept: BuildConcept,
+    local_optional: List[BuildConcept],
+    source_concepts,
+    environment: BuildEnvironment,
+    g,
+    depth,
+    history,
+    conditions,
+) -> List[StrategyNode]:
+    for x in concept.pseudonyms:
+        attempt = source_concepts(
+            mandatory_list=[environment.alias_origin_lookup[x]] + local_optional,
+            environment=environment,
+            g=g,
+            depth=depth + 1,
+            history=history,
+            conditions=conditions,
+            accept_partial=True,
+        )
+        if not attempt:
+            continue
+        return [attempt]
+    return []
+
+
 def gen_unnest_node(
     concept: BuildConcept,
     local_optional: List[BuildConcept],
@@ -29,14 +56,34 @@ def gen_unnest_node(
     conditions: BuildWhereClause | None = None,
 ) -> StrategyNode | None:
     arguments = []
+    join_nodes: list[StrategyNode] = []
     depth_prefix = "\t" * depth
     if isinstance(concept.lineage, BuildFunction):
         arguments = concept.lineage.concept_arguments
+    search_optional = local_optional
+    if (not arguments) and (local_optional and concept.pseudonyms):
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} unnest node for {concept} has no parents; creating solo unnest node"
+        )
+        join_nodes += get_pseudonym_parents(
+            concept,
+            local_optional,
+            source_concepts,
+            environment,
+            g,
+            depth,
+            history,
+            conditions,
+        )
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} unnest node for {concept} got join nodes {join_nodes}"
+        )
+        search_optional = []

-    equivalent_optional = [x for x in local_optional if x.lineage == concept.lineage]
+    equivalent_optional = [x for x in search_optional if x.lineage == concept.lineage]

     non_equivalent_optional = [
-        x for x in local_optional if x not in equivalent_optional
+        x for x in search_optional if x not in equivalent_optional
     ]
     all_parents = arguments + non_equivalent_optional
     logger.info(
@@ -44,7 +91,8 @@ def gen_unnest_node(
     )
     local_conditions = False
     expected_outputs = [concept] + local_optional
-
+    parent: StrategyNode | None = None
+    if arguments or search_optional:
         parent = source_concepts(
             mandatory_list=all_parents,
             environment=environment,
@@ -86,14 +134,37 @@ def gen_unnest_node(
     base = UnnestNode(
         unnest_concepts=[concept] + equivalent_optional,
         input_concepts=arguments + non_equivalent_optional,
-        output_concepts=[concept] + local_optional,
+        output_concepts=[concept] + search_optional,
         environment=environment,
         parents=([parent] if parent else []),
     )
+
+    conditional = conditions.conditional if conditions else None
+    if join_nodes:
+        logger.info(
+            f"{depth_prefix}{LOGGER_PREFIX} unnest node for {concept} needs to merge with join nodes {join_nodes}"
+        )
+        for x in join_nodes:
+            logger.info(
+                f"{depth_prefix}{LOGGER_PREFIX} join node {x} with partial {x.partial_concepts}"
+            )
+            pseudonyms = [
+                environment.alias_origin_lookup[p] for p in concept.pseudonyms
+            ]
+            x.add_partial_concepts(pseudonyms)
+        return MergeNode(
+            input_concepts=base.output_concepts
+            + [j for n in join_nodes for j in n.output_concepts],
+            output_concepts=[concept] + local_optional,
+            environment=environment,
+            parents=[base] + join_nodes,
+            conditions=conditional if local_conditions is True else None,
+            preexisting_conditions=(
+                conditional if conditional and local_conditions is False else None
+            ),
+        )
     # we need to sometimes nest an unnest node,
     # as unnest operations are not valid in all situations
-    # TODO: inline this node when we can detect it's safe
-    conditional = conditions.conditional if conditions else None
     new = WhereSafetyNode(
         input_concepts=base.output_concepts,
         output_concepts=base.output_concepts,
trilogy/core/statements/author.py CHANGED

@@ -31,6 +31,7 @@ from trilogy.core.models.author import (
     Metadata,
     MultiSelectLineage,
     OrderBy,
+    Parenthetical,
     SelectLineage,
     UndefinedConcept,
     WhereClause,
@@ -48,7 +49,12 @@ from trilogy.utility import unique

 class ConceptTransform(BaseModel):
     function: (
-        Function
+        Function
+        | FilterItem
+        | WindowItem
+        | AggregateWrapper
+        | FunctionCallWrapper
+        | Parenthetical
     )
     output: Concept  # this has to be a full concept, as it may not exist in environment
     modifiers: List[Modifier] = Field(default_factory=list)
@@ -190,9 +196,17 @@ class SelectStatement(HasUUID, SelectTypeMixin, BaseModel):
         if self.where_clause:
             for x in self.where_clause.concept_arguments:
                 if isinstance(x, UndefinedConcept):
-                    environment.concepts.raise_undefined(
-                        x.address, x.metadata.line_number if x.metadata else None
-                    )
+                    validate = environment.concepts.get(x.address)
+                    if validate and self.where_clause:
+                        self.where_clause = (
+                            self.where_clause.with_reference_replacement(
+                                x.address, validate.reference
+                            )
+                        )
+                    else:
+                        environment.concepts.raise_undefined(
+                            x.address, x.metadata.line_number if x.metadata else None
+                        )
         all_in_output = [x for x in self.output_components]
         if self.where_clause:
             for cref in self.where_clause.concept_arguments:
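Since ConceptTransform.function now accepts a wider union of expression types, here is a reduced pydantic sketch of how such a union field validates; the models below are simplified stand-ins, not the trilogy classes.

from pydantic import BaseModel


class Function(BaseModel):
    operator: str


class Parenthetical(BaseModel):
    content: int | str


class ConceptTransformSketch(BaseModel):
    # Reduced stand-in for the widened ConceptTransform.function union.
    function: Function | Parenthetical


# Either member of the union is accepted for the field.
print(ConceptTransformSketch(function=Parenthetical(content=1)))
print(ConceptTransformSketch(function=Function(operator="group")))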
trilogy/dialect/base.py CHANGED

@@ -194,6 +194,13 @@ FUNCTION_MAP = {
     FunctionType.INDEX_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
     FunctionType.MAP_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
     FunctionType.UNNEST: lambda x: f"unnest({x[0]})",
+    FunctionType.DATE_SPINE: lambda x: f"""unnest(
+    generate_series(
+        {x[0]},
+        {x[1]},
+        INTERVAL '1 day'
+    )
+)""",
     FunctionType.RECURSE_EDGE: lambda x: f"CASE WHEN {x[1]} IS NULL THEN {x[0]} ELSE {x[1]} END",
     FunctionType.ATTR_ACCESS: lambda x: f"""{x[0]}.{x[1].replace("'", "")}""",
     FunctionType.STRUCT: lambda x: f"{{{', '.join(struct_arg(x))}}}",
@@ -213,6 +220,9 @@ FUNCTION_MAP = {
     FunctionType.ARRAY_TO_STRING: lambda args: (
         f"array_to_string({args[0]}, {args[1]})"
     ),
+    FunctionType.ARRAY_FILTER: lambda args: (
+        f"array_filter({args[0]}, {args[1]} -> {args[2]})"
+    ),
     # math
     FunctionType.ADD: lambda x: " + ".join(x),
     FunctionType.ABS: lambda x: f"abs({x[0]})",
@@ -237,6 +247,7 @@ FUNCTION_MAP = {
     FunctionType.AVG: lambda x: f"avg({x[0]})",
     FunctionType.MAX: lambda x: f"max({x[0]})",
     FunctionType.MIN: lambda x: f"min({x[0]})",
+    FunctionType.ANY: lambda x: f"any_value({x[0]})",
     # string types
     FunctionType.LIKE: lambda x: f" {x[0]} like {x[1]} ",
     FunctionType.UPPER: lambda x: f"UPPER({x[0]}) ",
@@ -263,9 +274,11 @@ FUNCTION_MAP = {
     FunctionType.MINUTE: lambda x: f"minute({x[0]})",
     FunctionType.HOUR: lambda x: f"hour({x[0]})",
     FunctionType.DAY: lambda x: f"day({x[0]})",
+    FunctionType.DAY_NAME: lambda x: f"dayname({x[0]})",
     FunctionType.DAY_OF_WEEK: lambda x: f"day_of_week({x[0]})",
     FunctionType.WEEK: lambda x: f"week({x[0]})",
     FunctionType.MONTH: lambda x: f"month({x[0]})",
+    FunctionType.MONTH_NAME: lambda x: f"monthname({x[0]})",
     FunctionType.QUARTER: lambda x: f"quarter({x[0]})",
     FunctionType.YEAR: lambda x: f"year({x[0]})",
     # string types
@@ -283,6 +296,7 @@ FUNCTION_GRAIN_MATCH_MAP = {
     FunctionType.AVG: lambda args: f"{args[0]}",
     FunctionType.MAX: lambda args: f"{args[0]}",
     FunctionType.MIN: lambda args: f"{args[0]}",
+    FunctionType.ANY: lambda args: f"{args[0]}",
 }
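For orientation, a minimal sketch (not taken from the package or its tests) of the SQL fragment the new FunctionType.DATE_SPINE mapping renders once the dialect has already turned its two arguments into SQL strings; the sample arguments are illustrative only.

from typing import Sequence


def render_date_spine(x: Sequence[str]) -> str:
    # Mirrors the body of the new FunctionType.DATE_SPINE lambda shown above.
    return f"""unnest(
    generate_series(
        {x[0]},
        {x[1]},
        INTERVAL '1 day'
    )
)"""


# Illustrative arguments only (pre-rendered SQL date literals, not package output).
print(render_date_spine(["date '2024-01-01'", "date '2024-01-07'"]))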
trilogy/dialect/duckdb.py CHANGED

@@ -57,6 +57,15 @@ def render_log(args):
     raise ValueError("log function requires 1 or 2 arguments")


+def map_date_part_specifier(specifier: str) -> str:
+    """Map date part specifiers to DuckDB-compatible names"""
+    mapping = {
+        "day_of_week": "dow",
+        # Add other mappings if needed
+    }
+    return mapping.get(specifier, specifier)
+
+
 FUNCTION_MAP = {
     FunctionType.COUNT: lambda args: f"count({args[0]})",
     FunctionType.SUM: lambda args: f"sum({args[0]})",
@@ -84,11 +93,13 @@ FUNCTION_MAP = {
     FunctionType.ARRAY_AGG: lambda args: f"array_agg({args[0]})",
     # datetime is aliased
     FunctionType.CURRENT_DATETIME: lambda x: "cast(get_current_timestamp() as datetime)",
+    FunctionType.DATETIME: lambda x: f"cast({x[0]} as datetime)",
+    FunctionType.TIMESTAMP: lambda x: f"cast({x[0]} as timestamp)",
     FunctionType.DATE: lambda x: f"cast({x[0]} as date)",
     FunctionType.DATE_TRUNCATE: lambda x: f"date_trunc('{x[1]}', {x[0]})",
     FunctionType.DATE_ADD: lambda x: f"date_add({x[0]}, {x[2]} * INTERVAL 1 {x[1]})",
     FunctionType.DATE_SUB: lambda x: f"date_add({x[0]}, -{x[2]} * INTERVAL 1 {x[1]})",
-    FunctionType.DATE_PART: lambda x: f"date_part('{x[1]}', {x[0]})",
+    FunctionType.DATE_PART: lambda x: f"date_part('{map_date_part_specifier(x[1])}', {x[0]})",
     FunctionType.DATE_DIFF: lambda x: f"date_diff('{x[2]}', {x[0]}, {x[1]})",
     FunctionType.CONCAT: lambda x: f"({' || '.join(x)})",
     FunctionType.DATE_LITERAL: lambda x: f"date '{x}'",
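As a quick check of the new helper's behavior, a standalone copy of map_date_part_specifier follows with a couple of illustrative assertions; the assertions are mine, not from the package test suite.

def map_date_part_specifier(specifier: str) -> str:
    """Map date part specifiers to DuckDB-compatible names (copied from the diff above)."""
    mapping = {
        "day_of_week": "dow",
    }
    return mapping.get(specifier, specifier)


# day_of_week is rewritten to DuckDB's dow; anything unmapped passes through unchanged,
# so the revised DATE_PART lambda emits e.g. date_part('dow', x) or date_part('year', x).
assert map_date_part_specifier("day_of_week") == "dow"
assert map_date_part_specifier("year") == "year"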
trilogy/parsing/common.py CHANGED

@@ -55,6 +55,21 @@ from trilogy.core.models.environment import Environment
 from trilogy.core.statements.author import RowsetDerivationStatement, SelectStatement
 from trilogy.utility import string_to_hash, unique

+ARBITRARY_INPUTS = (
+    AggregateWrapper
+    | FunctionCallWrapper
+    | WindowItem
+    | FilterItem
+    | Function
+    | Parenthetical
+    | ListWrapper
+    | MapWrapper
+    | int
+    | float
+    | str
+    | date
+)
+

 def process_function_arg(
     arg,
@@ -76,7 +91,7 @@ def process_function_arg(
     # to simplify anonymous function handling
     if (
         arg.operator not in FunctionClass.AGGREGATE_FUNCTIONS.value
-        and arg.operator
+        and arg.operator not in FunctionClass.ONE_TO_MANY.value
     ):
         return arg
     id_hash = string_to_hash(str(arg))
@@ -296,13 +311,18 @@ def concept_is_relevant(
     if concept.purpose in (Purpose.METRIC,):
         if all([c in others for c in concept.grain.components]):
             return False
+    if (
+        concept.derivation in (Derivation.BASIC,)
+        and isinstance(concept.lineage, Function)
+        and concept.lineage.operator == FunctionType.DATE_SPINE
+    ):
+        return True
     if concept.derivation in (Derivation.BASIC,) and isinstance(
         concept.lineage, (Function, CaseWhen)
     ):
         relevant = False
         for arg in concept.lineage.arguments:
             relevant = atom_is_relevant(arg, others, environment) or relevant
-
         return relevant
     if concept.derivation in (Derivation.BASIC,) and isinstance(
         concept.lineage, Parenthetical
@@ -514,7 +534,7 @@ def function_to_concept(
     elif parent.operator == FunctionType.UNION:
         derivation = Derivation.UNION
         granularity = Granularity.MULTI_ROW
-    elif parent.operator
+    elif parent.operator in FunctionClass.ONE_TO_MANY.value:
         derivation = Derivation.UNNEST
         granularity = Granularity.MULTI_ROW
     elif parent.operator == FunctionType.RECURSE_EDGE:
@@ -625,7 +645,7 @@ def window_item_to_concept(
     fmetadata = metadata or Metadata()
     if not isinstance(parent.content, ConceptRef):
         raise NotImplementedError(
-            f"Window function
+            f"Window function with non ref content {parent.content} not yet supported"
         )
     bcontent = environment.concepts[parent.content.address]
     if isinstance(bcontent, UndefinedConcept):
@@ -844,6 +864,7 @@ def generate_concept_name(
         | Function
         | ListWrapper
         | MapWrapper
+        | Parenthetical
         | int
         | float
        | str
@@ -865,24 +886,36 @@ def generate_concept_name(
             return f"{VIRTUAL_CONCEPT_PREFIX}_group_to_{string_to_hash(str(parent))}"
         else:
             return f"{VIRTUAL_CONCEPT_PREFIX}_func_{parent.operator.value}_{string_to_hash(str(parent))}"
+    elif isinstance(parent, Parenthetical):
+        return f"{VIRTUAL_CONCEPT_PREFIX}_paren_{string_to_hash(str(parent))}"
+    elif isinstance(parent, FunctionCallWrapper):
+        return f"{VIRTUAL_CONCEPT_PREFIX}_{parent.name}_{string_to_hash(str(parent))}"
     else:  # ListWrapper, MapWrapper, or primitive types
         return f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(parent))}"


+def parenthetical_to_concept(
+    parent: Parenthetical,
+    name: str,
+    namespace: str,
+    environment: Environment,
+    metadata: Metadata | None = None,
+) -> Concept:
+    if isinstance(
+        parent.content,
+        ARBITRARY_INPUTS,
+    ):
+
+        return arbitrary_to_concept(
+            parent.content, environment, namespace, name, metadata
+        )
+    raise NotImplementedError(
+        f"Parenthetical with non-supported content {parent.content} ({type(parent.content)}) not yet supported"
+    )
+
+
 def arbitrary_to_concept(
-    parent: (
-        AggregateWrapper
-        | FunctionCallWrapper
-        | WindowItem
-        | FilterItem
-        | Function
-        | ListWrapper
-        | MapWrapper
-        | int
-        | float
-        | str
-        | date
-    ),
+    parent: ARBITRARY_INPUTS,
     environment: Environment,
     namespace: str | None = None,
     name: str | None = None,
@@ -938,5 +971,7 @@ def arbitrary_to_concept(
         )
     elif isinstance(parent, ListWrapper):
         return constant_to_concept(parent, name, namespace, metadata)
+    elif isinstance(parent, Parenthetical):
+        return parenthetical_to_concept(parent, name, namespace, environment, metadata)
     else:
         return constant_to_concept(parent, name, namespace, metadata)
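A side note on the new ARBITRARY_INPUTS constant: because it is a PEP 604 union object, the same name can serve both as the parameter annotation on arbitrary_to_concept and as the second argument to isinstance inside parenthetical_to_concept. A standalone sketch with simplified stand-in types (not the trilogy classes) follows; it requires Python 3.10+.

from datetime import date

# Simplified stand-in for ARBITRARY_INPUTS; the real constant also unions trilogy wrapper classes.
SAMPLE_INPUTS = int | float | str | date

assert isinstance(3.5, SAMPLE_INPUTS)
assert isinstance(date(2024, 1, 1), SAMPLE_INPUTS)
assert not isinstance([1, 2], SAMPLE_INPUTS)  # a list is not a member of the union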
trilogy/parsing/parse_engine.py CHANGED

@@ -223,7 +223,14 @@ def expr_to_boolean(
 def unwrap_transformation(
     input: Expr,
     environment: Environment,
-) ->
+) -> (
+    Function
+    | FilterItem
+    | WindowItem
+    | AggregateWrapper
+    | FunctionCallWrapper
+    | Parenthetical
+):
     if isinstance(input, Function):
         return input
     elif isinstance(input, AggregateWrapper):
@@ -243,7 +250,7 @@ def unwrap_transformation(
     elif isinstance(input, FunctionCallWrapper):
         return input
     elif isinstance(input, Parenthetical):
-        return
+        return input
     else:
         return Function.model_construct(
             operator=FunctionType.CONSTANT,
@@ -779,7 +786,6 @@ class ParseToObjects(Transformer):
         lookup, namespace, name, parent = parse_concept_reference(
             name, self.environment
         )
-
         concept = Concept(
             name=name,
             datatype=arg_to_datatype(constant),
@@ -1813,6 +1819,10 @@ class ParseToObjects(Transformer):
     def array_agg(self, meta, args):
         return self.function_factory.create_function(args, FunctionType.ARRAY_AGG, meta)

+    @v_args(meta=True)
+    def any(self, meta, args):
+        return self.function_factory.create_function(args, FunctionType.ANY, meta)
+
     @v_args(meta=True)
     def avg(self, meta, args):
         return self.function_factory.create_function(args, FunctionType.AVG, meta)
@@ -1953,6 +1963,10 @@ class ParseToObjects(Transformer):
     def fday(self, meta, args):
         return self.function_factory.create_function(args, FunctionType.DAY, meta)

+    @v_args(meta=True)
+    def fday_name(self, meta, args):
+        return self.function_factory.create_function(args, FunctionType.DAY_NAME, meta)
+
     @v_args(meta=True)
     def fday_of_week(self, meta, args):
         return self.function_factory.create_function(
@@ -1967,6 +1981,12 @@ class ParseToObjects(Transformer):
     def fmonth(self, meta, args):
         return self.function_factory.create_function(args, FunctionType.MONTH, meta)

+    @v_args(meta=True)
+    def fmonth_name(self, meta, args):
+        return self.function_factory.create_function(
+            args, FunctionType.MONTH_NAME, meta
+        )
+
     @v_args(meta=True)
     def fquarter(self, meta, args):
         return self.function_factory.create_function(args, FunctionType.QUARTER, meta)
@@ -2006,6 +2026,12 @@ class ParseToObjects(Transformer):
         )
         return self.function_factory.create_function(args, FunctionType.CAST, meta)

+    @v_args(meta=True)
+    def fdate_spine(self, meta, args) -> Function:
+        return self.function_factory.create_function(
+            args, FunctionType.DATE_SPINE, meta
+        )
+
     # utility functions
     @v_args(meta=True)
     def fcast(self, meta, args) -> Function:
@@ -2175,6 +2201,33 @@ class ParseToObjects(Transformer):
             meta,
         )

+    @v_args(meta=True)
+    def farray_filter(self, meta, args) -> Function:
+        factory: CustomFunctionFactory = args[1]
+        if not len(factory.function_arguments) == 1:
+            raise InvalidSyntaxException(
+                "Array filter function must have exactly one argument;"
+            )
+        array_type = arg_to_datatype(args[0])
+        if not isinstance(array_type, ArrayType):
+            raise InvalidSyntaxException(
+                f"Array filter function must be applied to an array, not {array_type}"
+            )
+        return self.function_factory.create_function(
+            [
+                args[0],
+                factory.function_arguments[0],
+                factory(
+                    ArgBinding(
+                        name=factory.function_arguments[0].name,
+                        datatype=array_type.value_data_type,
+                    )
+                ),
+            ],
+            FunctionType.ARRAY_FILTER,
+            meta,
+        )
+

 def unpack_visit_error(e: VisitError, text: str | None = None):
     """This is required to get exceptions from imports, which would
@@ -2211,6 +2264,7 @@ ERROR_CODES: dict[int, str] = {
     101: "Using FROM keyword? Trilogy does not have a FROM clause (Datasource resolution is automatic).",
     # 200 codes relate to required explicit syntax (we could loosen these?)
     201: 'Missing alias? Alias must be specified with "AS" - e.g. `SELECT x+1 AS y`',
+    202: "Missing closing semicolon? Statements must be terminated with a semicolon `;`.",
     210: "Missing order direction? Order by must be explicit about direction - specify `asc` or `desc`.",
 }

@@ -2291,7 +2345,7 @@ def parse_text(
     )

 def _handle_unexpected_token(e: UnexpectedToken, text: str) -> None:
-    """Handle UnexpectedToken errors
+    """Handle UnexpectedToken errors to make friendlier error messages."""
     # Handle ordering direction error
     pos = e.pos_in_stream or 0
     if e.expected == {"ORDERING_DIRECTION"}:
@@ -2303,12 +2357,27 @@ def parse_text(
         )
     if parsed_tokens == ["FROM"]:
         raise _create_syntax_error(101, pos, text)
-
-
+    # check if they are missing a semicolon
+    try:
+        e.interactive_parser.feed_token(Token("_TERMINATOR", ";"))
+        state = e.interactive_parser.lexer_thread.state
+        if state and state.last_token:
+            new_pos = state.last_token.end_pos or pos
+        else:
+            new_pos = pos
+        raise _create_syntax_error(202, new_pos, text)
+    except UnexpectedToken:
+        pass
+    # check if they forgot an as
     try:
         e.interactive_parser.feed_token(Token("AS", "AS"))
+        state = e.interactive_parser.lexer_thread.state
+        if state and state.last_token:
+            new_pos = state.last_token.end_pos or pos
+        else:
+            new_pos = pos
         e.interactive_parser.feed_token(Token("IDENTIFIER", e.token.value))
-        raise _create_syntax_error(201, pos, text)
+        raise _create_syntax_error(201, new_pos, text)
     except UnexpectedToken:
         pass
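The missing-semicolon hint works by probing lark's LALR interactive parser: if feeding a synthetic terminator would let parsing continue, error 202 is raised instead of a raw UnexpectedToken. A toy, self-contained sketch of that probe pattern (toy grammar and terminal names, not Trilogy's):

from lark import Lark, Token, UnexpectedToken

# Toy grammar standing in for Trilogy's: statements are "x" terminated by ";".
toy = Lark(
    """
    start: (WORD SEMI)+
    WORD: "x"
    SEMI: ";"
    %ignore " "
    """,
    parser="lalr",
)

try:
    toy.parse("x ; x")  # second statement is missing its terminator
except UnexpectedToken as e:
    try:
        # Same probe the parser now performs with Token("_TERMINATOR", ";").
        e.interactive_parser.feed_token(Token("SEMI", ";"))
        print("hint: missing closing semicolon?")
    except UnexpectedToken:
        print("some other syntax problem")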
trilogy/parsing/render.py CHANGED

@@ -662,7 +662,8 @@ class Renderer:
                 pair_strings.append(self.indent_lines(pair_line))
             inputs = ",\n".join(pair_strings)
             return f"struct(\n{inputs}\n{self.indent_context.current_indent})"
-
+        if arg.operator == FunctionType.ALIAS:
+            return f"{self.to_string(arg.arguments[0])}"
         inputs = ",".join(args)
         return f"{arg.operator.value}({inputs})"
trilogy/parsing/trilogy.lark CHANGED

@@ -113,7 +113,7 @@
 filter_item: _filter_base | _filter_alt

 // rank/lag/lead
-WINDOW_TYPE: ("row_number"i|"rank"i|"lag"i|"lead"i | "sum"i | "avg"i | "max"i | "min"i ) /[\s]+/
+WINDOW_TYPE: ("row_number"i|"rank"i|"lag"i|"lead"i | "sum"i | "avg"i | "max"i | "min"i | "count"i ) /[\s]+/

 window_item_over: ("OVER"i over_list)

@@ -323,8 +323,10 @@
 _ARRAY_TRANSFORM.1: "array_transform("i
 transform_lambda: "@" IDENTIFIER
 farray_transform: _ARRAY_TRANSFORM expr "," transform_lambda ")"
+_ARRAY_FILTER.1: "array_filter("i
+farray_filter: _ARRAY_FILTER expr "," transform_lambda ")"

-_array_functions: farray_sum | farray_distinct | farray_sort | farray_transform | farray_to_string
+_array_functions: farray_sum | farray_distinct | farray_sort | farray_transform | farray_to_string | farray_filter

 //map_functions
 _MAP_KEYS.1: "map_keys("i
@@ -358,11 +360,13 @@
 min: _MIN expr ")"
 _ARRAY_AGG.1: "array_agg("i
 array_agg: _ARRAY_AGG expr ")"
+_ANY.1: "any("i
+any: _ANY expr ")"

 //aggregates can force a grain
 aggregate_all: "*"
 aggregate_over: ("BY"i (aggregate_all | over_list))
-aggregate_functions: (count | count_distinct | sum | avg | max | min | array_agg) aggregate_over?
+aggregate_functions: (count | count_distinct | sum | avg | max | min | array_agg | any) aggregate_over?

 // date functions
 _DATE.1: "date("i
@@ -379,12 +383,16 @@
 fhour: _HOUR expr ")"
 _DAY.1: "day("i
 fday: _DAY expr ")"
+_DAY_NAME.1: "day_name("i
+fday_name: _DAY_NAME expr ")"
 _DAY_OF_WEEK.1: "day_of_week("i
 fday_of_week: _DAY_OF_WEEK expr ")"
 _WEEK.1: "week("i
 fweek: _WEEK expr ")"
 _MONTH.1: "month("i
 fmonth: _MONTH expr ")"
+_MONTH_NAME.1: "month_name("i
+fmonth_name: _MONTH_NAME expr ")"
 _QUARTER.1: "quarter("i
 fquarter: _QUARTER expr ")"
 _YEAR.1: "year("i
@@ -401,8 +409,10 @@
 fdate_sub: _DATE_SUB expr "," DATE_PART "," expr ")"
 _DATE_DIFF.1: "date_diff("i
 fdate_diff: _DATE_DIFF expr "," expr "," DATE_PART ")"
+_DATE_SPINE.1: "date_spine("i
+fdate_spine: _DATE_SPINE expr "," expr ")"

-_date_functions: fdate | fdate_add | fdate_sub | fdate_diff | fdatetime | ftimestamp | fsecond | fminute | fhour | fday | fday_of_week | fweek | fmonth | fquarter | fyear | fdate_part | fdate_trunc
+_date_functions: fdate | fdate_add | fdate_sub | fdate_diff | fdatetime | ftimestamp | fsecond | fminute | fhour | fday |fday_name | fday_of_week | fweek | fmonth | fmonth_name | fquarter | fyear | fdate_part | fdate_trunc | fdate_spine

 _static_functions: _string_functions | _math_functions | _array_functions | _map_functions

@@ -430,7 +440,7 @@

 float_lit: /\-?[0-9]*\.[0-9]+/

-array_lit: "[" (
+array_lit: "[" (expr ",")* expr ","? "]"

 tuple_lit: "(" (literal ",")* literal ","? ")"
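The grammar additions (_ANY.1, _DAY_NAME.1, _MONTH_NAME.1, _DATE_SPINE.1, _ARRAY_FILTER.1) follow the file's existing pattern of a priority-1 terminal that includes the opening parenthesis, which keeps a bare identifier such as any usable as an ordinary name. A toy lark sketch of that pattern (illustration only, not the Trilogy grammar):

from lark import Lark

toy = Lark(
    """
    start: call | NAME
    call: _ANY expr ")"
    expr: NAME
    _ANY.1: "any("i
    NAME: /[a-z_]+/
    %ignore " "
    """,
    parser="lalr",
)

# "any(x)" is lexed as the _ANY terminal plus an argument...
print(toy.parse("any(x)").pretty())
# ...while a bare "any" still lexes as a plain identifier.
print(toy.parse("any").pretty())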