pytrilogy 0.0.3.113__py3-none-any.whl → 0.0.3.116__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.3.113.dist-info → pytrilogy-0.0.3.116.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.3.113.dist-info → pytrilogy-0.0.3.116.dist-info}/RECORD +30 -30
- trilogy/__init__.py +1 -1
- trilogy/constants.py +29 -0
- trilogy/core/enums.py +6 -1
- trilogy/core/functions.py +33 -0
- trilogy/core/models/author.py +126 -2
- trilogy/core/models/build.py +70 -7
- trilogy/core/models/environment.py +2 -1
- trilogy/core/optimization.py +3 -2
- trilogy/core/optimizations/hide_unused_concept.py +1 -5
- trilogy/core/processing/concept_strategies_v3.py +26 -5
- trilogy/core/processing/discovery_node_factory.py +2 -2
- trilogy/core/processing/discovery_utility.py +11 -4
- trilogy/core/processing/node_generators/basic_node.py +26 -15
- trilogy/core/processing/node_generators/common.py +4 -1
- trilogy/core/processing/node_generators/filter_node.py +7 -0
- trilogy/core/processing/node_generators/multiselect_node.py +3 -3
- trilogy/core/processing/node_generators/unnest_node.py +77 -6
- trilogy/core/statements/author.py +4 -1
- trilogy/dialect/base.py +42 -2
- trilogy/executor.py +1 -1
- trilogy/parsing/common.py +117 -20
- trilogy/parsing/parse_engine.py +115 -5
- trilogy/parsing/render.py +2 -1
- trilogy/parsing/trilogy.lark +20 -7
- {pytrilogy-0.0.3.113.dist-info → pytrilogy-0.0.3.116.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.3.113.dist-info → pytrilogy-0.0.3.116.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.113.dist-info → pytrilogy-0.0.3.116.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.113.dist-info → pytrilogy-0.0.3.116.dist-info}/top_level.txt +0 -0
trilogy/dialect/base.py
CHANGED
|
@@ -176,6 +176,20 @@ def struct_arg(args):
|
|
|
176
176
|
return [f"{x[1]}: {x[0]}" for x in zip(args[::2], args[1::2])]
|
|
177
177
|
|
|
178
178
|
|
|
179
|
+
def hash_from_args(val, hash_type):
    """Render a SQL hash call for ``val`` using the requested algorithm.

    Args:
        val: Already-rendered SQL expression to be hashed.
        hash_type: Algorithm name, optionally wrapped in one pair of matching
            quote characters (e.g. ``'md5'``). Matching is case-insensitive.

    Returns:
        The string ``"<algorithm>(<val>)"`` with the algorithm lower-cased.

    Raises:
        ValueError: If the algorithm is not md5, sha1, sha256 or sha512.
    """
    algorithm = hash_type
    # Only peel a genuine pair of surrounding quotes; slicing unconditionally
    # would turn an unquoted "md5" into "d" and reject a valid algorithm.
    if (
        len(algorithm) >= 2
        and algorithm[0] == algorithm[-1]
        and algorithm[0] in "'\""
    ):
        algorithm = algorithm[1:-1]
    normalized = algorithm.lower()
    if normalized not in ("md5", "sha1", "sha256", "sha512"):
        raise ValueError(f"Unsupported hash type: {algorithm}")
    # The SQL function name is the lower-cased algorithm name itself.
    return f"{normalized}({val})"
|
|
191
|
+
|
|
192
|
+
|
|
179
193
|
FUNCTION_MAP = {
|
|
180
194
|
# generic types
|
|
181
195
|
FunctionType.ALIAS: lambda x: f"{x[0]}",
|
|
@@ -194,6 +208,13 @@ FUNCTION_MAP = {
|
|
|
194
208
|
FunctionType.INDEX_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
|
|
195
209
|
FunctionType.MAP_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
|
|
196
210
|
FunctionType.UNNEST: lambda x: f"unnest({x[0]})",
|
|
211
|
+
FunctionType.DATE_SPINE: lambda x: f"""unnest(
|
|
212
|
+
generate_series(
|
|
213
|
+
{x[0]},
|
|
214
|
+
{x[1]},
|
|
215
|
+
INTERVAL '1 day'
|
|
216
|
+
)
|
|
217
|
+
)""",
|
|
197
218
|
FunctionType.RECURSE_EDGE: lambda x: f"CASE WHEN {x[1]} IS NULL THEN {x[0]} ELSE {x[1]} END",
|
|
198
219
|
FunctionType.ATTR_ACCESS: lambda x: f"""{x[0]}.{x[1].replace("'", "")}""",
|
|
199
220
|
FunctionType.STRUCT: lambda x: f"{{{', '.join(struct_arg(x))}}}",
|
|
@@ -213,6 +234,9 @@ FUNCTION_MAP = {
|
|
|
213
234
|
FunctionType.ARRAY_TO_STRING: lambda args: (
|
|
214
235
|
f"array_to_string({args[0]}, {args[1]})"
|
|
215
236
|
),
|
|
237
|
+
FunctionType.ARRAY_FILTER: lambda args: (
|
|
238
|
+
f"array_filter({args[0]}, {args[1]} -> {args[2]})"
|
|
239
|
+
),
|
|
216
240
|
# math
|
|
217
241
|
FunctionType.ADD: lambda x: " + ".join(x),
|
|
218
242
|
FunctionType.ABS: lambda x: f"abs({x[0]})",
|
|
@@ -237,6 +261,7 @@ FUNCTION_MAP = {
|
|
|
237
261
|
FunctionType.AVG: lambda x: f"avg({x[0]})",
|
|
238
262
|
FunctionType.MAX: lambda x: f"max({x[0]})",
|
|
239
263
|
FunctionType.MIN: lambda x: f"min({x[0]})",
|
|
264
|
+
FunctionType.ANY: lambda x: f"any_value({x[0]})",
|
|
240
265
|
# string types
|
|
241
266
|
FunctionType.LIKE: lambda x: f" {x[0]} like {x[1]} ",
|
|
242
267
|
FunctionType.UPPER: lambda x: f"UPPER({x[0]}) ",
|
|
@@ -249,6 +274,7 @@ FUNCTION_MAP = {
|
|
|
249
274
|
FunctionType.REGEXP_REPLACE: lambda x: f"REGEXP_REPLACE({x[0]},{x[1]}, {x[2]})",
|
|
250
275
|
FunctionType.TRIM: lambda x: f"TRIM({x[0]})",
|
|
251
276
|
FunctionType.REPLACE: lambda x: f"REPLACE({x[0]},{x[1]},{x[2]})",
|
|
277
|
+
FunctionType.HASH: lambda x: hash_from_args(x[0], x[1]),
|
|
252
278
|
# FunctionType.NOT_LIKE: lambda x: f" CASE WHEN {x[0]} like {x[1]} THEN 0 ELSE 1 END",
|
|
253
279
|
# date types
|
|
254
280
|
FunctionType.DATE_TRUNCATE: lambda x: f"date_trunc({x[0]},{x[1]})",
|
|
@@ -285,6 +311,7 @@ FUNCTION_GRAIN_MATCH_MAP = {
|
|
|
285
311
|
FunctionType.AVG: lambda args: f"{args[0]}",
|
|
286
312
|
FunctionType.MAX: lambda args: f"{args[0]}",
|
|
287
313
|
FunctionType.MIN: lambda args: f"{args[0]}",
|
|
314
|
+
FunctionType.ANY: lambda args: f"{args[0]}",
|
|
288
315
|
}
|
|
289
316
|
|
|
290
317
|
|
|
@@ -472,7 +499,20 @@ class BaseDialect:
|
|
|
472
499
|
elif isinstance(c.lineage, BuildRowsetItem):
|
|
473
500
|
rval = f"{self.render_concept_sql(c.lineage.content, cte=cte, alias=False, raise_invalid=raise_invalid)}"
|
|
474
501
|
elif isinstance(c.lineage, BuildMultiSelectLineage):
|
|
475
|
-
|
|
502
|
+
if c.address in c.lineage.calculated_derivations:
|
|
503
|
+
assert c.lineage.derive is not None
|
|
504
|
+
for x in c.lineage.derive.items:
|
|
505
|
+
if x.address == c.address:
|
|
506
|
+
rval = self.render_expr(
|
|
507
|
+
x.expr,
|
|
508
|
+
cte=cte,
|
|
509
|
+
raise_invalid=raise_invalid,
|
|
510
|
+
)
|
|
511
|
+
break
|
|
512
|
+
else:
|
|
513
|
+
rval = f"{self.render_concept_sql(c.lineage.find_source(c, cte), cte=cte, alias=False, raise_invalid=raise_invalid)}"
|
|
514
|
+
elif isinstance(c.lineage, BuildComparison):
|
|
515
|
+
rval = f"{self.render_expr(c.lineage.left, cte=cte, raise_invalid=raise_invalid)} {c.lineage.operator.value} {self.render_expr(c.lineage.right, cte=cte, raise_invalid=raise_invalid)}"
|
|
476
516
|
elif isinstance(c.lineage, AGGREGATE_ITEMS):
|
|
477
517
|
args = [
|
|
478
518
|
self.render_expr(v, cte) # , alias=False)
|
|
@@ -804,7 +844,7 @@ class BaseDialect:
|
|
|
804
844
|
if self.rendering.parameters:
|
|
805
845
|
if e.concept.namespace == DEFAULT_NAMESPACE:
|
|
806
846
|
return f":{e.concept.name}"
|
|
807
|
-
return f":{e.concept.address}"
|
|
847
|
+
return f":{e.concept.address.replace('.', '_')}"
|
|
808
848
|
elif e.concept.lineage:
|
|
809
849
|
return self.render_expr(e.concept.lineage, cte=cte, cte_map=cte_map)
|
|
810
850
|
return f"{self.QUOTE_CHARACTER}{e.concept.address}{self.QUOTE_CHARACTER}"
|
trilogy/executor.py
CHANGED
|
@@ -397,7 +397,7 @@ class Executor(object):
|
|
|
397
397
|
if v.safe_address == param or v.address == param
|
|
398
398
|
]
|
|
399
399
|
if not matched:
|
|
400
|
-
raise SyntaxError(f"No concept found for parameter {param}")
|
|
400
|
+
raise SyntaxError(f"No concept found for parameter {param};")
|
|
401
401
|
|
|
402
402
|
concept: Concept = matched.pop()
|
|
403
403
|
return self._concept_to_value(concept, local_concepts=local_concepts)
|
trilogy/parsing/common.py
CHANGED
|
@@ -3,9 +3,7 @@ from typing import Iterable, List, Sequence, Tuple
|
|
|
3
3
|
|
|
4
4
|
from lark.tree import Meta
|
|
5
5
|
|
|
6
|
-
from trilogy.constants import
|
|
7
|
-
VIRTUAL_CONCEPT_PREFIX,
|
|
8
|
-
)
|
|
6
|
+
from trilogy.constants import DEFAULT_NAMESPACE, VIRTUAL_CONCEPT_PREFIX
|
|
9
7
|
from trilogy.core.constants import ALL_ROWS_CONCEPT
|
|
10
8
|
from trilogy.core.enums import (
|
|
11
9
|
ConceptSource,
|
|
@@ -64,10 +62,12 @@ ARBITRARY_INPUTS = (
|
|
|
64
62
|
| Parenthetical
|
|
65
63
|
| ListWrapper
|
|
66
64
|
| MapWrapper
|
|
65
|
+
| Comparison
|
|
67
66
|
| int
|
|
68
67
|
| float
|
|
69
68
|
| str
|
|
70
69
|
| date
|
|
70
|
+
| bool
|
|
71
71
|
)
|
|
72
72
|
|
|
73
73
|
|
|
@@ -91,7 +91,7 @@ def process_function_arg(
|
|
|
91
91
|
# to simplify anonymous function handling
|
|
92
92
|
if (
|
|
93
93
|
arg.operator not in FunctionClass.AGGREGATE_FUNCTIONS.value
|
|
94
|
-
and arg.operator
|
|
94
|
+
and arg.operator not in FunctionClass.ONE_TO_MANY.value
|
|
95
95
|
):
|
|
96
96
|
return arg
|
|
97
97
|
id_hash = string_to_hash(str(arg))
|
|
@@ -311,13 +311,18 @@ def concept_is_relevant(
|
|
|
311
311
|
if concept.purpose in (Purpose.METRIC,):
|
|
312
312
|
if all([c in others for c in concept.grain.components]):
|
|
313
313
|
return False
|
|
314
|
+
if (
|
|
315
|
+
concept.derivation in (Derivation.BASIC,)
|
|
316
|
+
and isinstance(concept.lineage, Function)
|
|
317
|
+
and concept.lineage.operator == FunctionType.DATE_SPINE
|
|
318
|
+
):
|
|
319
|
+
return True
|
|
314
320
|
if concept.derivation in (Derivation.BASIC,) and isinstance(
|
|
315
321
|
concept.lineage, (Function, CaseWhen)
|
|
316
322
|
):
|
|
317
323
|
relevant = False
|
|
318
324
|
for arg in concept.lineage.arguments:
|
|
319
325
|
relevant = atom_is_relevant(arg, others, environment) or relevant
|
|
320
|
-
|
|
321
326
|
return relevant
|
|
322
327
|
if concept.derivation in (Derivation.BASIC,) and isinstance(
|
|
323
328
|
concept.lineage, Parenthetical
|
|
@@ -394,6 +399,10 @@ def _get_relevant_parent_concepts(arg) -> tuple[list[ConceptRef], bool]:
|
|
|
394
399
|
return [x.reference for x in arg.by], True
|
|
395
400
|
elif isinstance(arg, FunctionCallWrapper):
|
|
396
401
|
return get_relevant_parent_concepts(arg.content)
|
|
402
|
+
elif isinstance(arg, Comparison):
|
|
403
|
+
left, lflag = get_relevant_parent_concepts(arg.left)
|
|
404
|
+
right, rflag = get_relevant_parent_concepts(arg.right)
|
|
405
|
+
return left + right, lflag or rflag
|
|
397
406
|
return get_concept_arguments(arg), False
|
|
398
407
|
|
|
399
408
|
|
|
@@ -529,7 +538,7 @@ def function_to_concept(
|
|
|
529
538
|
elif parent.operator == FunctionType.UNION:
|
|
530
539
|
derivation = Derivation.UNION
|
|
531
540
|
granularity = Granularity.MULTI_ROW
|
|
532
|
-
elif parent.operator
|
|
541
|
+
elif parent.operator in FunctionClass.ONE_TO_MANY.value:
|
|
533
542
|
derivation = Derivation.UNNEST
|
|
534
543
|
granularity = Granularity.MULTI_ROW
|
|
535
544
|
elif parent.operator == FunctionType.RECURSE_EDGE:
|
|
@@ -770,6 +779,27 @@ def align_item_to_concept(
|
|
|
770
779
|
return new
|
|
771
780
|
|
|
772
781
|
|
|
782
|
+
def derive_item_to_concept(
    parent: ARBITRARY_INPUTS,
    name: str,
    lineage: MultiSelectLineage,
    namespace: str | None = None,
) -> Concept:
    """Create the concept that represents one derived item of a multiselect.

    Args:
        parent: Expression the item is derived from; only its datatype
            (via ``arg_to_datatype``) is used here.
        name: Name of the new concept.
        lineage: Multiselect lineage recorded on the concept.
        namespace: Target namespace; a falsy value falls back to
            ``DEFAULT_NAMESPACE``.

    Returns:
        A multi-row PROPERTY concept with MULTISELECT derivation and an
        empty grain.
    """
    return Concept(
        name=name,
        datatype=arg_to_datatype(parent),
        purpose=Purpose.PROPERTY,
        lineage=lineage,
        grain=Grain(),
        namespace=namespace if namespace else DEFAULT_NAMESPACE,
        granularity=Granularity.MULTI_ROW,
        derivation=Derivation.MULTISELECT,
    )
|
|
801
|
+
|
|
802
|
+
|
|
773
803
|
def rowset_concept(
|
|
774
804
|
orig_address: ConceptRef,
|
|
775
805
|
environment: Environment,
|
|
@@ -851,20 +881,7 @@ def rowset_to_concepts(rowset: RowsetDerivationStatement, environment: Environme
|
|
|
851
881
|
|
|
852
882
|
|
|
853
883
|
def generate_concept_name(
|
|
854
|
-
parent:
|
|
855
|
-
AggregateWrapper
|
|
856
|
-
| FunctionCallWrapper
|
|
857
|
-
| WindowItem
|
|
858
|
-
| FilterItem
|
|
859
|
-
| Function
|
|
860
|
-
| ListWrapper
|
|
861
|
-
| MapWrapper
|
|
862
|
-
| Parenthetical
|
|
863
|
-
| int
|
|
864
|
-
| float
|
|
865
|
-
| str
|
|
866
|
-
| date
|
|
867
|
-
),
|
|
884
|
+
parent: ARBITRARY_INPUTS,
|
|
868
885
|
) -> str:
|
|
869
886
|
"""Generate a name for a concept based on its parent type and content."""
|
|
870
887
|
if isinstance(parent, AggregateWrapper):
|
|
@@ -885,6 +902,8 @@ def generate_concept_name(
|
|
|
885
902
|
return f"{VIRTUAL_CONCEPT_PREFIX}_paren_{string_to_hash(str(parent))}"
|
|
886
903
|
elif isinstance(parent, FunctionCallWrapper):
|
|
887
904
|
return f"{VIRTUAL_CONCEPT_PREFIX}_{parent.name}_{string_to_hash(str(parent))}"
|
|
905
|
+
elif isinstance(parent, Comparison):
|
|
906
|
+
return f"{VIRTUAL_CONCEPT_PREFIX}_comp_{string_to_hash(str(parent))}"
|
|
888
907
|
else: # ListWrapper, MapWrapper, or primitive types
|
|
889
908
|
return f"{VIRTUAL_CONCEPT_PREFIX}_{string_to_hash(str(parent))}"
|
|
890
909
|
|
|
@@ -909,6 +928,82 @@ def parenthetical_to_concept(
|
|
|
909
928
|
)
|
|
910
929
|
|
|
911
930
|
|
|
931
|
+
def comparison_to_concept(
    parent: Comparison,
    name: str,
    namespace: str,
    environment: Environment,
    metadata: Metadata | None = None,
) -> Concept:
    """Build a BASIC-derivation concept whose lineage is a comparison.

    Args:
        parent: Comparison expression stored as the concept's lineage.
        name: Name of the new concept.
        namespace: Target namespace; a falsy value falls back to
            ``environment.namespace``.
        environment: Used to resolve the concepts referenced by ``parent``.
        metadata: Optional metadata; a fresh ``Metadata()`` is used if omitted.

    Returns:
        The new multi-row concept. Its purpose is METRIC when the parent
        lookup flags the expression as a metric, CONSTANT when no
        non-constant parents remain, and PROPERTY otherwise.
    """
    namespace = namespace or environment.namespace
    fmetadata = metadata or Metadata()

    ref_args, is_metric = get_relevant_parent_concepts(parent)
    concrete_args = [environment.concepts[c.address] for c in ref_args]
    # Constants and grain-less aggregates do not contribute keys or grain.
    pkeys: List[Concept] = [
        x
        for x in concrete_args
        if not x.derivation == Derivation.CONSTANT
        and not (x.derivation == Derivation.AGGREGATE and not x.grain.components)
    ]

    grain: Grain | None = Grain()
    for x in pkeys:
        grain += x.grain
    # NOTE(review): this check mirrors the function-to-concept path; confirm
    # whether a comparison operator can ever be a ONE_TO_MANY member.
    if parent.operator in FunctionClass.ONE_TO_MANY.value:
        # if the function will create more rows, we don't know what grain this is at
        grain = None

    modifiers = get_upstream_modifiers(pkeys, environment)

    key_grain: list[str] = []
    for x in pkeys:
        # metrics will group to keys, so do not do key traversal
        if is_metric:
            key_grain.append(x.address)
        # otherwise, for row ops, assume keys are transitive
        elif x.keys:
            key_grain += [*x.keys]
        else:
            key_grain.append(x.address)
    keys = set(key_grain)

    if is_metric:
        purpose = Purpose.METRIC
    elif not pkeys:
        purpose = Purpose.CONSTANT
    else:
        purpose = Purpose.PROPERTY

    if grain is None:
        # Unknown grain: omit the argument rather than passing None.
        return Concept(
            name=name,
            datatype=parent.output_datatype,
            purpose=purpose,
            lineage=parent,
            namespace=namespace,
            keys=keys,
            modifiers=modifiers,
            metadata=fmetadata,
            derivation=Derivation.BASIC,
            granularity=Granularity.MULTI_ROW,
        )
    return Concept(
        name=name,
        datatype=parent.output_datatype,
        purpose=purpose,
        lineage=parent,
        namespace=namespace,
        keys=keys,
        modifiers=modifiers,
        grain=grain,
        metadata=fmetadata,
        derivation=Derivation.BASIC,
        granularity=Granularity.MULTI_ROW,
    )
|
|
1005
|
+
|
|
1006
|
+
|
|
912
1007
|
def arbitrary_to_concept(
|
|
913
1008
|
parent: ARBITRARY_INPUTS,
|
|
914
1009
|
environment: Environment,
|
|
@@ -968,5 +1063,7 @@ def arbitrary_to_concept(
|
|
|
968
1063
|
return constant_to_concept(parent, name, namespace, metadata)
|
|
969
1064
|
elif isinstance(parent, Parenthetical):
|
|
970
1065
|
return parenthetical_to_concept(parent, name, namespace, environment, metadata)
|
|
1066
|
+
elif isinstance(parent, Comparison):
|
|
1067
|
+
return comparison_to_concept(parent, name, namespace, environment, metadata)
|
|
971
1068
|
else:
|
|
972
1069
|
return constant_to_concept(parent, name, namespace, metadata)
|
trilogy/parsing/parse_engine.py
CHANGED
|
@@ -62,6 +62,8 @@ from trilogy.core.models.author import (
|
|
|
62
62
|
Conditional,
|
|
63
63
|
CustomFunctionFactory,
|
|
64
64
|
CustomType,
|
|
65
|
+
DeriveClause,
|
|
66
|
+
DeriveItem,
|
|
65
67
|
Expr,
|
|
66
68
|
FilterItem,
|
|
67
69
|
Function,
|
|
@@ -69,6 +71,7 @@ from trilogy.core.models.author import (
|
|
|
69
71
|
Grain,
|
|
70
72
|
HavingClause,
|
|
71
73
|
Metadata,
|
|
74
|
+
MultiSelectLineage,
|
|
72
75
|
OrderBy,
|
|
73
76
|
OrderItem,
|
|
74
77
|
Parenthetical,
|
|
@@ -135,6 +138,7 @@ from trilogy.parsing.common import (
|
|
|
135
138
|
align_item_to_concept,
|
|
136
139
|
arbitrary_to_concept,
|
|
137
140
|
constant_to_concept,
|
|
141
|
+
derive_item_to_concept,
|
|
138
142
|
process_function_args,
|
|
139
143
|
rowset_to_concepts,
|
|
140
144
|
)
|
|
@@ -603,6 +607,9 @@ class ParseToObjects(Transformer):
|
|
|
603
607
|
def PROPERTY(self, args):
|
|
604
608
|
return Purpose.PROPERTY
|
|
605
609
|
|
|
610
|
+
def HASH_TYPE(self, args):
    """Unwrap the HASH_TYPE terminal and return its ``value`` attribute."""
    token = args
    return token.value
|
|
612
|
+
|
|
606
613
|
@v_args(meta=True)
|
|
607
614
|
def prop_ident(self, meta: Meta, args) -> Tuple[List[Concept], str]:
|
|
608
615
|
return [self.environment.concepts[grain] for grain in args[:-1]], args[-1]
|
|
@@ -707,7 +714,14 @@ class ParseToObjects(Transformer):
|
|
|
707
714
|
|
|
708
715
|
if isinstance(
|
|
709
716
|
source_value,
|
|
710
|
-
(
|
|
717
|
+
(
|
|
718
|
+
FilterItem,
|
|
719
|
+
WindowItem,
|
|
720
|
+
AggregateWrapper,
|
|
721
|
+
Function,
|
|
722
|
+
FunctionCallWrapper,
|
|
723
|
+
Comparison,
|
|
724
|
+
),
|
|
711
725
|
):
|
|
712
726
|
concept = arbitrary_to_concept(
|
|
713
727
|
source_value,
|
|
@@ -1275,6 +1289,17 @@ class ParseToObjects(Transformer):
|
|
|
1275
1289
|
def align_clause(self, meta: Meta, args) -> AlignClause:
|
|
1276
1290
|
return AlignClause(items=args)
|
|
1277
1291
|
|
|
1292
|
+
@v_args(meta=True)
def derive_item(self, meta: Meta, args) -> DeriveItem:
    """Build a DeriveItem from ``args``: the expression, then its name.

    The item is placed in the environment's current namespace.
    """
    expression = args[0]
    target_name = args[1]
    return DeriveItem(
        expr=expression,
        name=target_name,
        namespace=self.environment.namespace,
    )
|
|
1297
|
+
|
|
1298
|
+
@v_args(meta=True)
def derive_clause(self, meta: Meta, args) -> DeriveClause:
    """Wrap the transformed child items of the rule in a DeriveClause."""
    clause = DeriveClause(items=args)
    return clause
|
|
1302
|
+
|
|
1278
1303
|
@v_args(meta=True)
|
|
1279
1304
|
def multi_select_statement(self, meta: Meta, args) -> MultiSelectStatement:
|
|
1280
1305
|
|
|
@@ -1284,6 +1309,7 @@ class ParseToObjects(Transformer):
|
|
|
1284
1309
|
order_by: OrderBy | None = None
|
|
1285
1310
|
where: WhereClause | None = None
|
|
1286
1311
|
having: HavingClause | None = None
|
|
1312
|
+
derive: DeriveClause | None = None
|
|
1287
1313
|
for arg in args:
|
|
1288
1314
|
if isinstance(arg, SelectStatement):
|
|
1289
1315
|
selects.append(arg)
|
|
@@ -1297,11 +1323,24 @@ class ParseToObjects(Transformer):
|
|
|
1297
1323
|
having = arg
|
|
1298
1324
|
elif isinstance(arg, AlignClause):
|
|
1299
1325
|
align = arg
|
|
1326
|
+
elif isinstance(arg, DeriveClause):
|
|
1327
|
+
derive = arg
|
|
1300
1328
|
|
|
1301
1329
|
assert align
|
|
1302
1330
|
assert align is not None
|
|
1303
1331
|
|
|
1304
1332
|
derived_concepts = []
|
|
1333
|
+
new_selects = [x.as_lineage(self.environment) for x in selects]
|
|
1334
|
+
lineage = MultiSelectLineage(
|
|
1335
|
+
selects=new_selects,
|
|
1336
|
+
align=align,
|
|
1337
|
+
derive=derive,
|
|
1338
|
+
namespace=self.environment.namespace,
|
|
1339
|
+
where_clause=where,
|
|
1340
|
+
having_clause=having,
|
|
1341
|
+
limit=limit,
|
|
1342
|
+
hidden_components=set(y for x in new_selects for y in x.hidden_components),
|
|
1343
|
+
)
|
|
1305
1344
|
for x in align.items:
|
|
1306
1345
|
concept = align_item_to_concept(
|
|
1307
1346
|
x,
|
|
@@ -1314,6 +1353,19 @@ class ParseToObjects(Transformer):
|
|
|
1314
1353
|
)
|
|
1315
1354
|
derived_concepts.append(concept)
|
|
1316
1355
|
self.environment.add_concept(concept, meta=meta)
|
|
1356
|
+
if derive:
|
|
1357
|
+
for derived in derive.items:
|
|
1358
|
+
derivation = derived.expr
|
|
1359
|
+
name = derived.name
|
|
1360
|
+
if not isinstance(derivation, (Function, Comparison, WindowItem)):
|
|
1361
|
+
raise SyntaxError(
|
|
1362
|
+
f"Invalid derive expression {derivation} in {meta.line}, must be a function or conditional"
|
|
1363
|
+
)
|
|
1364
|
+
concept = derive_item_to_concept(
|
|
1365
|
+
derivation, name, lineage, self.environment.namespace
|
|
1366
|
+
)
|
|
1367
|
+
derived_concepts.append(concept)
|
|
1368
|
+
self.environment.add_concept(concept, meta=meta)
|
|
1317
1369
|
multi = MultiSelectStatement(
|
|
1318
1370
|
selects=selects,
|
|
1319
1371
|
align=align,
|
|
@@ -1323,6 +1375,7 @@ class ParseToObjects(Transformer):
|
|
|
1323
1375
|
limit=limit,
|
|
1324
1376
|
meta=Metadata(line_number=meta.line),
|
|
1325
1377
|
derived_concepts=derived_concepts,
|
|
1378
|
+
derive=derive,
|
|
1326
1379
|
)
|
|
1327
1380
|
return multi
|
|
1328
1381
|
|
|
@@ -1819,6 +1872,10 @@ class ParseToObjects(Transformer):
|
|
|
1819
1872
|
def array_agg(self, meta, args):
|
|
1820
1873
|
return self.function_factory.create_function(args, FunctionType.ARRAY_AGG, meta)
|
|
1821
1874
|
|
|
1875
|
+
@v_args(meta=True)
def any(self, meta, args):
    """Create a ``FunctionType.ANY`` function from the rule's arguments."""
    operator = FunctionType.ANY
    return self.function_factory.create_function(args, operator, meta)
|
|
1878
|
+
|
|
1822
1879
|
@v_args(meta=True)
|
|
1823
1880
|
def avg(self, meta, args):
|
|
1824
1881
|
return self.function_factory.create_function(args, FunctionType.AVG, meta)
|
|
@@ -1879,6 +1936,10 @@ class ParseToObjects(Transformer):
|
|
|
1879
1936
|
def ftrim(self, meta, args):
|
|
1880
1937
|
return self.function_factory.create_function(args, FunctionType.TRIM, meta)
|
|
1881
1938
|
|
|
1939
|
+
@v_args(meta=True)
def fhash(self, meta, args):
    """Create a ``FunctionType.HASH`` function from the rule's arguments."""
    operator = FunctionType.HASH
    return self.function_factory.create_function(args, operator, meta)
|
|
1942
|
+
|
|
1882
1943
|
@v_args(meta=True)
|
|
1883
1944
|
def fsubstring(self, meta, args):
|
|
1884
1945
|
return self.function_factory.create_function(args, FunctionType.SUBSTRING, meta)
|
|
@@ -2022,6 +2083,12 @@ class ParseToObjects(Transformer):
|
|
|
2022
2083
|
)
|
|
2023
2084
|
return self.function_factory.create_function(args, FunctionType.CAST, meta)
|
|
2024
2085
|
|
|
2086
|
+
@v_args(meta=True)
def fdate_spine(self, meta, args) -> Function:
    """Create a ``FunctionType.DATE_SPINE`` function from the rule's arguments."""
    operator = FunctionType.DATE_SPINE
    return self.function_factory.create_function(args, operator, meta)
|
|
2091
|
+
|
|
2025
2092
|
# utility functions
|
|
2026
2093
|
@v_args(meta=True)
|
|
2027
2094
|
def fcast(self, meta, args) -> Function:
|
|
@@ -2191,6 +2258,33 @@ class ParseToObjects(Transformer):
|
|
|
2191
2258
|
meta,
|
|
2192
2259
|
)
|
|
2193
2260
|
|
|
2261
|
+
@v_args(meta=True)
def farray_filter(self, meta, args) -> Function:
    """Create an ARRAY_FILTER function from an array and a one-argument lambda.

    ``args[0]`` is the array expression and ``args[1]`` the custom function
    factory used as the predicate.

    Raises:
        InvalidSyntaxException: If the factory does not declare exactly one
            argument, or if ``args[0]`` does not resolve to an ArrayType.
    """
    factory: CustomFunctionFactory = args[1]
    if len(factory.function_arguments) != 1:
        raise InvalidSyntaxException(
            "Array filter function must have exactly one argument;"
        )

    array_type = arg_to_datatype(args[0])
    if not isinstance(array_type, ArrayType):
        raise InvalidSyntaxException(
            f"Array filter function must be applied to an array, not {array_type}"
        )

    # Function arguments are: the array, the lambda parameter, and the
    # predicate produced by calling the factory with that parameter bound
    # to the array's element type.
    function_args = [
        args[0],
        factory.function_arguments[0],
        factory(
            ArgBinding(
                name=factory.function_arguments[0].name,
                datatype=array_type.value_data_type,
            )
        ),
    ]
    return self.function_factory.create_function(
        function_args, FunctionType.ARRAY_FILTER, meta
    )
|
|
2287
|
+
|
|
2194
2288
|
|
|
2195
2289
|
def unpack_visit_error(e: VisitError, text: str | None = None):
|
|
2196
2290
|
"""This is required to get exceptions from imports, which would
|
|
@@ -2227,6 +2321,7 @@ ERROR_CODES: dict[int, str] = {
|
|
|
2227
2321
|
101: "Using FROM keyword? Trilogy does not have a FROM clause (Datasource resolution is automatic).",
|
|
2228
2322
|
# 200 codes relate to required explicit syntax (we could loosen these?)
|
|
2229
2323
|
201: 'Missing alias? Alias must be specified with "AS" - e.g. `SELECT x+1 AS y`',
|
|
2324
|
+
202: "Missing closing semicolon? Statements must be terminated with a semicolon `;`.",
|
|
2230
2325
|
210: "Missing order direction? Order by must be explicit about direction - specify `asc` or `desc`.",
|
|
2231
2326
|
}
|
|
2232
2327
|
|
|
@@ -2307,7 +2402,7 @@ def parse_text(
|
|
|
2307
2402
|
)
|
|
2308
2403
|
|
|
2309
2404
|
def _handle_unexpected_token(e: UnexpectedToken, text: str) -> None:
|
|
2310
|
-
"""Handle UnexpectedToken errors
|
|
2405
|
+
"""Handle UnexpectedToken errors to make friendlier error messages."""
|
|
2311
2406
|
# Handle ordering direction error
|
|
2312
2407
|
pos = e.pos_in_stream or 0
|
|
2313
2408
|
if e.expected == {"ORDERING_DIRECTION"}:
|
|
@@ -2319,12 +2414,27 @@ def parse_text(
|
|
|
2319
2414
|
)
|
|
2320
2415
|
if parsed_tokens == ["FROM"]:
|
|
2321
2416
|
raise _create_syntax_error(101, pos, text)
|
|
2322
|
-
|
|
2323
|
-
|
|
2417
|
+
# check if they are missing a semicolon
|
|
2418
|
+
try:
|
|
2419
|
+
e.interactive_parser.feed_token(Token("_TERMINATOR", ";"))
|
|
2420
|
+
state = e.interactive_parser.lexer_thread.state
|
|
2421
|
+
if state and state.last_token:
|
|
2422
|
+
new_pos = state.last_token.end_pos or pos
|
|
2423
|
+
else:
|
|
2424
|
+
new_pos = pos
|
|
2425
|
+
raise _create_syntax_error(202, new_pos, text)
|
|
2426
|
+
except UnexpectedToken:
|
|
2427
|
+
pass
|
|
2428
|
+
# check if they forgot an as
|
|
2324
2429
|
try:
|
|
2325
2430
|
e.interactive_parser.feed_token(Token("AS", "AS"))
|
|
2431
|
+
state = e.interactive_parser.lexer_thread.state
|
|
2432
|
+
if state and state.last_token:
|
|
2433
|
+
new_pos = state.last_token.end_pos or pos
|
|
2434
|
+
else:
|
|
2435
|
+
new_pos = pos
|
|
2326
2436
|
e.interactive_parser.feed_token(Token("IDENTIFIER", e.token.value))
|
|
2327
|
-
raise _create_syntax_error(201,
|
|
2437
|
+
raise _create_syntax_error(201, new_pos, text)
|
|
2328
2438
|
except UnexpectedToken:
|
|
2329
2439
|
pass
|
|
2330
2440
|
|
trilogy/parsing/render.py
CHANGED
|
@@ -662,7 +662,8 @@ class Renderer:
|
|
|
662
662
|
pair_strings.append(self.indent_lines(pair_line))
|
|
663
663
|
inputs = ",\n".join(pair_strings)
|
|
664
664
|
return f"struct(\n{inputs}\n{self.indent_context.current_indent})"
|
|
665
|
-
|
|
665
|
+
if arg.operator == FunctionType.ALIAS:
|
|
666
|
+
return f"{self.to_string(arg.arguments[0])}"
|
|
666
667
|
inputs = ",".join(args)
|
|
667
668
|
return f"{arg.operator.value}({inputs})"
|
|
668
669
|
|
trilogy/parsing/trilogy.lark
CHANGED
|
@@ -74,12 +74,16 @@
|
|
|
74
74
|
select_statement: where? "select"i select_list where? having? order_by? limit?
|
|
75
75
|
|
|
76
76
|
// multiple_selects
|
|
77
|
-
multi_select_statement: select_statement ("merge" select_statement)+ "align"i align_clause where? order_by? limit?
|
|
77
|
+
multi_select_statement: select_statement ("merge" select_statement)+ "align"i align_clause ("derive" derive_clause)? where? order_by? limit?
|
|
78
78
|
|
|
79
79
|
align_item: IDENTIFIER ":" IDENTIFIER ("," IDENTIFIER)* ","?
|
|
80
80
|
|
|
81
81
|
align_clause: align_item ("AND"i align_item)* "AND"i?
|
|
82
82
|
|
|
83
|
+
derive_item: expr "->" IDENTIFIER
|
|
84
|
+
|
|
85
|
+
derive_clause: derive_item ("," derive_item)* ","?
|
|
86
|
+
|
|
83
87
|
merge_statement: "merge"i WILDCARD_IDENTIFIER "into"i SHORTHAND_MODIFIER? WILDCARD_IDENTIFIER
|
|
84
88
|
|
|
85
89
|
// raw sql statement
|
|
@@ -113,7 +117,7 @@
|
|
|
113
117
|
filter_item: _filter_base | _filter_alt
|
|
114
118
|
|
|
115
119
|
// rank/lag/lead
|
|
116
|
-
WINDOW_TYPE: ("row_number"i|"rank"i|"lag"i|"lead"i | "sum"i | "avg"i | "max"i | "min"i ) /[\s]+/
|
|
120
|
+
WINDOW_TYPE: ("row_number"i|"rank"i|"lag"i|"lead"i | "sum"i | "avg"i | "max"i | "min"i | "count"i ) /[\s]+/
|
|
117
121
|
|
|
118
122
|
window_item_over: ("OVER"i over_list)
|
|
119
123
|
|
|
@@ -308,8 +312,11 @@
|
|
|
308
312
|
fregexp_contains: _REGEXP_CONTAINS expr "," expr ")"
|
|
309
313
|
_REGEXP_REPLACE.1: "regexp_replace("
|
|
310
314
|
fregexp_replace: _REGEXP_REPLACE expr "," expr "," expr ")"
|
|
315
|
+
_HASH.1: "hash("
|
|
316
|
+
HASH_TYPE: "md5"i | "sha1"i | "sha256"i | "sha512"i
|
|
317
|
+
fhash: _HASH expr "," HASH_TYPE ")"
|
|
311
318
|
|
|
312
|
-
_string_functions: like | ilike | upper | flower | fsplit | fstrpos | fsubstring | fcontains | ftrim | freplace | fregexp_extract | fregexp_contains | fregexp_replace
|
|
319
|
+
_string_functions: like | ilike | upper | flower | fsplit | fstrpos | fsubstring | fcontains | ftrim | freplace | fregexp_extract | fregexp_contains | fregexp_replace | fhash
|
|
313
320
|
|
|
314
321
|
//array_functions
|
|
315
322
|
_ARRAY_SUM.1: "array_sum("i
|
|
@@ -323,8 +330,10 @@
|
|
|
323
330
|
_ARRAY_TRANSFORM.1: "array_transform("i
|
|
324
331
|
transform_lambda: "@" IDENTIFIER
|
|
325
332
|
farray_transform: _ARRAY_TRANSFORM expr "," transform_lambda ")"
|
|
333
|
+
_ARRAY_FILTER.1: "array_filter("i
|
|
334
|
+
farray_filter: _ARRAY_FILTER expr "," transform_lambda ")"
|
|
326
335
|
|
|
327
|
-
_array_functions: farray_sum | farray_distinct | farray_sort | farray_transform | farray_to_string
|
|
336
|
+
_array_functions: farray_sum | farray_distinct | farray_sort | farray_transform | farray_to_string | farray_filter
|
|
328
337
|
|
|
329
338
|
//map_functions
|
|
330
339
|
_MAP_KEYS.1: "map_keys("i
|
|
@@ -358,11 +367,13 @@
|
|
|
358
367
|
min: _MIN expr ")"
|
|
359
368
|
_ARRAY_AGG.1: "array_agg("i
|
|
360
369
|
array_agg: _ARRAY_AGG expr ")"
|
|
370
|
+
_ANY.1: "any("i
|
|
371
|
+
any: _ANY expr ")"
|
|
361
372
|
|
|
362
373
|
//aggregates can force a grain
|
|
363
374
|
aggregate_all: "*"
|
|
364
375
|
aggregate_over: ("BY"i (aggregate_all | over_list))
|
|
365
|
-
aggregate_functions: (count | count_distinct | sum | avg | max | min | array_agg) aggregate_over?
|
|
376
|
+
aggregate_functions: (count | count_distinct | sum | avg | max | min | array_agg | any) aggregate_over?
|
|
366
377
|
|
|
367
378
|
// date functions
|
|
368
379
|
_DATE.1: "date("i
|
|
@@ -405,8 +416,10 @@
|
|
|
405
416
|
fdate_sub: _DATE_SUB expr "," DATE_PART "," expr ")"
|
|
406
417
|
_DATE_DIFF.1: "date_diff("i
|
|
407
418
|
fdate_diff: _DATE_DIFF expr "," expr "," DATE_PART ")"
|
|
419
|
+
_DATE_SPINE.1: "date_spine("i
|
|
420
|
+
fdate_spine: _DATE_SPINE expr "," expr ")"
|
|
408
421
|
|
|
409
|
-
_date_functions: fdate | fdate_add | fdate_sub | fdate_diff | fdatetime | ftimestamp | fsecond | fminute | fhour | fday |fday_name | fday_of_week | fweek | fmonth | fmonth_name | fquarter | fyear | fdate_part | fdate_trunc
|
|
422
|
+
_date_functions: fdate | fdate_add | fdate_sub | fdate_diff | fdatetime | ftimestamp | fsecond | fminute | fhour | fday |fday_name | fday_of_week | fweek | fmonth | fmonth_name | fquarter | fyear | fdate_part | fdate_trunc | fdate_spine
|
|
410
423
|
|
|
411
424
|
_static_functions: _string_functions | _math_functions | _array_functions | _map_functions
|
|
412
425
|
|
|
@@ -434,7 +447,7 @@
|
|
|
434
447
|
|
|
435
448
|
float_lit: /\-?[0-9]*\.[0-9]+/
|
|
436
449
|
|
|
437
|
-
array_lit: "[" (
|
|
450
|
+
array_lit: "[" (expr ",")* expr ","? "]"()
|
|
438
451
|
|
|
439
452
|
tuple_lit: "(" (literal ",")* literal ","? ")"
|
|
440
453
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|