pytrilogy 0.0.2.22__py3-none-any.whl → 0.0.2.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic.
- {pytrilogy-0.0.2.22.dist-info → pytrilogy-0.0.2.25.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.22.dist-info → pytrilogy-0.0.2.25.dist-info}/RECORD +21 -21
- {pytrilogy-0.0.2.22.dist-info → pytrilogy-0.0.2.25.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/core/enums.py +10 -0
- trilogy/core/env_processor.py +12 -6
- trilogy/core/environment_helpers.py +0 -1
- trilogy/core/functions.py +2 -0
- trilogy/core/models.py +113 -14
- trilogy/core/processing/concept_strategies_v3.py +23 -4
- trilogy/core/processing/node_generators/node_merge_node.py +4 -4
- trilogy/core/processing/utility.py +11 -6
- trilogy/core/query_processor.py +20 -0
- trilogy/dialect/base.py +21 -3
- trilogy/executor.py +41 -36
- trilogy/parsing/parse_engine.py +42 -11
- trilogy/parsing/render.py +23 -3
- trilogy/parsing/trilogy.lark +11 -3
- {pytrilogy-0.0.2.22.dist-info → pytrilogy-0.0.2.25.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.22.dist-info → pytrilogy-0.0.2.25.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.22.dist-info → pytrilogy-0.0.2.25.dist-info}/top_level.txt +0 -0
{pytrilogy-0.0.2.22.dist-info → pytrilogy-0.0.2.25.dist-info}/RECORD
CHANGED

@@ -1,33 +1,33 @@
-trilogy/__init__.py,sha256=
+trilogy/__init__.py,sha256=5XwNYAlRMOuSKIFUPwSVWUdNK1RpSEPCsc_H7W06R7w,291
 trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/constants.py,sha256=rHCe0Pe3LuB-VwCr2765QhzkUrTqZKEYPJ7rS0ykxYw,1273
 trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
-trilogy/executor.py,sha256=
+trilogy/executor.py,sha256=b2pUL_Ha1H7pyhqssc2-hTd0OUO2KIcS0x6BLMPckZw,11822
 trilogy/parser.py,sha256=UtuqSiGiCjpMAYgo1bvNq-b7NSzCA5hzbUW31RXaMII,281
 trilogy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/utility.py,sha256=zM__8r29EsyDW7K9VOHz8yvZC2bXFzh7xKy3cL7GKsk,707
 trilogy/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/core/constants.py,sha256=7XaCpZn5mQmjTobbeBn56SzPWq9eMNDfzfsRU-fP0VE,171
-trilogy/core/enums.py,sha256=
-trilogy/core/env_processor.py,sha256=
-trilogy/core/environment_helpers.py,sha256=
+trilogy/core/enums.py,sha256=y0Z0m-xtcVw1ktkQ5yD3fJYWfOa4ncN_MzCTpREAxy0,6374
+trilogy/core/env_processor.py,sha256=SHVB3nkidIlFc5dz-sofRMKXx66stpLQNuVdQSjC-So,2586
+trilogy/core/environment_helpers.py,sha256=DIsoo-GcXmXVPB1JbNh8Oku25Nyef9mexPIdy2ur_sk,7159
 trilogy/core/ergonomics.py,sha256=ASLDd0RqKWrZiG3XcKHo8nyTjaB_8xfE9t4NZ1UvGpc,1639
 trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,561
-trilogy/core/functions.py,sha256=
+trilogy/core/functions.py,sha256=IhVpt3n6wEanKHnGu3oA2w6-hKIlxWpEyz7fHN66mpo,10720
 trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
 trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
-trilogy/core/models.py,sha256=
+trilogy/core/models.py,sha256=LUaoxk4twHRY9_Qatdbo1GjjCZRo_91Hql7BwKjLbfM,156934
 trilogy/core/optimization.py,sha256=od_60A9F8J8Nj24MHgrxl4vwRwmBFH13TMdoMQvgVKs,7717
-trilogy/core/query_processor.py,sha256=
+trilogy/core/query_processor.py,sha256=sdG0XcHNBS0kuqUPztDZ1i-kpDV5LJLrO55Og2Y8hSg,17140
 trilogy/core/optimizations/__init__.py,sha256=bWQecbeiwiDx9LJnLsa7dkWxdbl2wcnkcTN69JyP8iI,356
 trilogy/core/optimizations/base_optimization.py,sha256=tWWT-xnTbnEU-mNi_isMNbywm8B9WTRsNFwGpeh3rqE,468
 trilogy/core/optimizations/inline_constant.py,sha256=kHNyc2UoaPVdYfVAPAFwnWuk4sJ_IF5faRtVcDOrBtw,1110
 trilogy/core/optimizations/inline_datasource.py,sha256=AATzQ6YrtW_1-aQFjQyTYqEYKBoMFhek7ADfBr4uUdQ,3634
 trilogy/core/optimizations/predicate_pushdown.py,sha256=1l9WnFOSv79e341typG3tTdk0XGl1J_ToQih3LYoGIY,8435
 trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/core/processing/concept_strategies_v3.py,sha256=
+trilogy/core/processing/concept_strategies_v3.py,sha256=7MT_x6QFHrbSDmjz21pYdQB5ux419ES4QS-8lO16eyw,36091
 trilogy/core/processing/graph_utils.py,sha256=aq-kqk4Iado2HywDxWEejWc-7PGO6Oa-ZQLAM6XWPHw,1199
-trilogy/core/processing/utility.py,sha256=
+trilogy/core/processing/utility.py,sha256=hzuEsNqP5dq1GBT96lWYX1UkakMl6XItp-nIKFH6wLg,19617
 trilogy/core/processing/node_generators/__init__.py,sha256=-mzYkRsaRNa_dfTckYkKVFSR8h8a3ihEiPJDU_tAmDo,672
 trilogy/core/processing/node_generators/basic_node.py,sha256=WQNgJ1MwrMS_BQ-b3XwGGB6eToDykelAVj_fesJuqe0,2069
 trilogy/core/processing/node_generators/common.py,sha256=LwDgPlhWeuw0t07f3kX9IE5LXBdZhXfh-aY0XGk50ak,8946
@@ -35,7 +35,7 @@ trilogy/core/processing/node_generators/filter_node.py,sha256=Vz9Rb67e1dfZgnliek
 trilogy/core/processing/node_generators/group_node.py,sha256=r54IVEhXW-tzod6uEHIQObrxgQt6aNySk5emWkWyqCU,4938
 trilogy/core/processing/node_generators/group_to_node.py,sha256=R9i_wHipxjXJyfYEwfeTw2EPpuanXVA327XyfcP2tBg,2537
 trilogy/core/processing/node_generators/multiselect_node.py,sha256=_KO9lqzHQoy4VAviO0ttQlmK0tjaqrJj4SJPhmoIYm8,6229
-trilogy/core/processing/node_generators/node_merge_node.py,sha256=
+trilogy/core/processing/node_generators/node_merge_node.py,sha256=dIEv5P2MTViAES2MzqJgccYzM3HldjHrQYFwH00cqyc,14003
 trilogy/core/processing/node_generators/rowset_node.py,sha256=KtdN6t2xM8CJxobc4aQX4W8uX98U6IabeuBF_FtBLR4,4583
 trilogy/core/processing/node_generators/select_merge_node.py,sha256=MKjlXqFBSin6cTnS6n5lEcNBJsMvSefDIXOwYNVbM0s,10371
 trilogy/core/processing/node_generators/select_node.py,sha256=nwXHQF6C-aQUIelx9dyxN2pK3muL-4-6RIqnqQqNwtw,1808
@@ -50,7 +50,7 @@ trilogy/core/processing/nodes/select_node_v2.py,sha256=gS9OQgS2TSEK59BQ9R0i83pTH
 trilogy/core/processing/nodes/unnest_node.py,sha256=mAmFluzm2yeeiQ6NfIB7BU_8atRGh-UJfPf9ROwbhr8,2152
 trilogy/core/processing/nodes/window_node.py,sha256=ro0QfMFi4ZmIn5Q4D0M_vJWfnHH_C0MN7XkVkx8Gygg,1214
 trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/dialect/base.py,sha256=
+trilogy/dialect/base.py,sha256=zqPBWv5gsnk4KhyzHdxcpDkVXU7luEVvHepUWRzjUfo,34019
 trilogy/dialect/bigquery.py,sha256=15KJ-cOpBlk9O7FPviPgmg8xIydJeKx7WfmL3SSsPE8,2953
 trilogy/dialect/common.py,sha256=Hr0mxcNxjSvhpBM5Wvb_Q7aklAuYj5aBDrW433py0Zs,4403
 trilogy/dialect/config.py,sha256=tLVEMctaTDhUgARKXUNfHUcIolGaALkQ0RavUvXAY4w,2994
@@ -70,14 +70,14 @@ trilogy/parsing/common.py,sha256=kbqWy30nnVc7ID-sdSDwxYomnxd3guyuIJF3yvlpQwg,996
 trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
 trilogy/parsing/exceptions.py,sha256=92E5i2frv5hj9wxObJZsZqj5T6bglvPzvdvco_vW1Zk,38
 trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-trilogy/parsing/parse_engine.py,sha256=
-trilogy/parsing/render.py,sha256=
-trilogy/parsing/trilogy.lark,sha256=
+trilogy/parsing/parse_engine.py,sha256=tcBgjfew0kAfSEt1aFo9Pu3yacEBB1KFm7v_Iobz52g,64467
+trilogy/parsing/render.py,sha256=7mEEe5DWVAafaGl__oQE7FPn_4QhcsGT2VVp-nk1Lr8,13078
+trilogy/parsing/trilogy.lark,sha256=ZP9USPgD8-Fq5UzIl4iGpAeGuh2JLGzSoYJhvEGOi2c,12188
 trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
+pytrilogy-0.0.2.25.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
+pytrilogy-0.0.2.25.dist-info/METADATA,sha256=GtN5gVcDusbMKaXTn5O_CLcZAk00h5Xe5df4i5QqZzs,8403
+pytrilogy-0.0.2.25.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+pytrilogy-0.0.2.25.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
+pytrilogy-0.0.2.25.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
+pytrilogy-0.0.2.25.dist-info/RECORD,,
trilogy/__init__.py
CHANGED
trilogy/core/enums.py
CHANGED
@@ -292,3 +292,13 @@ class SelectFiltering(Enum):
     NONE = "none"
     EXPLICIT = "explicit"  # the filtering contains only selected values
     IMPLICIT = "implicit"  # the filtering contains unselected values
+
+
+class IOType(Enum):
+    CSV = "csv"
+
+    @classmethod
+    def _missing_(cls, value):
+        if isinstance(value, str) and value.lower() != value:
+            return IOType(value.lower())
+        return super()._missing_(value)
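The new IOType enum accepts case-insensitive values through _missing_, which is what lets the parser map the grammar's case-insensitive COPY_TYPE token onto the enum. A minimal standalone sketch of that behavior (not the packaged module, just the same pattern):

from enum import Enum

class IOType(Enum):
    CSV = "csv"

    @classmethod
    def _missing_(cls, value):
        # retry the lookup with a lower-cased value so IOType("CSV") resolves to IOType.CSV
        if isinstance(value, str) and value.lower() != value:
            return IOType(value.lower())
        return super()._missing_(value)

assert IOType("CSV") is IOType.CSV
assert IOType("csv") is IOType.CSV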
trilogy/core/env_processor.py
CHANGED
@@ -6,17 +6,20 @@ from trilogy.core.graph_models import (
 from trilogy.core.models import Environment, Concept, Datasource


-def add_concept(
+def add_concept(
+    concept: Concept, g: ReferenceGraph, concept_mapping: dict[str, Concept]
+):
     g.add_node(concept)
     # if we have sources, recursively add them
     node_name = concept_to_node(concept)
     if concept.concept_arguments:
         for source in concept.concept_arguments:
             generic = source.with_default_grain()
-            add_concept(generic, g)
+            add_concept(generic, g, concept_mapping)

             g.add_edge(generic, node_name)
-    for
+    for ps_address in concept.pseudonyms:
+        pseudonym = concept_mapping[ps_address]
         pseudonym = pseudonym.with_default_grain()
         pseudonym_node = concept_to_node(pseudonym)
         if (pseudonym_node, node_name) in g.edges and (
@@ -28,7 +31,7 @@ def add_concept(concept: Concept, g: ReferenceGraph):
            continue
        g.add_edge(pseudonym_node, node_name, pseudonym=True)
        g.add_edge(node_name, pseudonym_node, pseudonym=True)
-        add_concept(pseudonym, g)
+        add_concept(pseudonym, g, concept_mapping)


 def generate_adhoc_graph(
@@ -37,10 +40,11 @@ def generate_adhoc_graph(
     restrict_to_listed: bool = False,
 ) -> ReferenceGraph:
     g = ReferenceGraph()
+    concept_mapping = {x.address: x for x in concepts}

     # add all parsed concepts
     for concept in concepts:
-        add_concept(concept, g)
+        add_concept(concept, g, concept_mapping)

     for dataset in datasources:
         node = datasource_to_node(dataset)
@@ -66,5 +70,7 @@ def generate_graph(
 ) -> ReferenceGraph:

     return generate_adhoc_graph(
-        list(environment.concepts.values())
+        list(environment.concepts.values())
+        + list(environment.alias_origin_lookup.values()),
         list(environment.datasources.values()),
     )
trilogy/core/functions.py
CHANGED
@@ -104,6 +104,8 @@ def Unnest(args: list[Concept]) -> Function:
     output = arg_to_datatype(args[0])
     if isinstance(output, (ListType)):
         output = output.value_data_type
+    else:
+        output = DataType.STRING
     return Function(
         operator=FunctionType.UNNEST,
         arguments=args,
trilogy/core/models.py
CHANGED
@@ -65,6 +65,7 @@ from trilogy.core.enums import (
     ShowCategory,
     Granularity,
     SelectFiltering,
+    IOType,
 )
 from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
 from trilogy.utility import unique
@@ -81,6 +82,9 @@ LT = TypeVar("LT")


 def is_compatible_datatype(left, right):
+    # for unknown types, we can't make any assumptions
+    if right == DataType.UNKNOWN or left == DataType.UNKNOWN:
+        return True
     if left == right:
         return True
     if {left, right} == {DataType.NUMERIC, DataType.FLOAT}:
@@ -98,6 +102,12 @@ def get_version():
     return __version__


+def address_with_namespace(address: str, namespace: str) -> str:
+    if address.split(".", 1)[0] == DEFAULT_NAMESPACE:
+        return f"{namespace}.{address.split('.',1)[1]}"
+    return f"{namespace}.{address}"
+
+
 def get_concept_arguments(expr) -> List["Concept"]:
     output = []
     if isinstance(expr, Concept):
@@ -432,7 +442,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
     keys: Optional[Tuple["Concept", ...]] = None
     grain: "Grain" = Field(default=None, validate_default=True)
     modifiers: Optional[List[Modifier]] = Field(default_factory=list)
-    pseudonyms:
+    pseudonyms: set[str] = Field(default_factory=set)
     _address_cache: str | None = None

     def __hash__(self):
@@ -458,7 +468,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
     def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
         if self.address == source.address:
             new = target.with_grain(self.grain.with_merge(source, target, modifiers))
-            new.pseudonyms
+            new.pseudonyms.add(self.address)
             return new
         return self.__class__(
             name=self.name,
@@ -612,9 +622,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
                 else None
             ),
             modifiers=self.modifiers,
-            pseudonyms={
-                k: v.with_namespace(namespace) for k, v in self.pseudonyms.items()
-            },
+            pseudonyms={address_with_namespace(v, namespace) for v in self.pseudonyms},
         )

     def with_select_context(
@@ -858,7 +866,7 @@ class Grain(Mergeable, BaseModel):
             )
         else:
             v2 = unique(v, "address")
-            final = []
+            final: List[Concept] = []
             for sub in v2:
                 if sub.purpose in (Purpose.PROPERTY, Purpose.METRIC) and sub.keys:
                     if all([c in v2 for c in sub.keys]):
@@ -912,6 +920,20 @@ class Grain(Mergeable, BaseModel):
             [c.name == ALL_ROWS_CONCEPT for c in self.components]
         )

+    @property
+    def synonym_set(self) -> set[str]:
+        base = []
+        for x in self.components_copy:
+            if isinstance(x.lineage, RowsetItem):
+                base.append(x.lineage.content.address)
+                for c in x.lineage.content.pseudonyms:
+                    base.append(c)
+            else:
+                base.append(x.address)
+                for c in x.pseudonyms:
+                    base.append(c)
+        return set(base)
+
     @cached_property
     def set(self) -> set[str]:
         base = []
@@ -927,7 +949,11 @@ class Grain(Mergeable, BaseModel):
             return self.set == set([c.address for c in other])
         if not isinstance(other, Grain):
             return False
-
+        if self.set == other.set:
+            return True
+        elif self.synonym_set == other.synonym_set:
+            return True
+        return False

     def issubset(self, other: "Grain"):
         return self.set.issubset(other.set)
@@ -1778,6 +1804,16 @@ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
     )


+class CopyStatement(BaseModel):
+    target: str
+    target_type: IOType
+    meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
+    select: SelectStatement
+
+    def refresh_bindings(self, environment: Environment):
+        self.select.refresh_bindings(environment)
+
+
 class AlignItem(Namespaced, BaseModel):
     alias: str
     concepts: List[Concept]
@@ -2248,7 +2284,7 @@ class BaseJoin(BaseModel):
         for ds in [self.left_datasource, self.right_datasource]:
             synonyms = []
             for c in ds.output_concepts:
-                synonyms += list(c.pseudonyms
+                synonyms += list(c.pseudonyms)
             if (
                 concept.address not in [c.address for c in ds.output_concepts]
                 and concept.address not in synonyms
@@ -2823,9 +2859,21 @@ class CTE(BaseModel):
             return self.parent_ctes[0].name
         return self.name

+    def get_concept(self, address: str) -> Concept | None:
+        for cte in self.parent_ctes:
+            if address in cte.output_columns:
+                match = [x for x in cte.output_columns if x.address == address].pop()
+                return match
+
+        for array in [self.source.input_concepts, self.source.output_concepts]:
+            match_list = [x for x in array if x.address == address]
+            if match_list:
+                return match_list.pop()
+        return None
+
     def get_alias(self, concept: Concept, source: str | None = None) -> str:
         for cte in self.parent_ctes:
-            if concept.address in
+            if concept.address in cte.output_columns:
                 if source and source != cte.name:
                     continue
                 return concept.safe_address
@@ -2977,7 +3025,7 @@ class UndefinedConcept(Concept, Mergeable, Namespaced):
     ) -> "UndefinedConcept" | Concept:
         if self.address == source.address:
             new = target.with_grain(self.grain.with_merge(source, target, modifiers))
-            new.pseudonyms
+            new.pseudonyms.add(self.address)
             return new
         return self.__class__(
             name=self.name,
@@ -3509,6 +3557,7 @@ class Environment(BaseModel):
         self, source: Concept, target: Concept, modifiers: List[Modifier]
     ):
         replacements = {}
+
         # exit early if we've run this
         if source.address in self.alias_origin_lookup:
             if self.concepts[source.address] == target:
@@ -3517,11 +3566,11 @@ class Environment(BaseModel):
         for k, v in self.concepts.items():

             if v.address == target.address:
-                v.pseudonyms
+                v.pseudonyms.add(source.address)
             if v.address == source.address:
                 replacements[k] = target
                 self.canonical_map[k] = target.address
-                v.pseudonyms
+                v.pseudonyms.add(target.address)
             # we need to update keys and grains of all concepts
             else:
                 replacements[k] = v.with_merge(source, target, modifiers)
@@ -3599,6 +3648,7 @@ class Comparison(
         MagicConstants,
         WindowItem,
         AggregateWrapper,
+        TupleWrapper,
     ]
     operator: ComparisonOperator

@@ -4258,13 +4308,23 @@ class ProcessedQuery(BaseModel):
     order_by: Optional[OrderBy] = None


-class
+class PersistQueryMixin(BaseModel):
     output_to: MaterializedDataset
     datasource: Datasource
     # base:Dataset


-class ProcessedQueryPersist(ProcessedQuery,
+class ProcessedQueryPersist(ProcessedQuery, PersistQueryMixin):
+    pass
+
+
+class CopyQueryMixin(BaseModel):
+    target: str
+    target_type: IOType
+    # base:Dataset
+
+
+class ProcessedCopyStatement(ProcessedQuery, CopyQueryMixin):
     pass

@@ -4523,6 +4583,37 @@ class Parenthetical(
         return base


+class TupleWrapper(Generic[VT], tuple):
+    """Used to distinguish parsed tuple objects from other tuples"""
+
+    def __init__(self, val, type: DataType, **kwargs):
+        super().__init__()
+        self.type = type
+        self.val = val
+
+    def __getnewargs__(self):
+        return (self.val, self.type)
+
+    def __new__(cls, val, type: DataType, **kwargs):
+        return super().__new__(cls, tuple(val))
+        # self.type = type
+
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
+    ) -> core_schema.CoreSchema:
+        args = get_args(source_type)
+        if args:
+            schema = handler(Tuple[args])  # type: ignore
+        else:
+            schema = handler(Tuple)
+        return core_schema.no_info_after_validator_function(cls.validate, schema)
+
+    @classmethod
+    def validate(cls, v):
+        return cls(v, type=arg_to_datatype(v[0]))
+
+
 class PersistStatement(BaseModel):
     datasource: Datasource
     select: SelectStatement
@@ -4589,6 +4680,12 @@ def list_to_wrapper(args):
     return ListWrapper(args, type=types[0])


+def tuple_to_wrapper(args):
+    types = [arg_to_datatype(arg) for arg in args]
+    assert len(set(types)) == 1
+    return TupleWrapper(args, type=types[0])
+
+
 def dict_to_map_wrapper(arg):
     key_types = [arg_to_datatype(arg) for arg in arg.keys()]

@@ -4644,6 +4741,8 @@ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType | Numeric
         return arg.function.output_datatype
     elif isinstance(arg, Parenthetical):
         return arg_to_datatype(arg.content)
+    elif isinstance(arg, TupleWrapper):
+        return ListType(type=arg.type)
     elif isinstance(arg, WindowItem):
         if arg.type in (WindowType.RANK, WindowType.ROW_NUMBER):
             return DataType.INTEGER
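The new TupleWrapper mirrors the existing ListWrapper: a tuple subclass that remembers the element DataType so downstream code can type-check literal tuples (for example on the right-hand side of comparisons). A minimal sketch of the same idea, independent of the pydantic integration above; the stand-in DataType and the simplified type inference are assumptions for illustration only:

from enum import Enum

class DataType(Enum):  # stand-in for trilogy.core.enums.DataType
    INTEGER = "int"
    STRING = "string"

class TupleWrapper(tuple):
    """Tuple that carries the datatype of its elements."""

    def __new__(cls, val, type: DataType, **kwargs):
        return super().__new__(cls, tuple(val))

    def __init__(self, val, type: DataType, **kwargs):
        super().__init__()
        self.type = type
        self.val = val

def tuple_to_wrapper(args):
    # all elements must share one datatype, as asserted in models.tuple_to_wrapper
    types = {DataType.INTEGER if isinstance(a, int) else DataType.STRING for a in args}
    assert len(types) == 1
    return TupleWrapper(args, type=types.pop())

wrapped = tuple_to_wrapper((1, 2, 3))
assert wrapped == (1, 2, 3) and wrapped.type is DataType.INTEGER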
trilogy/core/processing/concept_strategies_v3.py
CHANGED

@@ -455,7 +455,20 @@ def generate_node(
         if x.address not in [y.address for y in root_targets]
         and x not in ex_resolve.grain.components
     ]
-
+
+    pseudonyms = [
+        x
+        for x in extra
+        if any(x.address in y.pseudonyms for y in root_targets)
+    ]
+    # if we're only connected by a pseudonym, keep those in output
+    expanded.set_output_concepts(root_targets + pseudonyms)
+    # but hide them
+    if pseudonyms:
+        logger.info(
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Hiding pseudonyms{[c.address for c in pseudonyms]}"
+        )
+        expanded.hide_output_concepts(pseudonyms)

     logger.info(
         f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in root_targets]} via concept addition; removing extra {[c.address for c in extra]}"
@@ -480,6 +493,7 @@ def validate_concept(
     found_map: dict[str, set[Concept]],
     accept_partial: bool,
     seen: set[str],
+    environment: Environment,
 ):

     found_map[str(node)].add(concept)
@@ -500,10 +514,11 @@ def validate_concept(
         if accept_partial:
             found_addresses.add(concept.address)
             found_map[str(node)].add(concept)
-    for
-
+    for v_address in concept.pseudonyms:
+        v = environment.concepts[v_address]
+        if v == concept.address:
             return
-        if v
+        if v in seen:
             return
         validate_concept(
             v,
@@ -515,10 +530,12 @@ def validate_concept(
             found_map,
             accept_partial,
             seen=seen,
+            environment=environment,
         )


 def validate_stack(
+    environment: Environment,
     stack: List[StrategyNode],
     concepts: List[Concept],
     mandatory_with_filter: List[Concept],
@@ -546,6 +563,7 @@ def validate_stack(
                 found_map,
                 accept_partial,
                 seen,
+                environment,
             )
             for concept in node.virtual_output_concepts:
                 if concept.address in non_partial_addresses:
@@ -807,6 +825,7 @@ def _search_concepts(
            break
        attempted.add(priority_concept.address)
        complete, found, missing, partial, virtual = validate_stack(
+            environment,
            stack,
            mandatory_list,
            completion_mandatory,
trilogy/core/processing/node_generators/node_merge_node.py
CHANGED

@@ -209,9 +209,9 @@ def resolve_weak_components(
         for c in all_concepts
         if "__preql_internal" not in c.address
     ]
-    synonyms:
+    synonyms: set[str] = set()
     for x in all_concepts:
-        synonyms
+        synonyms = synonyms.union(x.pseudonyms)
     while break_flag is not True:
         count += 1
         if count > AMBIGUITY_CHECK_LIMIT:
@@ -385,9 +385,9 @@ def gen_merge_node(
     # one concept handling may need to be kicked to alias
     if len(all_concepts) == 1:
         concept = all_concepts[0]
-        for
+        for v in concept.pseudonyms:
             test = subgraphs_to_merge_node(
-                [[concept, v]],
+                [[concept, environment.alias_origin_lookup[v]]],
                 g=g,
                 all_concepts=[concept],
                 environment=environment,
trilogy/core/processing/utility.py
CHANGED

@@ -28,6 +28,7 @@ from trilogy.core.models import (
     DatePart,
     NumericType,
     ListType,
+    TupleWrapper,
 )

 from trilogy.core.enums import Purpose, Granularity, BooleanOperator, Modifier
@@ -161,17 +162,21 @@ def add_node_join_concept(
     concept: Concept,
     datasource: Datasource | QueryDatasource,
     concepts: List[Concept],
+    environment: Environment,
 ):

     concepts.append(concept)

     graph.add_node(concept.address, type=NodeType.CONCEPT)
     graph.add_edge(datasource.identifier, concept.address)
-    for
+    for v_address in concept.pseudonyms:
+        v = environment.alias_origin_lookup.get(
+            v_address, environment.concepts[v_address]
+        )
         if v in concepts:
             continue
-        if v
-        add_node_join_concept(graph, v, datasource, concepts)
+        if v != concept.address:
+            add_node_join_concept(graph, v, datasource, concepts, environment)


 def get_node_joins(
@@ -185,7 +190,7 @@ def get_node_joins(
     for datasource in datasources:
         graph.add_node(datasource.identifier, type=NodeType.NODE)
         for concept in datasource.output_concepts:
-            add_node_join_concept(graph, concept, datasource, concepts)
+            add_node_join_concept(graph, concept, datasource, concepts, environment)

     # add edges for every constant to every datasource
     for datasource in datasources:
@@ -194,7 +199,6 @@ def get_node_joins(
             for node in graph.nodes:
                 if graph.nodes[node]["type"] == NodeType.NODE:
                     graph.add_edge(node, concept.address)
-
     joins: defaultdict[str, set] = defaultdict(set)
     identifier_map: dict[str, Datasource | QueryDatasource] = {
         x.identifier: x for x in datasources
@@ -205,7 +209,7 @@ def get_node_joins(
         # if we're looking up a pseudonym, we would have gotten the remapped value
         # so double check we got what we were looking for
         if env_lookup.address == g.address:
-            grain_pseudonyms.update(env_lookup.pseudonyms
+            grain_pseudonyms.update(env_lookup.pseudonyms)

     node_list = sorted(
         [x for x in graph.nodes if graph.nodes[x]["type"] == NodeType.NODE],
@@ -422,6 +426,7 @@ def is_scalar_condition(
         | NumericType
         | DatePart
         | ListWrapper[Any]
+        | TupleWrapper[Any]
     ),
     materialized: set[str] | None = None,
 ) -> bool:
trilogy/core/query_processor.py
CHANGED
@@ -26,6 +26,8 @@ from trilogy.core.models import (
     BaseJoin,
     InstantiatedUnnestJoin,
     Conditional,
+    ProcessedCopyStatement,
+    CopyStatement,
 )

 from trilogy.utility import unique
@@ -418,6 +420,24 @@ def process_persist(
     )


+def process_copy(
+    environment: Environment,
+    statement: CopyStatement,
+    hooks: List[BaseHook] | None = None,
+) -> ProcessedCopyStatement:
+    select = process_query(
+        environment=environment, statement=statement.select, hooks=hooks
+    )
+
+    # build our object to return
+    arg_dict = {k: v for k, v in select.__dict__.items()}
+    return ProcessedCopyStatement(
+        **arg_dict,
+        target=statement.target,
+        target_type=statement.target_type,
+    )
+
+
 def process_query(
     environment: Environment,
     statement: SelectStatement | MultiSelectStatement,
trilogy/dialect/base.py
CHANGED
@@ -35,6 +35,7 @@ from trilogy.core.models import (
     Environment,
     RawColumnExpr,
     ListWrapper,
+    TupleWrapper,
     MapWrapper,
     ShowStatement,
     RowsetItem,
@@ -49,8 +50,10 @@ from trilogy.core.models import (
     StructType,
     MergeStatementV2,
     Datasource,
+    CopyStatement,
+    ProcessedCopyStatement,
 )
-from trilogy.core.query_processor import process_query, process_persist
+from trilogy.core.query_processor import process_query, process_persist, process_copy
 from trilogy.dialect.common import render_join, render_unnest
 from trilogy.hooks.base_hook import BaseHook
 from trilogy.core.enums import UnnestMode
@@ -260,7 +263,11 @@ class BaseDialect:
     ) -> str:
         result = None
         if c.pseudonyms:
-            for
+            candidates = [y for y in [cte.get_concept(x) for x in c.pseudonyms] if y]
+            logger.debug(
+                f"{LOGGER_PREFIX} [{c.address}] pseudonym candidates are {[x.address for x in candidates]}"
+            )
+            for candidate in [c] + candidates:
                 try:
                     logger.debug(
                         f"{LOGGER_PREFIX} [{c.address}] Attempting rendering w/ candidate {candidate.address}"
@@ -391,6 +398,7 @@ class BaseDialect:
                 StructType,
                 ListType,
                 ListWrapper[Any],
+                TupleWrapper[Any],
                 DatePart,
                 CaseWhen,
                 CaseElse,
@@ -430,7 +438,7 @@ class BaseDialect:
                    f"Missing source CTE for {e.right.address}"
                )
            return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {target} where {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} is not null)"
-        elif isinstance(e.right, (ListWrapper, Parenthetical, list)):
+        elif isinstance(e.right, (ListWrapper, TupleWrapper, Parenthetical, list)):
            return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"

        elif isinstance(
@@ -511,6 +519,8 @@ class BaseDialect:
             return str(e)
         elif isinstance(e, ListWrapper):
             return f"[{','.join([self.render_expr(x, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) for x in e])}]"
+        elif isinstance(e, TupleWrapper):
+            return f"({','.join([self.render_expr(x, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) for x in e])})"
         elif isinstance(e, MapWrapper):
             return f"MAP {{{','.join([f'{self.render_expr(k, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}:{self.render_expr(v, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}' for k, v in e.items()])}}}"
         elif isinstance(e, list):
@@ -662,6 +672,7 @@ class BaseDialect:
             | ImportStatement
             | RawSQLStatement
             | MergeStatementV2
+            | CopyStatement
         ],
         hooks: Optional[List[BaseHook]] = None,
     ) -> List[
@@ -675,6 +686,7 @@ class BaseDialect:
             | ProcessedQueryPersist
             | ProcessedShowStatement
             | ProcessedRawSQLStatement
+            | ProcessedCopyStatement
         ] = []
         for statement in statements:
             if isinstance(statement, PersistStatement):
@@ -683,6 +695,12 @@ class BaseDialect:
                         hook.process_persist_info(statement)
                 persist = process_persist(environment, statement, hooks=hooks)
                 output.append(persist)
+            elif isinstance(statement, CopyStatement):
+                if hooks:
+                    for hook in hooks:
+                        hook.process_select_info(statement.select)
+                copy = process_copy(environment, statement, hooks=hooks)
+                output.append(copy)
             elif isinstance(statement, SelectStatement):
                 if hooks:
                     for hook in hooks:
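The only behavioral difference between the two literal wrappers in render_expr is the delimiter: ListWrapper renders with square brackets, while the new TupleWrapper renders with parentheses, which is the form SQL IN-lists expect. A simplified sketch of that branch; render_literal here is a stand-in for the dialect's full render_expr and handles only plain literals:

def render_literal(value) -> str:
    # stand-in for BaseDialect.render_expr, covering scalar, tuple, and list literals only
    if isinstance(value, str):
        return f"'{value}'"
    if isinstance(value, tuple):  # TupleWrapper in the real code
        return f"({','.join(render_literal(v) for v in value)})"
    if isinstance(value, list):  # ListWrapper in the real code
        return f"[{','.join(render_literal(v) for v in value)}]"
    return str(value)

# a parsed `in ('a', 'b')` right-hand side renders as a parenthesized list:
assert render_literal(("a", "b")) == "('a','b')"
assert render_literal([1, 2, 3]) == "[1,2,3]"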
trilogy/executor.py
CHANGED
@@ -10,6 +10,7 @@ from trilogy.core.models import (
     ProcessedShowStatement,
     ProcessedQueryPersist,
     ProcessedRawSQLStatement,
+    ProcessedCopyStatement,
     RawSQLStatement,
     MultiSelectStatement,
     SelectStatement,
@@ -18,9 +19,11 @@ from trilogy.core.models import (
     Concept,
     ConceptDeclarationStatement,
     Datasource,
+    CopyStatement,
 )
 from trilogy.dialect.base import BaseDialect
 from trilogy.dialect.enums import Dialects
+from trilogy.core.enums import IOType
 from trilogy.parser import parse_text
 from trilogy.hooks.base_hook import BaseHook
 from pathlib import Path
@@ -94,7 +97,15 @@ class Executor(object):
         self.connection = self.engine.connect()

     def execute_statement(self, statement) -> Optional[CursorResult]:
-        if not isinstance(
+        if not isinstance(
+            statement,
+            (
+                ProcessedQuery,
+                ProcessedShowStatement,
+                ProcessedQueryPersist,
+                ProcessedCopyStatement,
+            ),
+        ):
             return None
         return self.execute_query(statement)

@@ -183,12 +194,33 @@ class Executor(object):

     @execute_query.register
     def _(self, query: ProcessedQueryPersist) -> CursorResult:
+
         sql = self.generator.compile_statement(query)
-
+
         output = self.connection.execute(text(sql))
         self.environment.add_datasource(query.datasource)
         return output

+    @execute_query.register
+    def _(self, query: ProcessedCopyStatement) -> CursorResult:
+        sql = self.generator.compile_statement(query)
+        output: CursorResult = self.connection.execute(text(sql))
+        if query.target_type == IOType.CSV:
+            import csv
+
+            with open(query.target, "w", newline="", encoding="utf-8") as f:
+                outcsv = csv.writer(f)
+                outcsv.writerow(output.keys())
+                outcsv.writerows(output)
+        else:
+            raise NotImplementedError(f"Unsupported IOType {query.target_type}")
+        # now return the query we ran through IO
+        # TODO: instead return how many rows were written?
+        return generate_result_set(
+            query.output_columns,
+            [self.generator.compile_statement(query)],
+        )
+
     @singledispatchmethod
     def generate_sql(self, command) -> list[str]:
         raise NotImplementedError(
@@ -251,39 +283,17 @@ class Executor(object):
         | ProcessedQueryPersist
         | ProcessedShowStatement
         | ProcessedRawSQLStatement
+        | ProcessedCopyStatement
     ]:
-
-
-        generatable = [
-            x
-            for x in parsed
-            if isinstance(
-                x,
-                (
-                    SelectStatement,
-                    PersistStatement,
-                    MultiSelectStatement,
-                    ShowStatement,
-                    RawSQLStatement,
-                ),
-            )
-        ]
-        sql = []
-        while generatable:
-            t = generatable.pop(0)
-            x = self.generator.generate_queries(
-                self.environment, [t], hooks=self.hooks
-            )[0]
-            if persist and isinstance(x, ProcessedQueryPersist):
-                self.environment.add_datasource(x.datasource)
-            sql.append(x)
-        return sql
+
+        return list(self.parse_text_generator(command, persist=persist))

     def parse_text_generator(self, command: str, persist: bool = False) -> Generator[
         ProcessedQuery
         | ProcessedQueryPersist
         | ProcessedShowStatement
-        | ProcessedRawSQLStatement
+        | ProcessedRawSQLStatement
+        | ProcessedCopyStatement,
         None,
         None,
     ]:
@@ -300,6 +310,7 @@ class Executor(object):
                 MultiSelectStatement,
                 ShowStatement,
                 RawSQLStatement,
+                CopyStatement,
             ),
         )
     ]
@@ -340,13 +351,7 @@ class Executor(object):
                     )
                 )
                 continue
-
-            logger.debug(compiled_sql)
-
-            output.append(self.connection.execute(text(compiled_sql)))
-            # generalize post-run success hooks
-            if isinstance(statement, ProcessedQueryPersist):
-                self.environment.add_datasource(statement.datasource)
+            output.append(self.execute_query(statement))
         return output

     def execute_file(self, file: str | Path) -> List[CursorResult]:
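Taken together with the parser and dialect changes, the executor can now round-trip a COPY statement end to end: the compiled SELECT runs against the connection and the cursor is streamed to a local file via csv.writer. A hedged usage sketch; `executor` is assumed to be an already-configured trilogy Executor, the concept names are hypothetical, and parse_text is assumed to be the public wrapper around the parse_text_generator method shown above:

copy_command = """
copy into csv 'order_summary.csv' from
select
    order_id,
    total
;"""  # hypothetical concepts; any valid select works here

for statement in executor.parse_text(copy_command):
    # the ProcessedCopyStatement runs the SELECT, then writes order_summary.csv
    executor.execute_statement(statement)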
trilogy/parsing/parse_engine.py
CHANGED
@@ -1,7 +1,7 @@
 from os.path import dirname, join
 from typing import List, Optional, Tuple, Union
 from re import IGNORECASE
-from lark import Lark, Transformer, v_args
+from lark import Lark, Transformer, v_args, Tree
 from lark.exceptions import (
     UnexpectedCharacters,
     UnexpectedEOF,
@@ -31,6 +31,7 @@ from trilogy.core.enums import (
     DatePart,
     ShowCategory,
     FunctionClass,
+    IOType,
 )
 from trilogy.core.exceptions import InvalidSyntaxException, UndefinedConceptException
 from trilogy.core.functions import (
@@ -84,6 +85,7 @@ from trilogy.core.models import (
     PersistStatement,
     Query,
     RawSQLStatement,
+    CopyStatement,
     SelectStatement,
     SelectItem,
     WhereClause,
@@ -105,9 +107,11 @@ from trilogy.core.models import (
     ConceptDerivation,
     RowsetDerivationStatement,
     list_to_wrapper,
+    tuple_to_wrapper,
     dict_to_map_wrapper,
     NumericType,
     HavingClause,
+    TupleWrapper,
 )
 from trilogy.parsing.exceptions import ParseError
 from trilogy.parsing.common import (
@@ -748,13 +752,29 @@ class ParseToObjects(Transformer):
     def rawsql_statement(self, meta: Meta, args) -> RawSQLStatement:
         return RawSQLStatement(meta=Metadata(line_number=meta.line), text=args[0])

+    def COPY_TYPE(self, args) -> IOType:
+        return IOType(args.value)
+
+    @v_args(meta=True)
+    def copy_statement(self, meta: Meta, args) -> CopyStatement:
+
+        return CopyStatement(
+            target=args[1],
+            target_type=args[0],
+            meta=Metadata(line_number=meta.line),
+            select=args[-1],
+        )
+
     def resolve_import_address(self, address) -> str:
         with open(address, "r", encoding="utf-8") as f:
             text = f.read()
         return text

     def import_statement(self, args: list[str]) -> ImportStatement:
-
+        if len(args) == 2:
+            alias = args[-1]
+        else:
+            alias = self.environment.namespace
         path = args[0].split(".")

         target = join(self.environment.working_path, *path) + ".preql"
@@ -1064,6 +1084,9 @@ class ParseToObjects(Transformer):
     def array_lit(self, args):
         return list_to_wrapper(args)

+    def tuple_lit(self, args):
+        return tuple_to_wrapper(args)
+
     def struct_lit(self, args):

         zipped = dict(zip(args[::2], args[1::2]))
@@ -1124,12 +1147,18 @@ class ParseToObjects(Transformer):

         while isinstance(right, Parenthetical) and isinstance(
             right.content,
-            (
+            (
+                Concept,
+                Function,
+                FilterItem,
+                WindowItem,
+                AggregateWrapper,
+                ListWrapper,
+                TupleWrapper,
+            ),
         ):
             right = right.content
-        if isinstance(
-            right, (Function, FilterItem, WindowItem, AggregateWrapper, ListWrapper)
-        ):
+        if isinstance(right, (Function, FilterItem, WindowItem, AggregateWrapper)):
             right = arbitrary_to_concept(
                 right,
                 namespace=self.environment.namespace,
@@ -1142,7 +1171,7 @@ class ParseToObjects(Transformer):
         )

     def expr_tuple(self, args):
-        return
+        return TupleWrapper(content=tuple(args))

     def parenthetical(self, args):
         return Parenthetical(content=args[0])
@@ -1840,10 +1869,12 @@ def unpack_visit_error(e: VisitError):
         unpack_visit_error(e.orig_exc)
     elif isinstance(e.orig_exc, (UndefinedConceptException, ImportError)):
         raise e.orig_exc
-    elif isinstance(e.orig_exc, SyntaxError):
-
-
-
+    elif isinstance(e.orig_exc, (SyntaxError, TypeError)):
+        if isinstance(e.obj, Tree):
+            raise InvalidSyntaxException(
+                str(e.orig_exc) + " in " + str(e.rule) + f" Line: {e.obj.meta.line}"
+            )
+        raise InvalidSyntaxException(str(e.orig_exc))
     raise e
trilogy/parsing/render.py
CHANGED
@@ -32,6 +32,8 @@ from trilogy.core.models import (
     AggregateWrapper,
     PersistStatement,
     ListWrapper,
+    ListType,
+    TupleWrapper,
     RowsetDerivationStatement,
     MultiSelectStatement,
     OrderBy,
@@ -40,6 +42,7 @@ from trilogy.core.models import (
     RawSQLStatement,
     NumericType,
     MergeStatementV2,
+    CopyStatement,
 )
 from trilogy.core.enums import Modifier

@@ -180,6 +183,10 @@ class Renderer:
     def _(self, arg: ListWrapper):
         return "[" + ", ".join([self.to_string(x) for x in arg]) + "]"

+    @to_string.register
+    def _(self, arg: TupleWrapper):
+        return "(" + ", ".join([self.to_string(x) for x in arg]) + ")"
+
     @to_string.register
     def _(self, arg: DatePart):
         return arg.value
@@ -211,21 +218,30 @@ class Renderer:
             base_description = concept.metadata.description
         else:
             base_description = None
-        if concept.namespace:
+        if concept.namespace and concept.namespace != DEFAULT_NAMESPACE:
             namespace = f"{concept.namespace}."
         else:
             namespace = ""
         if not concept.lineage:
             if concept.purpose == Purpose.PROPERTY and concept.keys:
-
+                keys = ",".join([self.to_string(key) for key in concept.keys])
+                output = f"{concept.purpose.value} <{keys}>.{namespace}{concept.name} {self.to_string(concept.datatype)};"
             else:
-                output = f"{concept.purpose.value} {namespace}{concept.name} {concept.datatype
+                output = f"{concept.purpose.value} {namespace}{concept.name} {self.to_string(concept.datatype)};"
         else:
             output = f"{concept.purpose.value} {namespace}{concept.name} <- {self.to_string(concept.lineage)};"
         if base_description:
             output += f" # {base_description}"
         return output

+    @to_string.register
+    def _(self, arg: ListType):
+        return f"list<{self.to_string(arg.value_data_type)}>"
+
+    @to_string.register
+    def _(self, arg: DataType):
+        return arg.value
+
     @to_string.register
     def _(self, arg: ConceptDerivation):
         # this is identical rendering;
@@ -271,6 +287,10 @@ class Renderer:
         base += "\n;"
         return base

+    @to_string.register
+    def _(self, arg: CopyStatement):
+        return f"COPY INTO {arg.target_type.value.upper()} '{arg.target}' FROM {self.to_string(arg.select)}"
+
     @to_string.register
     def _(self, arg: AlignClause):
         return "\nALIGN\n\t" + ",\n\t".join([self.to_string(c) for c in arg.items])
trilogy/parsing/trilogy.lark
CHANGED
@@ -8,7 +8,7 @@
     | persist_statement
     | rowset_derivation_statement
     | import_statement
-
+    | copy_statement
     | merge_statement_v2
     | rawsql_statement

@@ -57,7 +57,7 @@

 column_list : (IDENTIFIER "," )* IDENTIFIER ","?

-import_statement: "import"
+import_statement: "import" IDENTIFIER ("." IDENTIFIER)* ("as" IDENTIFIER)?

 // persist_statement
 persist_statement: "persist"i IDENTIFIER "into"i IDENTIFIER "from"i select_statement grain_clause?
@@ -78,6 +78,12 @@
 // raw sql statement
 rawsql_statement: "raw_sql"i "(" MULTILINE_STRING ")"

+// copy statement
+
+COPY_TYPE: "csv"i
+
+copy_statement: "copy"i "into"i COPY_TYPE _string_lit "from"i select_statement
+
 // FUNCTION blocks
 function: raw_function
 function_binding_item: IDENTIFIER ":" data_type
@@ -303,6 +309,8 @@

 array_lit: "[" (literal ",")* literal ","? "]"()

+tuple_lit: "(" (literal ",")* literal ","? ")"
+
 map_lit: "{" (literal ":" literal ",")* literal ":" literal ","? "}"

 _STRUCT.1: "struct("i
@@ -312,7 +320,7 @@

 !null_lit.1: "null"i

-literal: null_lit | _string_lit | int_lit | float_lit | bool_lit | array_lit | map_lit | struct_lit
+literal: null_lit | _string_lit | int_lit | float_lit | bool_lit | array_lit | map_lit | struct_lit | tuple_lit

 MODIFIER: "Optional"i | "Partial"i | "Nullable"i

{pytrilogy-0.0.2.22.dist-info → pytrilogy-0.0.2.25.dist-info}/LICENSE.md: File without changes
{pytrilogy-0.0.2.22.dist-info → pytrilogy-0.0.2.25.dist-info}/entry_points.txt: File without changes
{pytrilogy-0.0.2.22.dist-info → pytrilogy-0.0.2.25.dist-info}/top_level.txt: File without changes
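Putting the grammar changes together, 0.0.2.25 accepts three new constructs: an import alias, a COPY statement, and tuple literals. The statements below are illustrative only, sketched from the grammar rules above with hypothetical concept names rather than taken from the package's test suite:

# Illustrative Trilogy statements enabled by the 0.0.2.25 grammar (hypothetical identifiers).
NEW_SYNTAX_EXAMPLES = [
    # import with an explicit alias: import_statement now allows ("as" IDENTIFIER)?
    "import orders as o;",
    # copy statement: "copy"i "into"i COPY_TYPE _string_lit "from"i select_statement
    "copy into csv 'order_summary.csv' from select o.order_id, o.total;",
    # tuple literal, usable wherever a literal is accepted, e.g. in an IN-style filter
    "select o.order_id where o.status in ('open', 'pending');",
]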