pytrilogy 0.0.2.21__py3-none-any.whl → 0.0.2.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pytrilogy might be problematic.
- {pytrilogy-0.0.2.21.dist-info → pytrilogy-0.0.2.23.dist-info}/METADATA +17 -11
- {pytrilogy-0.0.2.21.dist-info → pytrilogy-0.0.2.23.dist-info}/RECORD +24 -24
- {pytrilogy-0.0.2.21.dist-info → pytrilogy-0.0.2.23.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -0
- trilogy/core/enums.py +13 -0
- trilogy/core/env_processor.py +4 -2
- trilogy/core/ergonomics.py +11 -4
- trilogy/core/functions.py +2 -0
- trilogy/core/models.py +80 -3
- trilogy/core/processing/concept_strategies_v3.py +0 -1
- trilogy/core/processing/node_generators/node_merge_node.py +19 -6
- trilogy/core/processing/node_generators/rowset_node.py +2 -2
- trilogy/core/processing/utility.py +2 -0
- trilogy/core/query_processor.py +25 -4
- trilogy/dialect/base.py +18 -2
- trilogy/executor.py +55 -37
- trilogy/parsing/common.py +16 -2
- trilogy/parsing/parse_engine.py +43 -45
- trilogy/parsing/render.py +25 -3
- trilogy/parsing/trilogy.lark +46 -21
- {pytrilogy-0.0.2.21.dist-info → pytrilogy-0.0.2.23.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.21.dist-info → pytrilogy-0.0.2.23.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.21.dist-info → pytrilogy-0.0.2.23.dist-info}/top_level.txt +0 -0
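The headline user-facing change in this release is a new COPY statement (grammar, parser, query processor, dialect, and executor changes below), plus an `execute_file` helper on the executor. A rough sketch of how those pieces fit together; the executor construction and the `orders.revenue` concept are illustrative assumptions, not part of this diff:

```python
from trilogy.executor import Executor  # class extended in this diff; constructor setup not shown here


def export_revenue(executor: Executor) -> None:
    # `copy into csv '<path>' from <select>` is the new rule added to trilogy.lark;
    # the executor compiles the inner select, runs it, and writes the rows to the CSV target.
    executor.execute_text("copy into csv 'revenue.csv' from select orders.revenue;")

    # execute_file is new in trilogy/executor.py: read a script from disk, then run it via execute_text.
    executor.execute_file("reports/daily.trilogy")
```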
{pytrilogy-0.0.2.21.dist-info → pytrilogy-0.0.2.23.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pytrilogy
-Version: 0.0.2.21
+Version: 0.0.2.23
 Summary: Declarative, typed query language that compiles to SQL.
 Home-page:
 Author:
@@ -145,7 +145,7 @@ Run the following from the directory the file is in.
 trilogy run hello.trilogy duckdb
 ```
-
 
 ## Backends
@@ -158,9 +158,9 @@ The current Trilogy implementation supports these backends:
 ## Basic Example - Python
-Trilogy can be run directly in python.
+Trilogy can be run directly in python through the core SDK. Trilogy code can be defined and parsed inline or parsed out of files.
-A bigquery example, similar to bigquery [the quickstart](https://cloud.google.com/bigquery/docs/quickstarts/query-public-dataset-console)
+A bigquery example, similar to bigquery [the quickstart](https://cloud.google.com/bigquery/docs/quickstarts/query-public-dataset-console).
 ```python
@@ -224,7 +224,7 @@ and second the dialect to run.
 To pass arguments to a backend, append additional --<option> flags after specifying the dialect.
 Example:
-`trilogy run key
+`trilogy run "key x int; datasource test_source ( i:x) grain(in) address test; select x;" duckdb --path <path/to/database>`
 ### Bigquery Args
 N/A, only supports default auth. In python you can pass in a custom client.
@@ -267,7 +267,7 @@ Clone repository and install requirements.txt and requirements-test.txt.
 Please open an issue first to discuss what you would like to change, and then create a PR against that issue.
 ## Similar in space
-Trilogy combines two aspects; a semantic layer and a query language.
+Trilogy combines two aspects; a semantic layer and a query language. Examples of both are linked below:
 Python "semantic layers" are tools for defining data access to a warehouse in a more abstract way.
@@ -284,20 +284,26 @@ but all are worth checking out. Please open PRs/comment for anything missed!
 #### IMPORT
-`import
+`import [path] as [alias];`
 #### CONCEPT
-Types: `string | int | float | bool | date | datetime | time | numeric(scale, precision) | timestamp | interval
+Types: `string | int | float | bool | date | datetime | time | numeric(scale, precision) | timestamp | interval | list<[type]> | map<[type], [type]> | struct<name:[type], name:[type]>`;
 Key:
-`key
+`key [name] [type];`
 Property:
-`property
+`property [key>].[name] [type];`
+`property x.y int;`
+or
+`property <[key](,[key])?>.<name> [type];`
+`property <x,y>.z int;`
 Transformation:
-`auto
+`auto [name] <- [expression];`
+`auto x <- y + 1;`
 #### DATASOURCE
 ```sql
|
{pytrilogy-0.0.2.21.dist-info → pytrilogy-0.0.2.23.dist-info}/RECORD CHANGED
@@ -1,33 +1,33 @@
-trilogy/__init__.py,sha256=
+trilogy/__init__.py,sha256=ubM_nAcusmpKBFKEh_KcrJJbLAvprPMo6sNjwVkytbQ,291
 trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/constants.py,sha256=
+trilogy/constants.py,sha256=rHCe0Pe3LuB-VwCr2765QhzkUrTqZKEYPJ7rS0ykxYw,1273
 trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
-trilogy/executor.py,sha256=
+trilogy/executor.py,sha256=b2pUL_Ha1H7pyhqssc2-hTd0OUO2KIcS0x6BLMPckZw,11822
 trilogy/parser.py,sha256=UtuqSiGiCjpMAYgo1bvNq-b7NSzCA5hzbUW31RXaMII,281
 trilogy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/utility.py,sha256=zM__8r29EsyDW7K9VOHz8yvZC2bXFzh7xKy3cL7GKsk,707
 trilogy/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/core/constants.py,sha256=7XaCpZn5mQmjTobbeBn56SzPWq9eMNDfzfsRU-fP0VE,171
-trilogy/core/enums.py,sha256=
-trilogy/core/env_processor.py,sha256=
+trilogy/core/enums.py,sha256=y0Z0m-xtcVw1ktkQ5yD3fJYWfOa4ncN_MzCTpREAxy0,6374
+trilogy/core/env_processor.py,sha256=z8pYgl5XpprA4ZzRvn7CVIG0hbMu04BlNkugKlT6i3o,2333
 trilogy/core/environment_helpers.py,sha256=1miP4is4FEoci01KSAy2VZVYmlmT5TOCOALBekd2muQ,7211
-trilogy/core/ergonomics.py,sha256=
+trilogy/core/ergonomics.py,sha256=ASLDd0RqKWrZiG3XcKHo8nyTjaB_8xfE9t4NZ1UvGpc,1639
 trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,561
-trilogy/core/functions.py,sha256=
+trilogy/core/functions.py,sha256=IhVpt3n6wEanKHnGu3oA2w6-hKIlxWpEyz7fHN66mpo,10720
 trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
 trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
-trilogy/core/models.py,sha256=
+trilogy/core/models.py,sha256=Q3lhch_w1YNrBx-jl6ch5tJKFBYop42En5bghdCGWXw,155612
 trilogy/core/optimization.py,sha256=od_60A9F8J8Nj24MHgrxl4vwRwmBFH13TMdoMQvgVKs,7717
-trilogy/core/query_processor.py,sha256=
+trilogy/core/query_processor.py,sha256=sdG0XcHNBS0kuqUPztDZ1i-kpDV5LJLrO55Og2Y8hSg,17140
 trilogy/core/optimizations/__init__.py,sha256=bWQecbeiwiDx9LJnLsa7dkWxdbl2wcnkcTN69JyP8iI,356
 trilogy/core/optimizations/base_optimization.py,sha256=tWWT-xnTbnEU-mNi_isMNbywm8B9WTRsNFwGpeh3rqE,468
 trilogy/core/optimizations/inline_constant.py,sha256=kHNyc2UoaPVdYfVAPAFwnWuk4sJ_IF5faRtVcDOrBtw,1110
 trilogy/core/optimizations/inline_datasource.py,sha256=AATzQ6YrtW_1-aQFjQyTYqEYKBoMFhek7ADfBr4uUdQ,3634
 trilogy/core/optimizations/predicate_pushdown.py,sha256=1l9WnFOSv79e341typG3tTdk0XGl1J_ToQih3LYoGIY,8435
 trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/core/processing/concept_strategies_v3.py,sha256=
+trilogy/core/processing/concept_strategies_v3.py,sha256=J4efhZCSGSo_CXVRObn7p3Lxr5Ry_G01265amsr2iIU,35294
 trilogy/core/processing/graph_utils.py,sha256=aq-kqk4Iado2HywDxWEejWc-7PGO6Oa-ZQLAM6XWPHw,1199
-trilogy/core/processing/utility.py,sha256=
+trilogy/core/processing/utility.py,sha256=KkbyMyDucbvK6YuLfUNVlDZ-1Adl7hthHsZAXeIbWm8,19466
 trilogy/core/processing/node_generators/__init__.py,sha256=-mzYkRsaRNa_dfTckYkKVFSR8h8a3ihEiPJDU_tAmDo,672
 trilogy/core/processing/node_generators/basic_node.py,sha256=WQNgJ1MwrMS_BQ-b3XwGGB6eToDykelAVj_fesJuqe0,2069
 trilogy/core/processing/node_generators/common.py,sha256=LwDgPlhWeuw0t07f3kX9IE5LXBdZhXfh-aY0XGk50ak,8946
@@ -35,8 +35,8 @@ trilogy/core/processing/node_generators/filter_node.py,sha256=Vz9Rb67e1dfZgnliek
 trilogy/core/processing/node_generators/group_node.py,sha256=r54IVEhXW-tzod6uEHIQObrxgQt6aNySk5emWkWyqCU,4938
 trilogy/core/processing/node_generators/group_to_node.py,sha256=R9i_wHipxjXJyfYEwfeTw2EPpuanXVA327XyfcP2tBg,2537
 trilogy/core/processing/node_generators/multiselect_node.py,sha256=_KO9lqzHQoy4VAviO0ttQlmK0tjaqrJj4SJPhmoIYm8,6229
-trilogy/core/processing/node_generators/node_merge_node.py,sha256=
-trilogy/core/processing/node_generators/rowset_node.py,sha256=
+trilogy/core/processing/node_generators/node_merge_node.py,sha256=ArwE7QoLgSoc2WUR_z9kyWJROxn9qq3KhMcdMZdH3k0,13977
+trilogy/core/processing/node_generators/rowset_node.py,sha256=KtdN6t2xM8CJxobc4aQX4W8uX98U6IabeuBF_FtBLR4,4583
 trilogy/core/processing/node_generators/select_merge_node.py,sha256=MKjlXqFBSin6cTnS6n5lEcNBJsMvSefDIXOwYNVbM0s,10371
 trilogy/core/processing/node_generators/select_node.py,sha256=nwXHQF6C-aQUIelx9dyxN2pK3muL-4-6RIqnqQqNwtw,1808
 trilogy/core/processing/node_generators/unnest_node.py,sha256=cZ26CN338CBnd6asML1OBUtNcDzmNlFpY0Vnade4yrc,2256
@@ -50,7 +50,7 @@ trilogy/core/processing/nodes/select_node_v2.py,sha256=gS9OQgS2TSEK59BQ9R0i83pTH
 trilogy/core/processing/nodes/unnest_node.py,sha256=mAmFluzm2yeeiQ6NfIB7BU_8atRGh-UJfPf9ROwbhr8,2152
 trilogy/core/processing/nodes/window_node.py,sha256=ro0QfMFi4ZmIn5Q4D0M_vJWfnHH_C0MN7XkVkx8Gygg,1214
 trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/dialect/base.py,sha256=
+trilogy/dialect/base.py,sha256=BDxL4eFEmkcT8Nj8W9P4anCYkAAvfsl9G01-NpI7r6w,33802
 trilogy/dialect/bigquery.py,sha256=15KJ-cOpBlk9O7FPviPgmg8xIydJeKx7WfmL3SSsPE8,2953
 trilogy/dialect/common.py,sha256=Hr0mxcNxjSvhpBM5Wvb_Q7aklAuYj5aBDrW433py0Zs,4403
 trilogy/dialect/config.py,sha256=tLVEMctaTDhUgARKXUNfHUcIolGaALkQ0RavUvXAY4w,2994
@@ -66,18 +66,18 @@ trilogy/hooks/graph_hook.py,sha256=onHvMQPwj_KOS3HOTpRFiy7QLLKAiycq2MzJ_Q0Oh5Y,2
 trilogy/hooks/query_debugger.py,sha256=Pe-Kw1JGngeLqQOMQb0E3-24jXEavqnPCQ-KOfTfjP8,4357
 trilogy/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/parsing/common.py,sha256
+trilogy/parsing/common.py,sha256=kbqWy30nnVc7ID-sdSDwxYomnxd3guyuIJF3yvlpQwg,9960
 trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
 trilogy/parsing/exceptions.py,sha256=92E5i2frv5hj9wxObJZsZqj5T6bglvPzvdvco_vW1Zk,38
 trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-trilogy/parsing/parse_engine.py,sha256=
-trilogy/parsing/render.py,sha256=
-trilogy/parsing/trilogy.lark,sha256=
+trilogy/parsing/parse_engine.py,sha256=tcBgjfew0kAfSEt1aFo9Pu3yacEBB1KFm7v_Iobz52g,64467
+trilogy/parsing/render.py,sha256=7mEEe5DWVAafaGl__oQE7FPn_4QhcsGT2VVp-nk1Lr8,13078
+trilogy/parsing/trilogy.lark,sha256=ZP9USPgD8-Fq5UzIl4iGpAeGuh2JLGzSoYJhvEGOi2c,12188
 trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
+pytrilogy-0.0.2.23.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
+pytrilogy-0.0.2.23.dist-info/METADATA,sha256=w2tvs68fbIBkngB-SzrUElriNjj5eXa8PfsPniRU2vY,8403
+pytrilogy-0.0.2.23.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+pytrilogy-0.0.2.23.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
+pytrilogy-0.0.2.23.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
+pytrilogy-0.0.2.23.dist-info/RECORD,,
|
trilogy/__init__.py CHANGED
trilogy/constants.py CHANGED
@@ -44,6 +44,7 @@ class Comments:
 class Config:
     strict_mode: bool = True
     human_identifiers: bool = True
+    randomize_cte_names: bool = False
     validate_missing: bool = True
     comments: Comments = field(default_factory=Comments)
     optimizations: Optimizations = field(default_factory=Optimizations)
|
trilogy/core/enums.py CHANGED
@@ -12,6 +12,7 @@ class UnnestMode(Enum):
 
 class ConceptSource(Enum):
     MANUAL = "manual"
+    CTE = "cte"
     PERSIST_STATEMENT = "persist_statement"
     AUTO_DERIVED = "auto_derived"
 
@@ -206,6 +207,8 @@ class FunctionClass(Enum):
         FunctionType.CURRENT_DATETIME,
     ]
 
+    ONE_TO_MANY = [FunctionType.UNNEST]
+
 
 class Boolean(Enum):
     TRUE = "true"
@@ -289,3 +292,13 @@ class SelectFiltering(Enum):
     NONE = "none"
     EXPLICIT = "explicit"  # the filtering contains only selected values
     IMPLICIT = "implicit"  # the filtering contains unselected values
+
+
+class IOType(Enum):
+    CSV = "csv"
+
+    @classmethod
+    def _missing_(cls, value):
+        if isinstance(value, str) and value.lower() != value:
+            return IOType(value.lower())
+        return super()._missing_(value)
|
trilogy/core/env_processor.py CHANGED
@@ -10,9 +10,11 @@ def add_concept(concept: Concept, g: ReferenceGraph):
     g.add_node(concept)
     # if we have sources, recursively add them
     node_name = concept_to_node(concept)
-    if concept.
-        for source in concept.
+    if concept.concept_arguments:
+        for source in concept.concept_arguments:
             generic = source.with_default_grain()
+            add_concept(generic, g)
+
             g.add_edge(generic, node_name)
     for _, pseudonym in concept.pseudonyms.items():
         pseudonym = pseudonym.with_default_grain()
|
trilogy/core/ergonomics.py CHANGED
@@ -1,3 +1,5 @@
+from trilogy.constants import CONFIG
+
 # source: https://github.com/aaronbassett/Pass-phrase
 CTE_NAMES = """quizzical
 highfalutin
@@ -103,8 +105,6 @@ mandrill
 marlin
 monitor
 ocelot
-osprey
-owl
 petrel
 python
 ray
@@ -132,7 +132,6 @@ cuckoo
 darter
 dove
 duck
-eagle
 falcon
 finch
 flamingo
@@ -184,4 +183,12 @@ warbler""".split(
     "\n"
 )
 
-
+
+def generate_cte_names():
+    if CONFIG.randomize_cte_names:
+        from random import shuffle
+
+        new = [*CTE_NAMES]
+        shuffle(new)
+        return new
+    return CTE_NAMES
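Together with the new `randomize_cte_names` flag on `Config` (constants.py above), `generate_cte_names` lets query compilation draw CTE names from a shuffled copy of the pool. A minimal sketch of the behavior, assuming the shared `CONFIG` instance can be mutated at runtime:

```python
from trilogy.constants import CONFIG
from trilogy.core.ergonomics import generate_cte_names

names_default = generate_cte_names()   # default: the CTE_NAMES pool, unchanged order
CONFIG.randomize_cte_names = True
names_shuffled = generate_cte_names()  # same names, freshly shuffled copy
assert sorted(names_shuffled) == sorted(names_default)
```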
|
trilogy/core/functions.py CHANGED
@@ -104,6 +104,8 @@ def Unnest(args: list[Concept]) -> Function:
     output = arg_to_datatype(args[0])
     if isinstance(output, (ListType)):
         output = output.value_data_type
+    else:
+        output = DataType.STRING
     return Function(
         operator=FunctionType.UNNEST,
         arguments=args,
|
trilogy/core/models.py CHANGED
@@ -65,6 +65,7 @@ from trilogy.core.enums import (
     ShowCategory,
     Granularity,
     SelectFiltering,
+    IOType,
 )
 from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
 from trilogy.utility import unique
@@ -81,6 +82,9 @@ LT = TypeVar("LT")
 
 
 def is_compatible_datatype(left, right):
+    # for unknown types, we can't make any assumptions
+    if right == DataType.UNKNOWN or left == DataType.UNKNOWN:
+        return True
     if left == right:
         return True
     if {left, right} == {DataType.NUMERIC, DataType.FLOAT}:
@@ -1580,6 +1584,13 @@ class RawSQLStatement(BaseModel):
     meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
 
 
+class CopyStatement(BaseModel):
+    target: str
+    target_type: IOType
+    meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
+    select: SelectStatement
+
+
 class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
     selection: List[SelectItem]
     order_by: Optional[OrderBy] = None
@@ -3478,10 +3489,18 @@ class Environment(BaseModel):
             # to make this a root for discovery purposes
             # as it now "exists" in a table
             current_concept.lineage = None
+            current_concept = current_concept.with_default_grain()
             self.add_concept(
                 current_concept, meta=meta, force=True, _ignore_cache=True
             )
             self.merge_concept(new_concept, current_concept, [])
+        else:
+            self.add_concept(current_concept, meta=meta, _ignore_cache=True)
+
+        # else:
+        #     self.add_concept(
+        #         current_concept, meta=meta, _ignore_cache=True
+        #     )
         if not _ignore_cache:
             self.gen_concept_list_caches()
         return datasource
@@ -3591,6 +3610,7 @@ class Comparison(
         MagicConstants,
         WindowItem,
         AggregateWrapper,
+        TupleWrapper,
     ]
     operator: ComparisonOperator
 
@@ -4250,13 +4270,23 @@ class ProcessedQuery(BaseModel):
     order_by: Optional[OrderBy] = None
 
 
-class
+class PersistQueryMixin(BaseModel):
     output_to: MaterializedDataset
     datasource: Datasource
     # base:Dataset
 
 
-class ProcessedQueryPersist(ProcessedQuery,
+class ProcessedQueryPersist(ProcessedQuery, PersistQueryMixin):
+    pass
+
+
+class CopyQueryMixin(BaseModel):
+    target: str
+    target_type: IOType
+    # base:Dataset
+
+
+class ProcessedCopyStatement(ProcessedQuery, CopyQueryMixin):
     pass
 
 
@@ -4289,6 +4319,9 @@ class RowsetDerivationStatement(Namespaced, BaseModel):
     def __repr__(self):
         return f"RowsetDerivation<{str(self.select)}>"
 
+    def __str__(self):
+        return self.__repr__()
+
     @property
     def derived_concepts(self) -> List[Concept]:
         output: list[Concept] = []
@@ -4307,7 +4340,8 @@ class RowsetDerivationStatement(Namespaced, BaseModel):
                     content=orig_concept, where=self.select.where_clause, rowset=self
                 ),
                 grain=orig_concept.grain,
-                metadata
+                # TODO: add proper metadata
+                metadata=Metadata(concept_source=ConceptSource.CTE),
                 namespace=(
                     f"{self.name}.{orig_concept.namespace}"
                     if orig_concept.namespace != self.namespace
@@ -4334,6 +4368,7 @@ class RowsetDerivationStatement(Namespaced, BaseModel):
                     components=[orig[c.address] for c in x.grain.components_copy]
                 )
             else:
+
                 x.grain = default_grain
         return output
 
@@ -4359,6 +4394,9 @@ class RowsetItem(Mergeable, Namespaced, BaseModel):
             f"<Rowset<{self.rowset.name}>: {str(self.content)} where {str(self.where)}>"
         )
 
+    def __str__(self):
+        return self.__repr__()
+
     def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
         return RowsetItem(
             content=self.content.with_merge(source, target, modifiers),
@@ -4507,6 +4545,37 @@ class Parenthetical(
         return base
 
 
+class TupleWrapper(Generic[VT], tuple):
+    """Used to distinguish parsed tuple objects from other tuples"""
+
+    def __init__(self, val, type: DataType, **kwargs):
+        super().__init__()
+        self.type = type
+        self.val = val
+
+    def __getnewargs__(self):
+        return (self.val, self.type)
+
+    def __new__(cls, val, type: DataType, **kwargs):
+        return super().__new__(cls, tuple(val))
+        # self.type = type
+
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
+    ) -> core_schema.CoreSchema:
+        args = get_args(source_type)
+        if args:
+            schema = handler(Tuple[args])  # type: ignore
+        else:
+            schema = handler(Tuple)
+        return core_schema.no_info_after_validator_function(cls.validate, schema)
+
+    @classmethod
+    def validate(cls, v):
+        return cls(v, type=arg_to_datatype(v[0]))
+
+
 class PersistStatement(BaseModel):
     datasource: Datasource
     select: SelectStatement
@@ -4573,6 +4642,12 @@ def list_to_wrapper(args):
     return ListWrapper(args, type=types[0])
 
 
+def tuple_to_wrapper(args):
+    types = [arg_to_datatype(arg) for arg in args]
+    assert len(set(types)) == 1
+    return TupleWrapper(args, type=types[0])
+
+
 def dict_to_map_wrapper(arg):
     key_types = [arg_to_datatype(arg) for arg in arg.keys()]
 
@@ -4628,6 +4703,8 @@ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType | Numeric
         return arg.function.output_datatype
     elif isinstance(arg, Parenthetical):
         return arg_to_datatype(arg.content)
+    elif isinstance(arg, TupleWrapper):
+        return ListType(type=arg.type)
     elif isinstance(arg, WindowItem):
         if arg.type in (WindowType.RANK, WindowType.ROW_NUMBER):
             return DataType.INTEGER
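The new `TupleWrapper` mirrors the existing `ListWrapper`: it tags parsed tuple literals with an element datatype so later stages (comparisons, rendering) can treat them distinctly. A small sketch using the helper added alongside it; the integer literals are purely illustrative:

```python
from trilogy.core.models import tuple_to_wrapper, arg_to_datatype

wrapped = tuple_to_wrapper((1, 2, 3))   # element types must agree; type is taken from the elements
element_type = wrapped.type             # DataType of the tuple's elements
as_datatype = arg_to_datatype(wrapped)  # resolves to ListType(type=wrapped.type) via the new branch
```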
|
trilogy/core/processing/node_generators/node_merge_node.py CHANGED
@@ -87,12 +87,10 @@ def determine_induced_minimal_nodes(
     for node in G.nodes:
         if concepts.get(node):
             lookup: Concept = concepts[node]
-            if lookup.derivation
-                nodes_to_remove.append(node)
-            elif lookup.derivation == PurposeLineage.BASIC and G.out_degree(node) == 0:
+            if lookup.derivation in (PurposeLineage.CONSTANT,):
                 nodes_to_remove.append(node)
             # purge a node if we're already looking for all it's parents
-
+            if filter_downstream and lookup.derivation not in (PurposeLineage.ROOT,):
                 nodes_to_remove.append(node)
 
     H.remove_nodes_from(nodes_to_remove)
@@ -105,11 +103,12 @@ def determine_induced_minimal_nodes(
     zero_out = list(
         x for x in H.nodes if G.out_degree(x) == 0 and x not in nodelist
     )
+
     try:
         paths = nx.multi_source_dijkstra_path(H, nodelist)
     except nx.exception.NodeNotFound:
+        logger.debug(f"Unable to find paths for {nodelist}")
         return None
-
     H.remove_nodes_from(list(x for x in H.nodes if x not in paths))
     sG: nx.Graph = ax.steinertree.steiner_tree(H, nodelist).copy()
     final: nx.DiGraph = nx.subgraph(G, sG.nodes).copy()
@@ -126,12 +125,24 @@ def determine_induced_minimal_nodes(
     # all concept nodes must have a parent
 
     if not all(
-        [
+        [
+            final.in_degree(node) > 0
+            for node in final.nodes
+            if node.startswith("c~") and node in nodelist
+        ]
     ):
+        missing = [
+            node
+            for node in final.nodes
+            if node.startswith("c~") and final.in_degree(node) == 0
+        ]
+        logger.debug(f"Skipping graph for {nodelist} as no in_degree {missing}")
         return None
 
     if not all([node in final.nodes for node in nodelist]):
+        logger.debug(f"Skipping graph for {nodelist} as missing nodes")
        return None
+    logger.debug(f"Found final graph {final.nodes}")
     return final
 
 
@@ -256,7 +267,9 @@ def resolve_weak_components(
 
     subgraphs: list[list[Concept]] = []
     # components = nx.strongly_connected_components(g)
+    node_list = [x for x in g.nodes if x.startswith("c~")]
     components = extract_ds_components(g, node_list)
+    logger.debug(f"Extracted components {components} from {node_list}")
     for component in components:
         # we need to take unique again as different addresses may map to the same concept
         sub_component = unique(
|
trilogy/core/processing/node_generators/rowset_node.py CHANGED
@@ -96,7 +96,7 @@ def gen_rowset_node(
         # node.set_preexisting_conditions(conditions.conditional if conditions else None)
         return node
 
-    possible_joins = concept_to_relevant_joins(
+    possible_joins = concept_to_relevant_joins(node.output_concepts)
     if not possible_joins:
         logger.info(
             f"{padding(depth)}{LOGGER_PREFIX} no possible joins for rowset node to get {[x.address for x in local_optional]}; have {[x.address for x in node.output_concepts]}"
@@ -104,7 +104,7 @@ def gen_rowset_node(
         return node
     enrich_node: MergeNode = source_concepts(  # this fetches the parent + join keys
         # to then connect to the rest of the query
-        mandatory_list=
+        mandatory_list=possible_joins + local_optional,
         environment=environment,
         g=g,
         depth=depth + 1,
|
trilogy/core/processing/utility.py CHANGED
@@ -28,6 +28,7 @@ from trilogy.core.models import (
     DatePart,
     NumericType,
     ListType,
+    TupleWrapper,
 )
 
 from trilogy.core.enums import Purpose, Granularity, BooleanOperator, Modifier
@@ -422,6 +423,7 @@ def is_scalar_condition(
         | NumericType
         | DatePart
         | ListWrapper[Any]
+        | TupleWrapper[Any]
     ),
     materialized: set[str] | None = None,
 ) -> bool:
|
trilogy/core/query_processor.py CHANGED
@@ -26,13 +26,15 @@ from trilogy.core.models import (
     BaseJoin,
     InstantiatedUnnestJoin,
     Conditional,
+    ProcessedCopyStatement,
+    CopyStatement,
 )
 
 from trilogy.utility import unique
 
 from trilogy.hooks.base_hook import BaseHook
 from trilogy.constants import logger
-from trilogy.core.ergonomics import
+from trilogy.core.ergonomics import generate_cte_names
 from trilogy.core.optimization import optimize_ctes
 from math import ceil
 from collections import defaultdict
@@ -169,15 +171,16 @@ def datasource_to_query_datasource(datasource: Datasource) -> QueryDatasource:
 
 
 def generate_cte_name(full_name: str, name_map: dict[str, str]) -> str:
+    cte_names = generate_cte_names()
     if CONFIG.human_identifiers:
         if full_name in name_map:
             return name_map[full_name]
         suffix = ""
         idx = len(name_map)
-        if idx >= len(
-            int = ceil(idx / len(
+        if idx >= len(cte_names):
+            int = ceil(idx / len(cte_names))
             suffix = f"_{int}"
-        valid = [x for x in
+        valid = [x for x in cte_names if x + suffix not in name_map.values()]
         lookup = valid[0]
         new_name = f"{lookup}{suffix}"
         name_map[full_name] = new_name
@@ -417,6 +420,24 @@ def process_persist(
     )
 
 
+def process_copy(
+    environment: Environment,
+    statement: CopyStatement,
+    hooks: List[BaseHook] | None = None,
+) -> ProcessedCopyStatement:
+    select = process_query(
+        environment=environment, statement=statement.select, hooks=hooks
+    )
+
+    # build our object to return
+    arg_dict = {k: v for k, v in select.__dict__.items()}
+    return ProcessedCopyStatement(
+        **arg_dict,
+        target=statement.target,
+        target_type=statement.target_type,
+    )
+
+
 def process_query(
     environment: Environment,
     statement: SelectStatement | MultiSelectStatement,
|
trilogy/dialect/base.py CHANGED
@@ -35,6 +35,7 @@ from trilogy.core.models import (
     Environment,
     RawColumnExpr,
     ListWrapper,
+    TupleWrapper,
     MapWrapper,
     ShowStatement,
     RowsetItem,
@@ -48,8 +49,11 @@ from trilogy.core.models import (
     MapType,
     StructType,
     MergeStatementV2,
+    Datasource,
+    CopyStatement,
+    ProcessedCopyStatement,
 )
-from trilogy.core.query_processor import process_query, process_persist
+from trilogy.core.query_processor import process_query, process_persist, process_copy
 from trilogy.dialect.common import render_join, render_unnest
 from trilogy.hooks.base_hook import BaseHook
 from trilogy.core.enums import UnnestMode
@@ -390,6 +394,7 @@ class BaseDialect:
             StructType,
             ListType,
             ListWrapper[Any],
+            TupleWrapper[Any],
             DatePart,
             CaseWhen,
             CaseElse,
@@ -429,7 +434,7 @@ class BaseDialect:
                     f"Missing source CTE for {e.right.address}"
                 )
             return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {target} where {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} is not null)"
-        elif isinstance(e.right, (ListWrapper, Parenthetical, list)):
+        elif isinstance(e.right, (ListWrapper, TupleWrapper, Parenthetical, list)):
             return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"
 
         elif isinstance(
@@ -510,6 +515,8 @@ class BaseDialect:
             return str(e)
         elif isinstance(e, ListWrapper):
             return f"[{','.join([self.render_expr(x, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) for x in e])}]"
+        elif isinstance(e, TupleWrapper):
+            return f"({','.join([self.render_expr(x, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) for x in e])})"
         elif isinstance(e, MapWrapper):
             return f"MAP {{{','.join([f'{self.render_expr(k, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}:{self.render_expr(v, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}' for k, v in e.items()])}}}"
         elif isinstance(e, list):
@@ -661,6 +668,7 @@ class BaseDialect:
             | ImportStatement
             | RawSQLStatement
             | MergeStatementV2
+            | CopyStatement
         ],
         hooks: Optional[List[BaseHook]] = None,
     ) -> List[
@@ -674,6 +682,7 @@ class BaseDialect:
             | ProcessedQueryPersist
            | ProcessedShowStatement
            | ProcessedRawSQLStatement
+            | ProcessedCopyStatement
         ] = []
         for statement in statements:
             if isinstance(statement, PersistStatement):
@@ -682,6 +691,12 @@ class BaseDialect:
                         hook.process_persist_info(statement)
                 persist = process_persist(environment, statement, hooks=hooks)
                 output.append(persist)
+            elif isinstance(statement, CopyStatement):
+                if hooks:
+                    for hook in hooks:
+                        hook.process_select_info(statement.select)
+                copy = process_copy(environment, statement, hooks=hooks)
+                output.append(copy)
             elif isinstance(statement, SelectStatement):
                 if hooks:
                     for hook in hooks:
@@ -724,6 +739,7 @@ class BaseDialect:
                     MergeStatementV2,
                     ImportStatement,
                     RowsetDerivationStatement,
+                    Datasource,
                 ),
             ):
                 continue
|
trilogy/executor.py CHANGED
@@ -10,6 +10,7 @@ from trilogy.core.models import (
     ProcessedShowStatement,
     ProcessedQueryPersist,
     ProcessedRawSQLStatement,
+    ProcessedCopyStatement,
     RawSQLStatement,
     MultiSelectStatement,
     SelectStatement,
@@ -18,12 +19,14 @@ from trilogy.core.models import (
     Concept,
     ConceptDeclarationStatement,
     Datasource,
+    CopyStatement,
 )
 from trilogy.dialect.base import BaseDialect
 from trilogy.dialect.enums import Dialects
+from trilogy.core.enums import IOType
 from trilogy.parser import parse_text
 from trilogy.hooks.base_hook import BaseHook
-
+from pathlib import Path
 from dataclasses import dataclass
 
 
@@ -94,7 +97,15 @@ class Executor(object):
         self.connection = self.engine.connect()
 
     def execute_statement(self, statement) -> Optional[CursorResult]:
-        if not isinstance(
+        if not isinstance(
+            statement,
+            (
+                ProcessedQuery,
+                ProcessedShowStatement,
+                ProcessedQueryPersist,
+                ProcessedCopyStatement,
+            ),
+        ):
             return None
         return self.execute_query(statement)
 
@@ -152,6 +163,13 @@ class Executor(object):
     def _(self, query: RawSQLStatement) -> CursorResult:
         return self.execute_raw_sql(query.text)
 
+    @execute_query.register
+    def _(self, query: ShowStatement) -> CursorResult:
+        sql = self.generator.generate_queries(
+            self.environment, [query], hooks=self.hooks
+        )
+        return self.execute_query(sql[0])
+
     @execute_query.register
     def _(self, query: ProcessedShowStatement) -> CursorResult:
         return generate_result_set(
@@ -176,12 +194,33 @@ class Executor(object):
 
     @execute_query.register
     def _(self, query: ProcessedQueryPersist) -> CursorResult:
+
         sql = self.generator.compile_statement(query)
-
+
         output = self.connection.execute(text(sql))
         self.environment.add_datasource(query.datasource)
         return output
 
+    @execute_query.register
+    def _(self, query: ProcessedCopyStatement) -> CursorResult:
+        sql = self.generator.compile_statement(query)
+        output: CursorResult = self.connection.execute(text(sql))
+        if query.target_type == IOType.CSV:
+            import csv
+
+            with open(query.target, "w", newline="", encoding="utf-8") as f:
+                outcsv = csv.writer(f)
+                outcsv.writerow(output.keys())
+                outcsv.writerows(output)
+        else:
+            raise NotImplementedError(f"Unsupported IOType {query.target_type}")
+        # now return the query we ran through IO
+        # TODO: instead return how many rows were written?
+        return generate_result_set(
+            query.output_columns,
+            [self.generator.compile_statement(query)],
+        )
+
     @singledispatchmethod
     def generate_sql(self, command) -> list[str]:
         raise NotImplementedError(
@@ -244,39 +283,17 @@ class Executor(object):
         | ProcessedQueryPersist
         | ProcessedShowStatement
         | ProcessedRawSQLStatement
+        | ProcessedCopyStatement
     ]:
-
-
-        generatable = [
-            x
-            for x in parsed
-            if isinstance(
-                x,
-                (
-                    SelectStatement,
-                    PersistStatement,
-                    MultiSelectStatement,
-                    ShowStatement,
-                    RawSQLStatement,
-                ),
-            )
-        ]
-        sql = []
-        while generatable:
-            t = generatable.pop(0)
-            x = self.generator.generate_queries(
-                self.environment, [t], hooks=self.hooks
-            )[0]
-            if persist and isinstance(x, ProcessedQueryPersist):
-                self.environment.add_datasource(x.datasource)
-            sql.append(x)
-        return sql
+
+        return list(self.parse_text_generator(command, persist=persist))
 
     def parse_text_generator(self, command: str, persist: bool = False) -> Generator[
         ProcessedQuery
         | ProcessedQueryPersist
         | ProcessedShowStatement
-        | ProcessedRawSQLStatement
+        | ProcessedRawSQLStatement
+        | ProcessedCopyStatement,
         None,
         None,
     ]:
@@ -293,6 +310,7 @@ class Executor(object):
                     MultiSelectStatement,
                     ShowStatement,
                     RawSQLStatement,
+                    CopyStatement,
                 ),
             )
         ]
@@ -333,11 +351,11 @@ class Executor(object):
                     )
                 )
                 continue
-
-            logger.debug(compiled_sql)
-
-            output.append(self.connection.execute(text(compiled_sql)))
-            # generalize post-run success hooks
-            if isinstance(statement, ProcessedQueryPersist):
-                self.environment.add_datasource(statement.datasource)
+            output.append(self.execute_query(statement))
         return output
+
+    def execute_file(self, file: str | Path) -> List[CursorResult]:
+        file = Path(file)
+        with open(file, "r") as f:
+            command = f.read()
+        return self.execute_text(command)
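The copy execution path normalizes its target type through the new `IOType` enum (added in enums.py earlier in this diff) before dispatching, and currently only the CSV branch is implemented. A quick illustration of the enum's case-insensitive lookup:

```python
from trilogy.core.enums import IOType

# IOType._missing_ lower-cases string lookups, so the grammar token resolves either way:
assert IOType("CSV") is IOType.CSV
assert IOType("csv") is IOType.CSV

# Any other target type reaching the ProcessedCopyStatement handler raises NotImplementedError,
# so a new output format needs both a COPY_TYPE grammar addition and an executor branch.
```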
|
trilogy/parsing/common.py CHANGED
@@ -141,9 +141,12 @@ def function_to_concept(parent: Function, name: str, namespace: str) -> Concept:
         for x in parent.concept_arguments
         if not x.derivation == PurposeLineage.CONSTANT
     ]
-    grain = Grain()
+    grain: Grain | None = Grain()
     for x in pkeys:
         grain += x.grain
+    if parent.operator in FunctionClass.ONE_TO_MANY.value:
+        # if the function will create more rows, we don't know what grain this is at
+        grain = None
     modifiers = get_upstream_modifiers(pkeys)
     key_grain = []
     for x in pkeys:
@@ -156,13 +159,24 @@ def function_to_concept(parent: Function, name: str, namespace: str) -> Concept:
         purpose = Purpose.CONSTANT
     else:
         purpose = parent.output_purpose
+    if grain is not None:
+        return Concept(
+            name=name,
+            datatype=parent.output_datatype,
+            purpose=purpose,
+            lineage=parent,
+            namespace=namespace,
+            keys=keys,
+            modifiers=modifiers,
+            grain=grain,
+        )
+
     return Concept(
         name=name,
         datatype=parent.output_datatype,
         purpose=purpose,
         lineage=parent,
         namespace=namespace,
-        grain=grain,
         keys=keys,
         modifiers=modifiers,
     )
|
trilogy/parsing/parse_engine.py CHANGED
@@ -1,7 +1,7 @@
 from os.path import dirname, join
 from typing import List, Optional, Tuple, Union
 from re import IGNORECASE
-from lark import Lark, Transformer, v_args
+from lark import Lark, Transformer, v_args, Tree
 from lark.exceptions import (
     UnexpectedCharacters,
     UnexpectedEOF,
@@ -31,6 +31,7 @@ from trilogy.core.enums import (
     DatePart,
     ShowCategory,
     FunctionClass,
+    IOType,
 )
 from trilogy.core.exceptions import InvalidSyntaxException, UndefinedConceptException
 from trilogy.core.functions import (
@@ -84,6 +85,7 @@ from trilogy.core.models import (
     PersistStatement,
     Query,
     RawSQLStatement,
+    CopyStatement,
     SelectStatement,
     SelectItem,
     WhereClause,
@@ -105,9 +107,11 @@ from trilogy.core.models import (
     ConceptDerivation,
     RowsetDerivationStatement,
     list_to_wrapper,
+    tuple_to_wrapper,
     dict_to_map_wrapper,
     NumericType,
     HavingClause,
+    TupleWrapper,
 )
 from trilogy.parsing.exceptions import ParseError
 from trilogy.parsing.common import (
@@ -461,40 +465,7 @@ class ParseToObjects(Transformer):
         while isinstance(source_value, Parenthetical):
             source_value = source_value.content
 
-        if (
-            isinstance(source_value, Function)
-            and source_value.operator == FunctionType.STRUCT
-        ):
-            concept = arbitrary_to_concept(
-                source_value,
-                name=name,
-                namespace=namespace,
-                purpose=purpose,
-                metadata=metadata,
-            )
-
-            if concept.metadata:
-                concept.metadata.line_number = meta.line
-            self.environment.add_concept(concept, meta=meta)
-            return ConceptDerivation(concept=concept)
-        elif (
-            isinstance(source_value, Function)
-            and source_value.operator == FunctionType.ALIAS
-        ):
-            concept = arbitrary_to_concept(
-                source_value,
-                name=name,
-                namespace=namespace,
-                purpose=purpose,
-                metadata=metadata,
-            )
-
-            if concept.metadata:
-                concept.metadata.line_number = meta.line
-            self.environment.add_concept(concept, meta=meta)
-            return ConceptDerivation(concept=concept)
-
-        elif isinstance(
+        if isinstance(
             source_value, (FilterItem, WindowItem, AggregateWrapper, Function)
         ):
             concept = arbitrary_to_concept(
@@ -781,13 +752,29 @@ class ParseToObjects(Transformer):
     def rawsql_statement(self, meta: Meta, args) -> RawSQLStatement:
         return RawSQLStatement(meta=Metadata(line_number=meta.line), text=args[0])
 
+    def COPY_TYPE(self, args) -> IOType:
+        return IOType(args.value)
+
+    @v_args(meta=True)
+    def copy_statement(self, meta: Meta, args) -> CopyStatement:
+
+        return CopyStatement(
+            target=args[1],
+            target_type=args[0],
+            meta=Metadata(line_number=meta.line),
+            select=args[-1],
+        )
+
     def resolve_import_address(self, address) -> str:
         with open(address, "r", encoding="utf-8") as f:
             text = f.read()
         return text
 
     def import_statement(self, args: list[str]) -> ImportStatement:
-
+        if len(args) == 2:
+            alias = args[-1]
+        else:
+            alias = self.environment.namespace
         path = args[0].split(".")
 
         target = join(self.environment.working_path, *path) + ".preql"
@@ -1097,6 +1084,9 @@ class ParseToObjects(Transformer):
     def array_lit(self, args):
         return list_to_wrapper(args)
 
+    def tuple_lit(self, args):
+        return tuple_to_wrapper(args)
+
     def struct_lit(self, args):
 
         zipped = dict(zip(args[::2], args[1::2]))
@@ -1157,12 +1147,18 @@ class ParseToObjects(Transformer):
 
         while isinstance(right, Parenthetical) and isinstance(
             right.content,
-            (
+            (
+                Concept,
+                Function,
+                FilterItem,
+                WindowItem,
+                AggregateWrapper,
+                ListWrapper,
+                TupleWrapper,
+            ),
         ):
             right = right.content
-        if isinstance(
-            right, (Function, FilterItem, WindowItem, AggregateWrapper, ListWrapper)
-        ):
+        if isinstance(right, (Function, FilterItem, WindowItem, AggregateWrapper)):
             right = arbitrary_to_concept(
                 right,
                 namespace=self.environment.namespace,
@@ -1175,7 +1171,7 @@ class ParseToObjects(Transformer):
             )
 
     def expr_tuple(self, args):
-        return
+        return TupleWrapper(content=tuple(args))
 
     def parenthetical(self, args):
         return Parenthetical(content=args[0])
@@ -1873,10 +1869,12 @@ def unpack_visit_error(e: VisitError):
         unpack_visit_error(e.orig_exc)
     elif isinstance(e.orig_exc, (UndefinedConceptException, ImportError)):
         raise e.orig_exc
-    elif isinstance(e.orig_exc, SyntaxError):
-
-
-
+    elif isinstance(e.orig_exc, (SyntaxError, TypeError)):
+        if isinstance(e.obj, Tree):
+            raise InvalidSyntaxException(
+                str(e.orig_exc) + " in " + str(e.rule) + f" Line: {e.obj.meta.line}"
+            )
+        raise InvalidSyntaxException(str(e.orig_exc))
     raise e
 
 
|
trilogy/parsing/render.py CHANGED
@@ -32,6 +32,8 @@ from trilogy.core.models import (
     AggregateWrapper,
     PersistStatement,
     ListWrapper,
+    ListType,
+    TupleWrapper,
     RowsetDerivationStatement,
     MultiSelectStatement,
     OrderBy,
@@ -40,6 +42,7 @@ from trilogy.core.models import (
     RawSQLStatement,
     NumericType,
     MergeStatementV2,
+    CopyStatement,
 )
 from trilogy.core.enums import Modifier
 
@@ -180,12 +183,18 @@ class Renderer:
     def _(self, arg: ListWrapper):
         return "[" + ", ".join([self.to_string(x) for x in arg]) + "]"
 
+    @to_string.register
+    def _(self, arg: TupleWrapper):
+        return "(" + ", ".join([self.to_string(x) for x in arg]) + ")"
+
     @to_string.register
     def _(self, arg: DatePart):
         return arg.value
 
     @to_string.register
     def _(self, arg: "Address"):
+        if arg.is_query:
+            return f"query '''{arg.location}'''"
         return f"address {arg.location}"
 
     @to_string.register
@@ -209,21 +218,30 @@ class Renderer:
             base_description = concept.metadata.description
         else:
             base_description = None
-        if concept.namespace:
+        if concept.namespace and concept.namespace != DEFAULT_NAMESPACE:
             namespace = f"{concept.namespace}."
         else:
             namespace = ""
         if not concept.lineage:
             if concept.purpose == Purpose.PROPERTY and concept.keys:
-
+                keys = ",".join([self.to_string(key) for key in concept.keys])
+                output = f"{concept.purpose.value} <{keys}>.{namespace}{concept.name} {self.to_string(concept.datatype)};"
             else:
-                output = f"{concept.purpose.value} {namespace}{concept.name} {concept.datatype
+                output = f"{concept.purpose.value} {namespace}{concept.name} {self.to_string(concept.datatype)};"
         else:
             output = f"{concept.purpose.value} {namespace}{concept.name} <- {self.to_string(concept.lineage)};"
         if base_description:
             output += f" # {base_description}"
         return output
 
+    @to_string.register
+    def _(self, arg: ListType):
+        return f"list<{self.to_string(arg.value_data_type)}>"
+
+    @to_string.register
+    def _(self, arg: DataType):
+        return arg.value
+
     @to_string.register
     def _(self, arg: ConceptDerivation):
         # this is identical rendering;
@@ -269,6 +287,10 @@ class Renderer:
         base += "\n;"
         return base
 
+    @to_string.register
+    def _(self, arg: CopyStatement):
+        return f"COPY INTO {arg.target_type.value.upper()} '{arg.target}' FROM {self.to_string(arg.select)}"
+
     @to_string.register
     def _(self, arg: AlignClause):
         return "\nALIGN\n\t" + ",\n\t".join([self.to_string(c) for c in arg.items])
|
trilogy/parsing/trilogy.lark CHANGED
@@ -8,7 +8,7 @@
     | persist_statement
     | rowset_derivation_statement
     | import_statement
-
+    | copy_statement
     | merge_statement_v2
     | rawsql_statement
 
@@ -57,7 +57,7 @@
 
 column_list : (IDENTIFIER "," )* IDENTIFIER ","?
 
-import_statement: "import"
+import_statement: "import" IDENTIFIER ("." IDENTIFIER)* ("as" IDENTIFIER)?
 
 // persist_statement
 persist_statement: "persist"i IDENTIFIER "into"i IDENTIFIER "from"i select_statement grain_clause?
@@ -78,6 +78,12 @@
 // raw sql statement
 rawsql_statement: "raw_sql"i "(" MULTILINE_STRING ")"
 
+// copy statement
+
+COPY_TYPE: "csv"i
+
+copy_statement: "copy"i "into"i COPY_TYPE _string_lit "from"i select_statement
+
 // FUNCTION blocks
 function: raw_function
 function_binding_item: IDENTIFIER ":" data_type
@@ -200,29 +206,42 @@
 _constant_functions: fcurrent_date | fcurrent_datetime
 
 //string
-
-
-
-
-
-
+_LIKE.1: "like("i
+like: _LIKE expr "," _string_lit ")"
+_ILIKE.1: "ilike("i
+ilike: _ILIKE expr "," _string_lit ")"
+alt_like: expr "like"i expr
+_UPPER.1: "upper("i
+upper: _UPPER expr ")"
+_LOWER.1: "lower("i
+lower: _LOWER expr ")"
+_SPLIT.1: "split("i
+fsplit: _SPLIT expr "," _string_lit ")"
+_STRPOS.1: "strpos("i
+fstrpos: _STRPOS expr "," expr ")"
 _SUBSTRING.1: "substring("i
 fsubstring: _SUBSTRING expr "," expr "," expr ")"
 
 _string_functions: like | ilike | upper | lower | fsplit | fstrpos | fsubstring
 
 // special aggregate
-
+_GROUP.1: "group("i
+fgroup: _GROUP expr ")" aggregate_over?
 //aggregates
 _COUNT.1: "count("i
 count: _COUNT expr ")"
+
 count_distinct: "count_distinct"i "(" expr ")"
 
-
-
-
+// avoid conflicts with the window
+_SUM.1: "sum("i
+sum: _SUM expr ")"
+_AVG.1: "avg("i
+avg: _AVG expr ")"
+_MAX.1: "max("i
+max: _MAX expr ")"
+_MIN.1: "min("i
+min: _MIN expr ")"
 
 //aggregates can force a grain
 aggregate_all: "*"
@@ -255,10 +274,14 @@
 fyear: _YEAR expr ")"
 
 DATE_PART: "DAY"i | "WEEK"i | "MONTH"i | "QUARTER"i | "YEAR"i | "MINUTE"i | "HOUR"i | "SECOND"i
-
-
-
-
+_DATE_TRUNC.1: "date_trunc("i
+fdate_trunc: _DATE_TRUNC expr "," DATE_PART ")"
+_DATE_PART.1: "date_part("i
+fdate_part: _DATE_PART expr "," DATE_PART ")"
+_DATE_ADD.1: "date_add("i
+fdate_add: _DATE_ADD expr "," DATE_PART "," int_lit ")"
+_DATE_DIFF.1: "date_diff("i
+fdate_diff: _DATE_DIFF expr "," expr "," DATE_PART ")"
 
 _date_functions: fdate | fdate_add | fdate_diff | fdatetime | ftimestamp | fsecond | fminute | fhour | fday | fday_of_week | fweek | fmonth | fquarter | fyear | fdate_part | fdate_trunc
 
@@ -286,16 +309,18 @@
 
 array_lit: "[" (literal ",")* literal ","? "]"()
 
+tuple_lit: "(" (literal ",")* literal ","? ")"
+
 map_lit: "{" (literal ":" literal ",")* literal ":" literal ","? "}"
 
-
-struct_lit:
+_STRUCT.1: "struct("i
+struct_lit: _STRUCT (IDENTIFIER "=" literal ",")* IDENTIFIER "=" literal ","? ")"
 
 !bool_lit: "True"i | "False"i
 
 !null_lit.1: "null"i
 
-literal: null_lit | _string_lit | int_lit | float_lit | bool_lit | array_lit | map_lit | struct_lit
+literal: null_lit | _string_lit | int_lit | float_lit | bool_lit | array_lit | map_lit | struct_lit | tuple_lit
 
 MODIFIER: "Optional"i | "Partial"i | "Nullable"i
|
|
301
326
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|