pytrilogy 0.0.3.68__py3-none-any.whl → 0.0.3.70__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of pytrilogy might be problematic.
- {pytrilogy-0.0.3.68.dist-info → pytrilogy-0.0.3.70.dist-info}/METADATA +34 -19
- {pytrilogy-0.0.3.68.dist-info → pytrilogy-0.0.3.70.dist-info}/RECORD +20 -20
- trilogy/__init__.py +1 -1
- trilogy/core/functions.py +2 -2
- trilogy/core/models/author.py +1 -3
- trilogy/core/models/build.py +20 -7
- trilogy/core/processing/discovery_node_factory.py +1 -0
- trilogy/core/processing/node_generators/group_node.py +22 -3
- trilogy/core/processing/node_generators/select_merge_node.py +15 -8
- trilogy/core/query_processor.py +1 -2
- trilogy/dialect/base.py +39 -6
- trilogy/dialect/bigquery.py +19 -2
- trilogy/dialect/duckdb.py +17 -0
- trilogy/parsing/common.py +3 -2
- trilogy/parsing/parse_engine.py +5 -1
- trilogy/parsing/trilogy.lark +1 -1
- {pytrilogy-0.0.3.68.dist-info → pytrilogy-0.0.3.70.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.3.68.dist-info → pytrilogy-0.0.3.70.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.68.dist-info → pytrilogy-0.0.3.70.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.68.dist-info → pytrilogy-0.0.3.70.dist-info}/top_level.txt +0 -0
{pytrilogy-0.0.3.68.dist-info → pytrilogy-0.0.3.70.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pytrilogy
-Version: 0.0.3.68
+Version: 0.0.3.70
 Summary: Declarative, typed query language that compiles to SQL.
 Home-page:
 Author:

@@ -40,46 +40,61 @@ Dynamic: summary
 [](https://trilogydata.dev/)
 [](https://discord.gg/Z4QSSuqGEd)
 
-
+The Trilogy language is an experiment in better SQL for analytics - a streamlined SQL that replaces tables/joins with a lightweight semantic binding layer and provides easy reuse and composability. It compiles to SQL - making it easy to debug or integrate into existing workflows - and can be run against any supported SQL backend.
 
-
+[pytrilogy](https://github.com/trilogy-data/pytrilogy) is the reference implementation, written in Python.
+
+Trilogy concretely solves these common problems in karge, SQL based analytics teams:
+- decoupling consumption code from specific physical assets
+- better testability and change management
+- reduced boilerplate and opportunity for OLAP style optimization at scale
+
+Trilogy can be especially powerful as a frontend consumption language, since the decoupling from the physical layout makes dynamic and interactive dashboards backed by SQL tables much easier to create.
 
 > [!TIP]
->
+> You can try Trilogy in a [open-source studio](https://trilogydata.dev/trilogy-studio-core/). More details on the language can be found on the [documentation](https://trilogydata.dev/).
 
-
+We recommend the studio as the fastest way to explore Trilogy. For deeper work and integration, `pytrilogy` can be run locally to parse and execute trilogy model [.preql] files using the `trilogy` CLI tool, or can be run in python by importing the `trilogy` package.
 
-
+Installation: `pip install pytrilogy`
 
-
+### Trilogy Looks Like SQL
 
-Trilogy looks like SQL:
 ```sql
+import names;
+
+const top_names <- ['Elvis', 'Elvira', 'Elrond', 'Sam'];
+
+def initcap(word) -> upper(substring(word, 1, 1)) || substring(word, 2, len(word));
+
 WHERE
-    name
+    @initcap(name) in top_names
 SELECT
     name,
-
+    sum(births) as name_count
 ORDER BY
     name_count desc
 LIMIT 10;
 ```
 ## Goals
-
+Versus SQL, Trilogy aims to:
 
-
+Keep:
 - Correctness
 - Accessibility
 
-
+Improve:
 - Simplicity
-- Understandability
 - Refactoring/mantainability
 - Reusability
 
 Maintain:
 - Acceptable performance
 
+Remove:
+- Lower-level procedural features
+- Transactional optimizations/non-analytics features
+
 ## Hello World
 
 Save the following code in a file named `hello.preql`

@@ -141,10 +156,7 @@ SELECT
     sentences.text
 ;
 
-
-
 ```
-
 Run the following from the directory the file is in.
 
 ```bash

@@ -157,11 +169,15 @@ trilogy run hello.trilogy duckdb
 
 The current Trilogy implementation supports these backends:
 
+### Core
 - Bigquery
-- SQL Server
 - DuckDB
 - Snowflake
 
+### Experimental
+- SQL Server
+- Presto
+
 ## Basic Example - Python
 
 Trilogy can be run directly in python through the core SDK. Trilogy code can be defined and parsed inline or parsed out of files.

@@ -170,7 +186,6 @@ A bigquery example, similar to bigquery [the quickstart](https://cloud.google.co
 
 ```python
 
-
 from trilogy import Dialects, Environment
 
 environment = Environment()
{pytrilogy-0.0.3.68.dist-info → pytrilogy-0.0.3.70.dist-info}/RECORD CHANGED

@@ -1,5 +1,5 @@
-pytrilogy-0.0.3.
-trilogy/__init__.py,sha256=
+pytrilogy-0.0.3.70.dist-info/licenses/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
+trilogy/__init__.py,sha256=jOvyFSWu8pnqklbm3VK_FmD5VSPj1cqSUoyMFyDKVC8,303
 trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/constants.py,sha256=lv_aJWP6dn6e2aF4BAE72jbnNtceFddfqtiDSsvzno0,1692
 trilogy/engine.py,sha256=OK2RuqCIUId6yZ5hfF8J1nxGP0AJqHRZiafcowmW0xc,1728

@@ -16,15 +16,15 @@ trilogy/core/env_processor.py,sha256=pFsxnluKIusGKx1z7tTnfsd_xZcPy9pZDungkjkyvI0
 trilogy/core/environment_helpers.py,sha256=VvPIiFemqaLLpIpLIqprfu63K7muZ1YzNg7UZIUph8w,8267
 trilogy/core/ergonomics.py,sha256=e-7gE29vPLFdg0_A1smQ7eOrUwKl5VYdxRSTddHweRA,1631
 trilogy/core/exceptions.py,sha256=jYEduuMehcMkmCpf-OC_taELPZm7qNfeSNzIWkDYScs,707
-trilogy/core/functions.py,sha256=
+trilogy/core/functions.py,sha256=R8_aOe2mNRgOLmsnI9pG_GOU3I7kFPTnXQzplN2d7Dw,29343
 trilogy/core/graph_models.py,sha256=wIT-oBchHWE46GLDkgN5K7EzhOBEo8LfaeWV5G5cYcE,3302
 trilogy/core/internal.py,sha256=iicDBlC6nM8d7e7jqzf_ZOmpUsW8yrr2AA8AqEiLx-s,1577
 trilogy/core/optimization.py,sha256=ojpn-p79lr03SSVQbbw74iPCyoYpDYBmj1dbZ3oXCjI,8860
-trilogy/core/query_processor.py,sha256=
+trilogy/core/query_processor.py,sha256=t91tYQS0o89r-acdxo8uDUpB1elNcqAhLWkbM8BFjho,20283
 trilogy/core/utility.py,sha256=3VC13uSQWcZNghgt7Ot0ZTeEmNqs__cx122abVq9qhM,410
 trilogy/core/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/core/models/author.py,sha256=
-trilogy/core/models/build.py,sha256=
+trilogy/core/models/author.py,sha256=Wz_6yEZS5EyXJ5wHHl-C44ikFka5XftBYOjNRK98Hfw,77790
+trilogy/core/models/build.py,sha256=BNHzfqD5NWCKHntvnswvDrtCD1JFgvPedx23cPq942U,65405
 trilogy/core/models/build_environment.py,sha256=s_C9xAHuD3yZ26T15pWVBvoqvlp2LdZ8yjsv2_HdXLk,5363
 trilogy/core/models/core.py,sha256=EMAuWTngoNVGCdfNrAY7_k6g528iodNQLwPRVip-8DA,10980
 trilogy/core/models/datasource.py,sha256=6RjJUd2u4nYmEwFBpJlM9LbHVYDv8iHJxqiBMZqUrwI,9422

@@ -37,7 +37,7 @@ trilogy/core/optimizations/predicate_pushdown.py,sha256=g4AYE8Aw_iMlAh68TjNXGP75
 trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/core/processing/concept_strategies_v3.py,sha256=zy5VZa9LITOws6aIILfv_bSR2-jR1Ndldy-nmwMyQ5w,23144
 trilogy/core/processing/discovery_loop.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/core/processing/discovery_node_factory.py,sha256=
+trilogy/core/processing/discovery_node_factory.py,sha256=I3JJxoF-u8OVvqXXAOhvMg2h-KdpHQwg6EpCeQtxGCI,15123
 trilogy/core/processing/discovery_utility.py,sha256=3xdd1ypKappSDm0SJs7WtW5YegL80SlYhDQlkNePp4E,4549
 trilogy/core/processing/discovery_validation.py,sha256=fGWJmKpgEd1f4RkK-fYOBUT1cwsJnahwXFAdRlou7MI,5365
 trilogy/core/processing/graph_utils.py,sha256=8QUVrkE9j-9C1AyrCb1nQEh8daCe0u1HuXl-Te85lag,1205

@@ -46,13 +46,13 @@ trilogy/core/processing/node_generators/__init__.py,sha256=w8TQQgNhyAra6JQHdg1_A
 trilogy/core/processing/node_generators/basic_node.py,sha256=luN8LftafZepoFgDRv4gmvEGFlOI2j0icJ5fz4UT7uo,5165
 trilogy/core/processing/node_generators/common.py,sha256=PdysdroW9DUADP7f5Wv_GKPUyCTROZV1g3L45fawxi8,9443
 trilogy/core/processing/node_generators/filter_node.py,sha256=oRRq2-T3ufgn4D23uQsc58f20eFk-djs4QI3WKA75K8,10908
-trilogy/core/processing/node_generators/group_node.py,sha256=
+trilogy/core/processing/node_generators/group_node.py,sha256=1QJhRxsTklJ5xq8wHlAURZaN9gL9FPpeCa1OJ7IwXnY,6769
 trilogy/core/processing/node_generators/group_to_node.py,sha256=jKcNCDOY6fNblrdZwaRU0sbUSr9H0moQbAxrGgX6iGA,3832
 trilogy/core/processing/node_generators/multiselect_node.py,sha256=GWV5yLmKTe1yyPhN60RG1Rnrn4ktfn9lYYXi_FVU4UI,7061
 trilogy/core/processing/node_generators/node_merge_node.py,sha256=dSqfqWp2SolhDB16nkPaaTXgNQo4QquEufPdf7q0Tb4,17398
 trilogy/core/processing/node_generators/recursive_node.py,sha256=l5zdh0dURKwmAy8kK4OpMtZfyUEQRk6N-PwSWIyBpSM,2468
 trilogy/core/processing/node_generators/rowset_node.py,sha256=2BiSsegbRF9csJ_Xl8P_CxIm4dAAb7dF29u6v_Odr-A,6709
-trilogy/core/processing/node_generators/select_merge_node.py,sha256=
+trilogy/core/processing/node_generators/select_merge_node.py,sha256=2KIljy2TFLT9kjKUfhkFwSDkcu4hv4yRVTKOGzEe-NM,21763
 trilogy/core/processing/node_generators/select_node.py,sha256=Ta1G39V94gjX_AgyZDz9OqnwLz4BjY3D6Drx9YpziMQ,3555
 trilogy/core/processing/node_generators/synonym_node.py,sha256=AnAsa_Wj50NJ_IK0HSgab_7klYmKVrv0WI1uUe-GvEY,3766
 trilogy/core/processing/node_generators/union_node.py,sha256=VNo6Oey4p8etU9xrOh2oTT2lIOTvY6PULUPRvVa2uxU,2877

@@ -76,12 +76,12 @@ trilogy/core/statements/build.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 trilogy/core/statements/common.py,sha256=KxEmz2ySySyZ6CTPzn0fJl5NX2KOk1RPyuUSwWhnK1g,759
 trilogy/core/statements/execute.py,sha256=rqfuoMuXPcH7L7TmE1dSiZ_K_A1ohB8whVMfGimZBOk,1294
 trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/dialect/base.py,sha256=
-trilogy/dialect/bigquery.py,sha256=
+trilogy/dialect/base.py,sha256=SpNv1D9eig-XDOui2xoeMvdTfRpejAfJ_HaAMkuPW9w,45112
+trilogy/dialect/bigquery.py,sha256=e19dGcarapgA0x5_Xmq2StyHzuDWPOOPaR4elkWXwug,4203
 trilogy/dialect/common.py,sha256=hhzuMTFW9QQIP7TKLT9BlJy6lw2R03a68jKQ-7t4-2c,6070
 trilogy/dialect/config.py,sha256=olnyeVU5W5T6b9-dMeNAnvxuPlyc2uefb7FRME094Ec,3834
 trilogy/dialect/dataframe.py,sha256=RUbNgReEa9g3pL6H7fP9lPTrAij5pkqedpZ99D8_5AE,1522
-trilogy/dialect/duckdb.py,sha256=
+trilogy/dialect/duckdb.py,sha256=gsXhPKX0D7ykJ9RFK9qx8uBTjLgtHu6PYv6GlBFtnJE,4448
 trilogy/dialect/enums.py,sha256=FRNYQ5-w-B6-X0yXKNU5g9GowsMlERFogTC5u2nxL_s,4740
 trilogy/dialect/postgres.py,sha256=VH4EB4myjIeZTHeFU6vK00GxY9c53rCBjg2mLbdaCEE,3254
 trilogy/dialect/presto.py,sha256=Wd0yHq3EOSfCOy7lWPfCr13JHO3olsm8qUXgml-oTm0,3529

@@ -93,13 +93,13 @@ trilogy/hooks/graph_hook.py,sha256=5BfR7Dt0bgEsCLgwjowgCsVkboGYfVJGOz8g9mqpnos,4
 trilogy/hooks/query_debugger.py,sha256=1npRjww94sPV5RRBBlLqMJRaFkH9vhEY6o828MeoEcw,5583
 trilogy/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/parsing/common.py,sha256=
+trilogy/parsing/common.py,sha256=_5UEnLtu40VQ8gb6wg3GtSrxf6IONhEOntmdsm0X4lU,30961
 trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
 trilogy/parsing/exceptions.py,sha256=Xwwsv2C9kSNv2q-HrrKC1f60JNHShXcCMzstTSEbiCw,154
 trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-trilogy/parsing/parse_engine.py,sha256=
+trilogy/parsing/parse_engine.py,sha256=vYhGmSJXi5TcLvt2mujISucQc35j4kHa78hj5ip9gGQ,72564
 trilogy/parsing/render.py,sha256=gGCFj2ue0UoaU2MR6qHGMAHXkYRMkTmHjnBowdcgFMY,19603
-trilogy/parsing/trilogy.lark,sha256=
+trilogy/parsing/trilogy.lark,sha256=1RIqA7zrGuqDJYSv9yHGSw0vdIfGOLPOnc4hSBRSTVU,14346
 trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/scripts/trilogy.py,sha256=1L0XrH4mVHRt1C9T1HnaDv2_kYEfbWTb5_-cBBke79w,3774
 trilogy/std/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -110,8 +110,8 @@ trilogy/std/money.preql,sha256=XWwvAV3WxBsHX9zfptoYRnBigcfYwrYtBHXTME0xJuQ,2082
 trilogy/std/net.preql,sha256=-bMV6dyofskl4Kvows-iQ4JCxjVUwsZOeWCy8JO5Ftw,135
 trilogy/std/ranking.preql,sha256=LDoZrYyz4g3xsII9XwXfmstZD-_92i1Eox1UqkBIfi8,83
 trilogy/std/report.preql,sha256=LbV-XlHdfw0jgnQ8pV7acG95xrd1-p65fVpiIc-S7W4,202
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
+pytrilogy-0.0.3.70.dist-info/METADATA,sha256=dSEU2zgUrBXl88Yb-KV8y_hta24BeHt5VaPM19-YMFU,9734
+pytrilogy-0.0.3.70.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pytrilogy-0.0.3.70.dist-info/entry_points.txt,sha256=ewBPU2vLnVexZVnB-NrVj-p3E-4vukg83Zk8A55Wp2w,56
+pytrilogy-0.0.3.70.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
+pytrilogy-0.0.3.70.dist-info/RECORD,,
trilogy/__init__.py CHANGED
trilogy/core/functions.py CHANGED

@@ -370,10 +370,10 @@ FUNCTION_REGISTRY: dict[FunctionType, FunctionConfig] = {
         arg_count=2,
     ),
     FunctionType.REGEXP_EXTRACT: FunctionConfig(
-        valid_inputs={DataType.STRING},
+        valid_inputs=[{DataType.STRING}, {DataType.STRING}, {DataType.INTEGER}],
         output_purpose=Purpose.PROPERTY,
         output_type=DataType.STRING,
-        arg_count=
+        arg_count=3,
     ),
     FunctionType.REGEXP_REPLACE: FunctionConfig(
         valid_inputs={DataType.STRING},
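The `REGEXP_EXTRACT` registry entry now declares one allowed-type set per argument position and a fixed `arg_count=3` (string, pattern, capture group). The snippet below is a standalone sketch of that per-position validation idea, not the library's `FunctionConfig` implementation; all names are illustrative.

```python
from enum import Enum


class DataType(Enum):
    # minimal stand-in for trilogy's DataType enum
    STRING = "string"
    INTEGER = "int"


# one allowed-type set per argument position, mirroring the new config shape
REGEXP_EXTRACT_INPUTS = [{DataType.STRING}, {DataType.STRING}, {DataType.INTEGER}]


def validate_args(arg_types: list, valid_inputs: list) -> None:
    """Check arity, then each argument against its position's allowed set."""
    if len(arg_types) != len(valid_inputs):
        raise ValueError(f"expected {len(valid_inputs)} arguments, got {len(arg_types)}")
    for position, (arg, allowed) in enumerate(zip(arg_types, valid_inputs)):
        if arg not in allowed:
            raise ValueError(f"argument {position} must be one of {allowed}, got {arg}")


# regexp_extract(<string>, <pattern>, <group>) passes; a wrong third type would raise
validate_args([DataType.STRING, DataType.STRING, DataType.INTEGER], REGEXP_EXTRACT_INPUTS)
```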
trilogy/core/models/author.py CHANGED

@@ -2303,7 +2303,7 @@ class CustomFunctionFactory:
         ]
         return self
 
-    def __call__(self, *creation_args:
+    def __call__(self, *creation_args: Expr):
         nout = (
             self.function.model_copy(deep=True)
             if isinstance(self.function, BaseModel)

@@ -2389,7 +2389,6 @@ Expr = (
     | int
     | str
     | float
-    | list
     | date
     | datetime
     | TupleWrapper

@@ -2430,7 +2429,6 @@ FuncArgs = (
     | ListType
     | MapType
     | NumericType
-    | list
     | ListWrapper[Any]
     | TupleWrapper[Any]
     | Comparison
trilogy/core/models/build.py CHANGED

@@ -155,6 +155,7 @@ def concepts_to_build_grain_concepts(
             pconcepts.append(c)
         elif environment:
             pconcepts.append(environment.concepts[c])
+
         else:
             raise ValueError(
                 f"Unable to resolve input {c} without environment provided to concepts_to_grain call"

@@ -248,7 +249,7 @@ def get_concept_arguments(expr) -> List["BuildConcept"]:
     return output
 
 
-class BuildParamaterizedConceptReference(BaseModel):
+class BuildParamaterizedConceptReference(DataTyped, BaseModel):
     concept: BuildConcept
 
     def __str__(self):

@@ -258,6 +259,10 @@ class BuildParamaterizedConceptReference(BaseModel):
     def safe_address(self) -> str:
         return self.concept.safe_address
 
+    @property
+    def output_datatype(self) -> DataType:
+        return self.concept.output_datatype
+
 
 class BuildGrain(BaseModel):
     components: set[str] = Field(default_factory=set)

@@ -1810,8 +1815,8 @@ class Factory:
             right_c, _ = self.instantiate_concept(base.right)
             right = right_c
         return BuildSubselectComparison.model_construct(
-            left=self.build(base.left),
-            right=self.build(right),
+            left=self.handle_constant(self.build(base.left)),
+            right=self.handle_constant(self.build(right)),
             operator=base.operator,
         )
 

@@ -1916,7 +1921,17 @@ class Factory:
         where_factory = Factory(
             grain=Grain(), environment=self.environment, local_concepts={}
         )
-
+        where_clause = (
+            where_factory.build(base.where_clause) if base.where_clause else None
+        )
+        # if the where clause derives new concepts
+        # we need to ensure these are accessible from the general factory
+        # post resolution
+        for bk, bv in where_factory.local_concepts.items():
+            # but do not override any local cahced grains
+            if bk in materialized:
+                continue
+            materialized[bk] = bv
         final: List[BuildConcept] = []
         for original in base.selection:
             new = original

@@ -1943,9 +1958,7 @@ class Factory:
                 factory.build(base.having_clause) if base.having_clause else None
             ),
             # this uses a different grain factory
-            where_clause=
-                where_factory.build(base.where_clause) if base.where_clause else None
-            ),
+            where_clause=where_clause,
         )
 
     @build.register
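The new where-clause handling in `Factory` builds the clause with a separate grain-less factory and then copies any concepts that factory derived back into the materialized map, skipping addresses that are already cached. A minimal sketch of that merge rule with placeholder values (not the actual Factory code):

```python
def merge_derived(materialized: dict, derived: dict) -> None:
    """Copy where-clause-derived concepts in without overriding cached builds."""
    for address, concept in derived.items():
        if address in materialized:
            continue  # keep the locally cached grain/build
        materialized[address] = concept


cache = {"local.order_id": "cached-build"}
where_derived = {"local.order_id": "where-build", "local.is_recent": "where-build"}
merge_derived(cache, where_derived)
assert cache["local.order_id"] == "cached-build"  # cached entry preserved
assert cache["local.is_recent"] == "where-build"  # newly derived concept now visible
```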
trilogy/core/processing/discovery_node_factory.py CHANGED

@@ -469,6 +469,7 @@ def generate_node(
         Derivation.GROUP_TO: lambda: _generate_group_to_node(context),
         Derivation.BASIC: lambda: _generate_basic_node(context),
         Derivation.ROOT: lambda: RootNodeHandler(context).generate(),
+        Derivation.CONSTANT: lambda: RootNodeHandler(context).generate(),
     }
 
     handler = derivation_handlers.get(concept.derivation)
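The node factory dispatches on a concept's derivation through a table of lambdas, and this release routes `Derivation.CONSTANT` to the same `RootNodeHandler` used for `Derivation.ROOT`. A simplified standalone sketch of that dispatch pattern (toy handlers, not the trilogy implementation):

```python
from enum import Enum, auto


class Derivation(Enum):
    ROOT = auto()
    CONSTANT = auto()
    BASIC = auto()


def generate_root_node(concept: str) -> str:
    # constants have no upstream lineage to resolve, so they can use the root path
    return f"root-node({concept})"


def generate_basic_node(concept: str) -> str:
    return f"basic-node({concept})"


HANDLERS = {
    Derivation.ROOT: lambda c: generate_root_node(c),
    Derivation.CONSTANT: lambda c: generate_root_node(c),  # the mapping added in this release
    Derivation.BASIC: lambda c: generate_basic_node(c),
}

print(HANDLERS[Derivation.CONSTANT]("pi"))  # root-node(pi)
```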
trilogy/core/processing/node_generators/group_node.py CHANGED

@@ -22,6 +22,24 @@ from trilogy.utility import unique
 LOGGER_PREFIX = "[GEN_GROUP_NODE]"
 
 
+def get_aggregate_grain(
+    concept: BuildConcept, environment: BuildEnvironment
+) -> BuildGrain:
+    parent_concepts: List[BuildConcept] = unique(
+        resolve_function_parent_concepts(concept, environment=environment), "address"
+    )
+    if (
+        concept.grain
+        and len(concept.grain.components) > 0
+        and not concept.grain.abstract
+    ):
+        grain_components = [environment.concepts[c] for c in concept.grain.components]
+        parent_concepts += grain_components
+        return BuildGrain.from_concepts(parent_concepts)
+    else:
+        return BuildGrain.from_concepts(parent_concepts)
+
+
 def gen_group_node(
     concept: BuildConcept,
     local_optional: List[BuildConcept],

@@ -51,7 +69,7 @@ def gen_group_node(
     ):
         grain_components = [environment.concepts[c] for c in concept.grain.components]
         parent_concepts += grain_components
-        build_grain_parents =
+        build_grain_parents = get_aggregate_grain(concept, environment)
         output_concepts += grain_components
     for possible_agg in local_optional:
 

@@ -70,6 +88,7 @@ def gen_group_node(
             possible_agg,
             environment=environment,
         )
+        comp_grain = get_aggregate_grain(possible_agg, environment)
         if set([x.address for x in agg_parents]).issubset(
             set([x.address for x in parent_concepts])
         ):

@@ -77,7 +96,7 @@ def gen_group_node(
             logger.info(
                 f"{padding(depth)}{LOGGER_PREFIX} found equivalent group by optional concept {possible_agg.address} for {concept.address}"
             )
-        elif
+        elif comp_grain == build_grain_parents:
             extra = [x for x in agg_parents if x.address not in parent_concepts]
             parent_concepts += extra
             output_concepts.append(possible_agg)

@@ -86,7 +105,7 @@ def gen_group_node(
             )
         else:
             logger.info(
-                f"{padding(depth)}{LOGGER_PREFIX} cannot include optional agg {possible_agg.address}; mismatched parent grain {
+                f"{padding(depth)}{LOGGER_PREFIX} cannot include optional agg {possible_agg.address}; it has mismatched parent grain {comp_grain } vs local parent {build_grain_parents}"
             )
     if parent_concepts:
         logger.info(
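`get_aggregate_grain` computes the grain an aggregate would be grouped at (its function parents plus any explicit grain components), and `gen_group_node` now admits an optional aggregate into the same node only when `comp_grain == build_grain_parents`. A simplified sketch of that gate, treating a grain as a plain set of grouping-key addresses (the real `BuildGrain` derives these from concept lineage):

```python
def can_share_group_node(anchor_grain: set, candidate_grain: set) -> bool:
    """An optional aggregate rides along only if it groups by the same keys."""
    return anchor_grain == candidate_grain


anchor = {"order.customer_id"}                        # e.g. sum(revenue) by customer
same_keys = {"order.customer_id"}                     # e.g. count(order.id) by customer
different_keys = {"order.customer_id", "order.date"}  # grouped by customer and date

assert can_share_group_node(anchor, same_keys)           # computed in one GROUP BY node
assert not can_share_group_node(anchor, different_keys)  # needs its own node
```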
trilogy/core/processing/node_generators/select_merge_node.py CHANGED

@@ -231,7 +231,10 @@ def create_pruned_concept_graph(
 
 
 def resolve_subgraphs(
-    g: nx.DiGraph,
+    g: nx.DiGraph,
+    relevant: list[BuildConcept],
+    conditions: BuildWhereClause | None,
+    depth: int = 0,
 ) -> dict[str, list[str]]:
     """When we have multiple distinct subgraphs within our matched
     nodes that can satisfy a query, resolve which one of those we should

@@ -241,7 +244,7 @@ def resolve_subgraphs(
     discarding duplicates.
     Duplicate subgraphs will be resolved based on which
     ones are most 'optimal' to use, a hueristic
-    that can evolve in the future but is currently based on
+    that can evolve in the future but is currently based on datasource
     cardinality."""
     datasources = [n for n in g.nodes if n.startswith("ds~")]
     subgraphs: dict[str, list[str]] = {

@@ -261,7 +264,7 @@ def resolve_subgraphs(
     pruned_subgraphs = {}
 
     def score_node(input: str):
-        logger.debug(f"scoring node {input}")
+        logger.debug(f"{padding(depth)}{LOGGER_PREFIX} scoring node {input}")
         grain = grain_length[input]
         # first - go for lowest grain
         # but if the object we want is in the grain, treat that as "free"

@@ -275,7 +278,7 @@ def resolve_subgraphs(
             len(subgraphs[input]),
             input,
         )
-        logger.debug(score)
+        logger.debug(f"{padding(depth)}{LOGGER_PREFIX} node {input} has score {score}")
         return score
 
     for key, nodes in subgraphs.items():

@@ -296,7 +299,7 @@ def resolve_subgraphs(
             if len(value) < len(other_value):
                 is_subset = True
                 logger.debug(
-                    f"Dropping subgraph {key} with {value} as it is a subset of {other_key} with {other_value}"
+                    f"{padding(depth)}{LOGGER_PREFIX} Dropping subgraph {key} with {value} as it is a subset of {other_key} with {other_value}"
                 )
             elif len(value) == len(other_value) and len(all_concepts) == len(
                 other_all_concepts

@@ -305,7 +308,9 @@ def resolve_subgraphs(
                 matches.add(key)
         if matches and not is_subset:
             min_node = min(matches, key=score_node)
-            logger.debug(
+            logger.debug(
+                f"{padding(depth)}{LOGGER_PREFIX} minimum source score is {min_node}"
+            )
             is_subset = key is not min(matches, key=score_node)
         if not is_subset:
             pruned_subgraphs[key] = nodes

@@ -330,7 +335,9 @@ def resolve_subgraphs(
             > 1
         )
         if not keep:
-            logger.debug(
+            logger.debug(
+                f"{padding(depth)}{LOGGER_PREFIX} Pruning node {node} as irrelevant after subgraph resolution"
+            )
             pruned_subgraphs = {
                 k: [n for n in v if n != node] for k, v in pruned_subgraphs.items()
             }

@@ -561,7 +568,7 @@ def gen_select_merge_node(
         return None
 
     sub_nodes = resolve_subgraphs(
-        pruned_concept_graph, relevant=non_constant, conditions=conditions
+        pruned_concept_graph, relevant=non_constant, conditions=conditions, depth=depth
    )
 
     logger.info(f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {sub_nodes}")
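`resolve_subgraphs` picks between otherwise equivalent datasource subgraphs with `score_node`, which returns a tuple that ends in the node name so `min()` is deterministic when the numeric fields tie. A standalone sketch of that tuple-scoring pattern (toy fields standing in for the real grain and membership metrics):

```python
grain_size = {"ds~orders": 2, "ds~orders_daily": 2, "ds~orders_wide": 3}
member_count = {"ds~orders": 3, "ds~orders_daily": 3, "ds~orders_wide": 5}


def score_node(name: str) -> tuple:
    # lower grain first, then fewer members, then the name as a stable tie-breaker
    return (grain_size[name], member_count[name], name)


candidates = ["ds~orders_wide", "ds~orders_daily", "ds~orders"]
best = min(candidates, key=score_node)
print(best)  # ds~orders: the tie on (2, 3) falls through to the lexicographic name
```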
trilogy/core/query_processor.py CHANGED

@@ -395,14 +395,13 @@ def get_query_node(
     if not statement.output_components:
         raise ValueError(f"Statement has no output components {statement}")
     history = history or History(base_environment=environment)
-
+    logger.info(
         f"{LOGGER_PREFIX} building query node for {statement.output_components} grain {statement.grain}"
     )
     build_statement: BuildSelectLineage | BuildMultiSelectLineage = Factory(
         environment=environment,
     ).build(statement)
 
-    # build_statement = statement
     build_environment = environment.materialize_for_select(
         build_statement.local_concepts
     )
trilogy/dialect/base.py CHANGED

@@ -12,6 +12,7 @@ from trilogy.constants import (
 )
 from trilogy.core.constants import UNNEST_NAME
 from trilogy.core.enums import (
+    ComparisonOperator,
     DatePart,
     FunctionType,
     UnnestMode,

@@ -276,7 +277,13 @@ ORDER BY{% for order in order_by %}
 )
 
 
-def safe_get_cte_value(
+def safe_get_cte_value(
+    coalesce,
+    cte: CTE | UnionCTE,
+    c: BuildConcept,
+    quote_char: str,
+    render_expr: Callable,
+) -> Optional[str]:
     address = c.address
     raw = cte.source_map.get(address, None)
 

@@ -287,6 +294,9 @@ def safe_get_cte_value(coalesce, cte: CTE | UnionCTE, c: BuildConcept, quote_cha
         return f"{quote_char}{raw}{quote_char}.{safe_quote(rendered, quote_char)}"
     if isinstance(raw, list) and len(raw) == 1:
         rendered = cte.get_alias(c, raw[0])
+        if isinstance(rendered, FUNCTION_ITEMS):
+            # if it's a function, we need to render it as a function
+            return f"{render_expr(rendered, cte=cte, raise_invalid=True)}"
         return f"{quote_char}{raw[0]}{quote_char}.{safe_quote(rendered, quote_char)}"
     return coalesce(
         sorted(

@@ -499,6 +509,7 @@ class BaseDialect:
                 cte,
                 c,
                 self.QUOTE_CHARACTER,
+                self.render_expr,
             )
             if not rval:
                 # unions won't have a specific source mapped; just use a generic column reference

@@ -515,6 +526,17 @@ class BaseDialect:
             )
         return rval
 
+    def render_array_unnest(
+        self,
+        left,
+        right,
+        operator: ComparisonOperator,
+        cte: CTE | UnionCTE | None = None,
+        cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
+        raise_invalid: bool = False,
+    ):
+        return f"{self.render_expr(left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {operator.value} {self.render_expr(right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"
+
     def render_expr(
         self,
         e: Union[

@@ -556,6 +578,7 @@ class BaseDialect:
         raise_invalid: bool = False,
     ) -> str:
         if isinstance(e, SUBSELECT_COMPARISON_ITEMS):
+
             if isinstance(e.right, BuildConcept):
                 # we won't always have an existnce map
                 # so fall back to the normal map

@@ -585,10 +608,22 @@ class BaseDialect:
                     info = cte.inlined_ctes[target]
                     return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {info.new_base} as {target} where {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} is not null)"
                 return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {target} where {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} is not null)"
-
+            elif isinstance(e.right, BuildParamaterizedConceptReference):
+                if isinstance(e.right.concept.lineage, BuildFunction) and isinstance(
+                    e.right.concept.lineage.arguments[0], ListWrapper
+                ):
+                    return self.render_array_unnest(
+                        e.left,
+                        e.right,
+                        e.operator,
+                        cte=cte,
+                        cte_map=cte_map,
+                        raise_invalid=raise_invalid,
+                    )
+                return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"
             elif isinstance(
                 e.right,
-                (ListWrapper, TupleWrapper, BuildParenthetical
+                (ListWrapper, TupleWrapper, BuildParenthetical),
             ):
                 return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}"
 

@@ -692,13 +727,11 @@ class BaseDialect:
             return f"'{e}'"
         elif isinstance(e, (int, float)):
             return str(e)
-        elif isinstance(e, ListWrapper):
-            return f"[{','.join([self.render_expr(x, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) for x in e])}]"
         elif isinstance(e, TupleWrapper):
             return f"({','.join([self.render_expr(x, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) for x in e])})"
         elif isinstance(e, MapWrapper):
             return f"MAP {{{','.join([f'{self.render_expr(k, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}:{self.render_expr(v, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)}' for k, v in e.items()])}}}"
-        elif isinstance(e,
+        elif isinstance(e, ListWrapper):
             return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([self.render_expr(x, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid) for x in e])}"
         elif isinstance(e, DataType):
             return self.DATATYPE_MAP.get(e, e.value)
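One behavioral note from this file: `ListWrapper` literals are no longer rendered as a hard-coded `[a,b,c]` string; they now flow through the dialect's `FUNCTION_MAP[FunctionType.ARRAY]`, so each backend chooses its own array-constructor syntax. A standalone sketch of that indirection; the renderers below are illustrative placeholders, not the actual dialect templates:

```python
from typing import Callable, Dict, Sequence

# hypothetical per-dialect array constructors; real dialects define these in FUNCTION_MAP
ARRAY_RENDERERS: Dict[str, Callable[[Sequence[str]], str]] = {
    "bracket_style": lambda items: f"[{','.join(items)}]",
    "keyword_style": lambda items: f"ARRAY[{','.join(items)}]",
}


def render_list_literal(dialect: str, rendered_items: Sequence[str]) -> str:
    """Delegate array-literal syntax to the dialect instead of hard-coding brackets."""
    return ARRAY_RENDERERS[dialect](rendered_items)


print(render_list_literal("bracket_style", ["'Elvis'", "'Elvira'"]))  # ['Elvis','Elvira']
print(render_list_literal("keyword_style", ["'Elvis'", "'Elvira'"]))  # ARRAY['Elvis','Elvira']
```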
trilogy/dialect/bigquery.py CHANGED

@@ -1,11 +1,17 @@
-from typing import Any, Callable, Mapping
+from typing import Any, Callable, Dict, Mapping, Optional
 
 from jinja2 import Template
 
-from trilogy.core.enums import
+from trilogy.core.enums import (
+    ComparisonOperator,
+    FunctionType,
+    UnnestMode,
+    WindowType,
+)
 from trilogy.core.models.core import (
     DataType,
 )
+from trilogy.core.models.execute import CTE, UnionCTE
 from trilogy.dialect.base import BaseDialect
 
 WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}

@@ -99,3 +105,14 @@ class BigqueryDialect(BaseDialect):
     SQL_TEMPLATE = BQ_SQL_TEMPLATE
     UNNEST_MODE = UnnestMode.CROSS_JOIN_UNNEST
     DATATYPE_MAP = DATATYPE_MAP
+
+    def render_array_unnest(
+        self,
+        left,
+        right,
+        operator: ComparisonOperator,
+        cte: CTE | UnionCTE | None = None,
+        cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
+        raise_invalid: bool = False,
+    ):
+        return f"{self.render_expr(left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {operator.value} unnest({self.render_expr(right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)})"
trilogy/dialect/duckdb.py CHANGED

@@ -1,3 +1,4 @@
+import re
 from typing import Any, Callable, Mapping
 
 from jinja2 import Template

@@ -8,6 +9,19 @@ from trilogy.dialect.base import BaseDialect
 
 WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
 
+SENTINAL_AUTO_CAPTURE_GROUP_VALUE = "-1"
+
+
+def generate_regex_extract(x: list[str]) -> str:
+    if str(x[2]) == SENTINAL_AUTO_CAPTURE_GROUP_VALUE:
+        regex = re.compile(x[1])
+        if regex.groups == 0:
+            search = 0
+        else:
+            search = 1
+        return f"REGEXP_EXTRACT({x[0]},{x[1]},{search})"
+    return f"REGEXP_EXTRACT({x[0]},{x[1]},{x[2]})"
+
 
 FUNCTION_MAP = {
     FunctionType.COUNT: lambda args: f"count({args[0]})",

@@ -37,6 +51,9 @@ FUNCTION_MAP = {
     FunctionType.DATETIME_LITERAL: lambda x: f"datetime '{x}'",
     # string
     FunctionType.CONTAINS: lambda x: f"CONTAINS(LOWER({x[0]}), LOWER({x[1]}))",
+    # regexp
+    FunctionType.REGEXP_CONTAINS: lambda x: f"REGEXP_MATCHES({x[0]},{x[1]})",
+    FunctionType.REGEXP_EXTRACT: lambda x: generate_regex_extract(x),
 }
 
 # if an aggregate function is called on a source that is at the same grain as the aggregate
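`generate_regex_extract` treats the sentinel third argument `"-1"` (injected upstream by the parser when `regexp_extract` is called with only two arguments) as "pick a sensible default": capture group 1 if the pattern defines one, otherwise group 0 for the whole match, using `re.compile(...).groups` to decide. A minimal standalone illustration of that rule on plain pattern strings:

```python
import re


def default_capture_group(pattern: str) -> int:
    """Group 1 if the pattern defines a capture group, otherwise the whole match (0)."""
    return 1 if re.compile(pattern).groups else 0


print(default_capture_group(r"([A-Z][a-z]+)"))  # 1: an explicit capture group exists
print(default_capture_group(r"[A-Z][a-z]+"))    # 0: no capture group, take the whole match
```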
trilogy/parsing/common.py CHANGED

@@ -44,6 +44,7 @@ from trilogy.core.models.author import (
     RowsetLineage,
     SubselectComparison,
     TraitDataType,
+    TupleWrapper,
     UndefinedConcept,
     WhereClause,
     WindowItem,

@@ -172,7 +173,7 @@ def concept_list_to_keys(
 
 
 def constant_to_concept(
-    parent: ListWrapper |
+    parent: ListWrapper | TupleWrapper | MapWrapper | int | float | str,
     name: str,
     namespace: str,
     metadata: Metadata | None = None,

@@ -183,7 +184,7 @@ def constant_to_concept(
         output_purpose=Purpose.CONSTANT,
         arguments=[parent],
     )
-    assert const_function.arguments[0] == parent, const_function.arguments[0]
+    # assert const_function.arguments[0] == parent, f'{const_function.arguments[0]} != {parent}, {type(const_function.arguments[0])} != {type(parent)}'
     fmetadata = metadata or Metadata()
     return Concept(
         name=name,
trilogy/parsing/parse_engine.py CHANGED

@@ -146,7 +146,7 @@ class ParsePass(Enum):
     VALIDATION = 2
 
 
-CONSTANT_TYPES = (int, float, str, bool,
+CONSTANT_TYPES = (int, float, str, bool, ListWrapper, TupleWrapper, MapWrapper)
 
 SELF_LABEL = "root"
 

@@ -742,6 +742,7 @@ class ParseToObjects(Transformer):
         lookup, namespace, name, parent = parse_concept_reference(
             name, self.environment
         )
+
         concept = Concept(
             name=name,
             datatype=arg_to_datatype(constant),

@@ -1801,6 +1802,9 @@ class ParseToObjects(Transformer):
 
     @v_args(meta=True)
     def fregexp_extract(self, meta, args):
+        if len(args) == 2:
+            # this is a magic value to represent the default behavior
+            args.append(-1)
         return self.function_factory.create_function(
             args, FunctionType.REGEXP_EXTRACT, meta
         )
trilogy/parsing/trilogy.lark CHANGED

@@ -279,7 +279,7 @@
 _SUBSTRING.1: "substring("i
 fsubstring: _SUBSTRING expr "," expr "," expr ")"
 _REGEXP_EXTRACT.1: "regexp_extract("
-fregexp_extract: _REGEXP_EXTRACT expr "," expr ")"
+fregexp_extract: _REGEXP_EXTRACT expr "," expr ("," int_lit)? ")"
 _REGEXP_CONTAINS.1: "regexp_contains("
 fregexp_contains: _REGEXP_CONTAINS expr "," expr ")"
 _REGEXP_REPLACE.1: "regexp_replace("
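The grammar change makes the third argument of `fregexp_extract` optional via `("," int_lit)?`, so both `regexp_extract(x, pattern)` and `regexp_extract(x, pattern, 2)` parse. Below is a self-contained lark sketch of that optional-group pattern using a toy grammar, not trilogy.lark itself:

```python
from lark import Lark

# toy rule: two required arguments plus an optional trailing integer, mirroring ("," int_lit)?
grammar = r"""
    start: "extract" "(" NAME "," STRING ("," INT)? ")"
    NAME: /[a-z_]+/
    STRING: /'[^']*'/
    INT: /-?\d+/
    %import common.WS
    %ignore WS
"""

parser = Lark(grammar)
print(parser.parse("extract(name, '([A-Z])[a-z]+')").pretty())     # two-argument form
print(parser.parse("extract(name, '([A-Z])[a-z]+', 2)").pretty())  # explicit capture group
```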
{pytrilogy-0.0.3.68.dist-info → pytrilogy-0.0.3.70.dist-info}/WHEEL: file without changes
{pytrilogy-0.0.3.68.dist-info → pytrilogy-0.0.3.70.dist-info}/entry_points.txt: file without changes
{pytrilogy-0.0.3.68.dist-info → pytrilogy-0.0.3.70.dist-info}/licenses/LICENSE.md: file without changes
{pytrilogy-0.0.3.68.dist-info → pytrilogy-0.0.3.70.dist-info}/top_level.txt: file without changes