pytrilogy 0.0.3.69__py3-none-any.whl → 0.0.3.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.3.69.dist-info → pytrilogy-0.0.3.71.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.3.69.dist-info → pytrilogy-0.0.3.71.dist-info}/RECORD +23 -23
- trilogy/__init__.py +1 -1
- trilogy/core/functions.py +2 -2
- trilogy/core/graph_models.py +6 -2
- trilogy/core/models/datasource.py +5 -0
- trilogy/core/models/execute.py +38 -39
- trilogy/core/processing/concept_strategies_v3.py +6 -4
- trilogy/core/processing/node_generators/node_merge_node.py +3 -1
- trilogy/core/processing/node_generators/rowset_node.py +0 -1
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +69 -20
- trilogy/core/processing/node_generators/select_merge_node.py +70 -33
- trilogy/core/processing/nodes/base_node.py +16 -12
- trilogy/core/processing/nodes/union_node.py +9 -1
- trilogy/core/query_processor.py +5 -3
- trilogy/dialect/base.py +4 -1
- trilogy/dialect/duckdb.py +17 -0
- trilogy/parsing/parse_engine.py +3 -0
- trilogy/parsing/trilogy.lark +1 -1
- {pytrilogy-0.0.3.69.dist-info → pytrilogy-0.0.3.71.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.3.69.dist-info → pytrilogy-0.0.3.71.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.69.dist-info → pytrilogy-0.0.3.71.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.69.dist-info → pytrilogy-0.0.3.71.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
pytrilogy-0.0.3.
|
|
2
|
-
trilogy/__init__.py,sha256=
|
|
1
|
+
pytrilogy-0.0.3.71.dist-info/licenses/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
|
|
2
|
+
trilogy/__init__.py,sha256=M3ozbgvpiJ9h44b-oCogBhygSVgnDOV37ejNFOYUV5w,303
|
|
3
3
|
trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
trilogy/constants.py,sha256=lv_aJWP6dn6e2aF4BAE72jbnNtceFddfqtiDSsvzno0,1692
|
|
5
5
|
trilogy/engine.py,sha256=OK2RuqCIUId6yZ5hfF8J1nxGP0AJqHRZiafcowmW0xc,1728
|
|
@@ -16,26 +16,26 @@ trilogy/core/env_processor.py,sha256=pFsxnluKIusGKx1z7tTnfsd_xZcPy9pZDungkjkyvI0
|
|
|
16
16
|
trilogy/core/environment_helpers.py,sha256=VvPIiFemqaLLpIpLIqprfu63K7muZ1YzNg7UZIUph8w,8267
|
|
17
17
|
trilogy/core/ergonomics.py,sha256=e-7gE29vPLFdg0_A1smQ7eOrUwKl5VYdxRSTddHweRA,1631
|
|
18
18
|
trilogy/core/exceptions.py,sha256=jYEduuMehcMkmCpf-OC_taELPZm7qNfeSNzIWkDYScs,707
|
|
19
|
-
trilogy/core/functions.py,sha256=
|
|
20
|
-
trilogy/core/graph_models.py,sha256=
|
|
19
|
+
trilogy/core/functions.py,sha256=R8_aOe2mNRgOLmsnI9pG_GOU3I7kFPTnXQzplN2d7Dw,29343
|
|
20
|
+
trilogy/core/graph_models.py,sha256=BYhJzHKSgnZHVLJs1CfsgrxTPHqKqPNeA64RlozGY0A,3498
|
|
21
21
|
trilogy/core/internal.py,sha256=iicDBlC6nM8d7e7jqzf_ZOmpUsW8yrr2AA8AqEiLx-s,1577
|
|
22
22
|
trilogy/core/optimization.py,sha256=ojpn-p79lr03SSVQbbw74iPCyoYpDYBmj1dbZ3oXCjI,8860
|
|
23
|
-
trilogy/core/query_processor.py,sha256=
|
|
23
|
+
trilogy/core/query_processor.py,sha256=5aFgv-2LVM1Uku9cR_tFuTRDwyLnxc95bCMAHeFy2AY,20332
|
|
24
24
|
trilogy/core/utility.py,sha256=3VC13uSQWcZNghgt7Ot0ZTeEmNqs__cx122abVq9qhM,410
|
|
25
25
|
trilogy/core/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
26
|
trilogy/core/models/author.py,sha256=Wz_6yEZS5EyXJ5wHHl-C44ikFka5XftBYOjNRK98Hfw,77790
|
|
27
27
|
trilogy/core/models/build.py,sha256=BNHzfqD5NWCKHntvnswvDrtCD1JFgvPedx23cPq942U,65405
|
|
28
28
|
trilogy/core/models/build_environment.py,sha256=s_C9xAHuD3yZ26T15pWVBvoqvlp2LdZ8yjsv2_HdXLk,5363
|
|
29
29
|
trilogy/core/models/core.py,sha256=EMAuWTngoNVGCdfNrAY7_k6g528iodNQLwPRVip-8DA,10980
|
|
30
|
-
trilogy/core/models/datasource.py,sha256=
|
|
30
|
+
trilogy/core/models/datasource.py,sha256=wogTevZ-9CyUW2a8gjzqMCieircxi-J5lkI7EOAZnck,9596
|
|
31
31
|
trilogy/core/models/environment.py,sha256=TBbPfsXHpJK49QKuqHwhgZD4PwHiSAYjXmTTTomRE7o,27861
|
|
32
|
-
trilogy/core/models/execute.py,sha256=
|
|
32
|
+
trilogy/core/models/execute.py,sha256=A4SkqmOW9XrbgPDhP7LnS9dUq-Tw4vgzVcQbkkB2ljA,41706
|
|
33
33
|
trilogy/core/optimizations/__init__.py,sha256=YH2-mGXZnVDnBcWVi8vTbrdw7Qs5TivG4h38rH3js_I,290
|
|
34
34
|
trilogy/core/optimizations/base_optimization.py,sha256=gzDOKImoFn36k7XBD3ysEYDnbnb6vdVIztUfFQZsGnM,513
|
|
35
35
|
trilogy/core/optimizations/inline_datasource.py,sha256=2sWNRpoRInnTgo9wExVT_r9RfLAQHI57reEV5cGHUcg,4329
|
|
36
36
|
trilogy/core/optimizations/predicate_pushdown.py,sha256=g4AYE8Aw_iMlAh68TjNXGP754NTurrDduFECkUjoBnc,9399
|
|
37
37
|
trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
-
trilogy/core/processing/concept_strategies_v3.py,sha256=
|
|
38
|
+
trilogy/core/processing/concept_strategies_v3.py,sha256=3Hy8Lz5NOJt8B3cGv_B0LuOYrlcxM_WiPdsFaFWaMjE,23212
|
|
39
39
|
trilogy/core/processing/discovery_loop.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
40
|
trilogy/core/processing/discovery_node_factory.py,sha256=I3JJxoF-u8OVvqXXAOhvMg2h-KdpHQwg6EpCeQtxGCI,15123
|
|
41
41
|
trilogy/core/processing/discovery_utility.py,sha256=3xdd1ypKappSDm0SJs7WtW5YegL80SlYhDQlkNePp4E,4549
|
|
@@ -49,25 +49,25 @@ trilogy/core/processing/node_generators/filter_node.py,sha256=oRRq2-T3ufgn4D23uQ
|
|
|
49
49
|
trilogy/core/processing/node_generators/group_node.py,sha256=1QJhRxsTklJ5xq8wHlAURZaN9gL9FPpeCa1OJ7IwXnY,6769
|
|
50
50
|
trilogy/core/processing/node_generators/group_to_node.py,sha256=jKcNCDOY6fNblrdZwaRU0sbUSr9H0moQbAxrGgX6iGA,3832
|
|
51
51
|
trilogy/core/processing/node_generators/multiselect_node.py,sha256=GWV5yLmKTe1yyPhN60RG1Rnrn4ktfn9lYYXi_FVU4UI,7061
|
|
52
|
-
trilogy/core/processing/node_generators/node_merge_node.py,sha256=
|
|
52
|
+
trilogy/core/processing/node_generators/node_merge_node.py,sha256=KtTuvL9-xaTV_ZUO1DUyz2Rd595QfD9SvOv5filCDy0,17428
|
|
53
53
|
trilogy/core/processing/node_generators/recursive_node.py,sha256=l5zdh0dURKwmAy8kK4OpMtZfyUEQRk6N-PwSWIyBpSM,2468
|
|
54
|
-
trilogy/core/processing/node_generators/rowset_node.py,sha256=
|
|
55
|
-
trilogy/core/processing/node_generators/select_merge_node.py,sha256=
|
|
54
|
+
trilogy/core/processing/node_generators/rowset_node.py,sha256=5L5u6xz1In8EaHQdcYgR2si-tz9WB9YLXURo4AkUT9A,6630
|
|
55
|
+
trilogy/core/processing/node_generators/select_merge_node.py,sha256=Cv2GwNiYSmwewjuK8T3JB3pbgrLZFPsB75DCP153BMA,22818
|
|
56
56
|
trilogy/core/processing/node_generators/select_node.py,sha256=Ta1G39V94gjX_AgyZDz9OqnwLz4BjY3D6Drx9YpziMQ,3555
|
|
57
57
|
trilogy/core/processing/node_generators/synonym_node.py,sha256=AnAsa_Wj50NJ_IK0HSgab_7klYmKVrv0WI1uUe-GvEY,3766
|
|
58
58
|
trilogy/core/processing/node_generators/union_node.py,sha256=VNo6Oey4p8etU9xrOh2oTT2lIOTvY6PULUPRvVa2uxU,2877
|
|
59
59
|
trilogy/core/processing/node_generators/unnest_node.py,sha256=ueOQtoTf2iJHO09RzWHDFQ5iKZq2fVhGf2KAF2U2kU8,2677
|
|
60
60
|
trilogy/core/processing/node_generators/window_node.py,sha256=GP3Hvkbb0TDA6ef7W7bmvQEHVH-NRIfBT_0W4fcH3g4,6529
|
|
61
61
|
trilogy/core/processing/node_generators/select_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
|
-
trilogy/core/processing/node_generators/select_helpers/datasource_injection.py,sha256=
|
|
62
|
+
trilogy/core/processing/node_generators/select_helpers/datasource_injection.py,sha256=mKCDHxr2eyrdozXEHq_HvuwtBgnb9JPlImJIx6JGX34,7834
|
|
63
63
|
trilogy/core/processing/nodes/__init__.py,sha256=zTge1EzwzEydlcMliIFO_TT7h7lS8l37lyZuQDir1h0,5487
|
|
64
|
-
trilogy/core/processing/nodes/base_node.py,sha256=
|
|
64
|
+
trilogy/core/processing/nodes/base_node.py,sha256=C_CjlOzlGMXckyV0b_PJZerpopNesRCKfambMq7Asvc,18221
|
|
65
65
|
trilogy/core/processing/nodes/filter_node.py,sha256=5VtRfKbCORx0dV-vQfgy3gOEkmmscL9f31ExvlODwvY,2461
|
|
66
66
|
trilogy/core/processing/nodes/group_node.py,sha256=ZJ9LRF1sDOyel5v0MHXHORn6DYdn771nX0-KdHdt3-4,10517
|
|
67
67
|
trilogy/core/processing/nodes/merge_node.py,sha256=02oWRca0ba41U6PSAB14jwnWWxoyrvxRPLwkli259SY,15865
|
|
68
68
|
trilogy/core/processing/nodes/recursive_node.py,sha256=k0rizxR8KE64ievfHx_GPfQmU8QAP118Laeyq5BLUOk,1526
|
|
69
69
|
trilogy/core/processing/nodes/select_node_v2.py,sha256=Xyfq8lU7rP7JTAd8VV0ATDNal64n4xIBgWQsOuMe_Ak,8824
|
|
70
|
-
trilogy/core/processing/nodes/union_node.py,sha256=
|
|
70
|
+
trilogy/core/processing/nodes/union_node.py,sha256=hLAXXVWqEgMWi7dlgSHfCF59fon64av14-uPgJzoKzM,1870
|
|
71
71
|
trilogy/core/processing/nodes/unnest_node.py,sha256=oLKMMNMx6PLDPlt2V5neFMFrFWxET8r6XZElAhSNkO0,2181
|
|
72
72
|
trilogy/core/processing/nodes/window_node.py,sha256=JXJ0iVRlSEM2IBr1TANym2RaUf_p5E_l2sNykRzXWDo,1710
|
|
73
73
|
trilogy/core/statements/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -76,12 +76,12 @@ trilogy/core/statements/build.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
|
|
|
76
76
|
trilogy/core/statements/common.py,sha256=KxEmz2ySySyZ6CTPzn0fJl5NX2KOk1RPyuUSwWhnK1g,759
|
|
77
77
|
trilogy/core/statements/execute.py,sha256=rqfuoMuXPcH7L7TmE1dSiZ_K_A1ohB8whVMfGimZBOk,1294
|
|
78
78
|
trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
79
|
-
trilogy/dialect/base.py,sha256=
|
|
79
|
+
trilogy/dialect/base.py,sha256=YC8GROz9iwRFrUuLdYdzJNKXOliYecmfEqXJtTQGRVE,45187
|
|
80
80
|
trilogy/dialect/bigquery.py,sha256=e19dGcarapgA0x5_Xmq2StyHzuDWPOOPaR4elkWXwug,4203
|
|
81
81
|
trilogy/dialect/common.py,sha256=hhzuMTFW9QQIP7TKLT9BlJy6lw2R03a68jKQ-7t4-2c,6070
|
|
82
82
|
trilogy/dialect/config.py,sha256=olnyeVU5W5T6b9-dMeNAnvxuPlyc2uefb7FRME094Ec,3834
|
|
83
83
|
trilogy/dialect/dataframe.py,sha256=RUbNgReEa9g3pL6H7fP9lPTrAij5pkqedpZ99D8_5AE,1522
|
|
84
|
-
trilogy/dialect/duckdb.py,sha256=
|
|
84
|
+
trilogy/dialect/duckdb.py,sha256=gsXhPKX0D7ykJ9RFK9qx8uBTjLgtHu6PYv6GlBFtnJE,4448
|
|
85
85
|
trilogy/dialect/enums.py,sha256=FRNYQ5-w-B6-X0yXKNU5g9GowsMlERFogTC5u2nxL_s,4740
|
|
86
86
|
trilogy/dialect/postgres.py,sha256=VH4EB4myjIeZTHeFU6vK00GxY9c53rCBjg2mLbdaCEE,3254
|
|
87
87
|
trilogy/dialect/presto.py,sha256=Wd0yHq3EOSfCOy7lWPfCr13JHO3olsm8qUXgml-oTm0,3529
|
|
@@ -97,9 +97,9 @@ trilogy/parsing/common.py,sha256=_5UEnLtu40VQ8gb6wg3GtSrxf6IONhEOntmdsm0X4lU,309
|
|
|
97
97
|
trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
|
|
98
98
|
trilogy/parsing/exceptions.py,sha256=Xwwsv2C9kSNv2q-HrrKC1f60JNHShXcCMzstTSEbiCw,154
|
|
99
99
|
trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
100
|
-
trilogy/parsing/parse_engine.py,sha256=
|
|
100
|
+
trilogy/parsing/parse_engine.py,sha256=vYhGmSJXi5TcLvt2mujISucQc35j4kHa78hj5ip9gGQ,72564
|
|
101
101
|
trilogy/parsing/render.py,sha256=gGCFj2ue0UoaU2MR6qHGMAHXkYRMkTmHjnBowdcgFMY,19603
|
|
102
|
-
trilogy/parsing/trilogy.lark,sha256=
|
|
102
|
+
trilogy/parsing/trilogy.lark,sha256=1RIqA7zrGuqDJYSv9yHGSw0vdIfGOLPOnc4hSBRSTVU,14346
|
|
103
103
|
trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
104
104
|
trilogy/scripts/trilogy.py,sha256=1L0XrH4mVHRt1C9T1HnaDv2_kYEfbWTb5_-cBBke79w,3774
|
|
105
105
|
trilogy/std/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -110,8 +110,8 @@ trilogy/std/money.preql,sha256=XWwvAV3WxBsHX9zfptoYRnBigcfYwrYtBHXTME0xJuQ,2082
|
|
|
110
110
|
trilogy/std/net.preql,sha256=-bMV6dyofskl4Kvows-iQ4JCxjVUwsZOeWCy8JO5Ftw,135
|
|
111
111
|
trilogy/std/ranking.preql,sha256=LDoZrYyz4g3xsII9XwXfmstZD-_92i1Eox1UqkBIfi8,83
|
|
112
112
|
trilogy/std/report.preql,sha256=LbV-XlHdfw0jgnQ8pV7acG95xrd1-p65fVpiIc-S7W4,202
|
|
113
|
-
pytrilogy-0.0.3.
|
|
114
|
-
pytrilogy-0.0.3.
|
|
115
|
-
pytrilogy-0.0.3.
|
|
116
|
-
pytrilogy-0.0.3.
|
|
117
|
-
pytrilogy-0.0.3.
|
|
113
|
+
pytrilogy-0.0.3.71.dist-info/METADATA,sha256=fjanpiqcWQJVfNIXoj2k04p5-0miJb_Rcxc0lZM7moo,9734
|
|
114
|
+
pytrilogy-0.0.3.71.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
115
|
+
pytrilogy-0.0.3.71.dist-info/entry_points.txt,sha256=ewBPU2vLnVexZVnB-NrVj-p3E-4vukg83Zk8A55Wp2w,56
|
|
116
|
+
pytrilogy-0.0.3.71.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
|
|
117
|
+
pytrilogy-0.0.3.71.dist-info/RECORD,,
|
trilogy/__init__.py
CHANGED
trilogy/core/functions.py
CHANGED
|
@@ -370,10 +370,10 @@ FUNCTION_REGISTRY: dict[FunctionType, FunctionConfig] = {
|
|
|
370
370
|
arg_count=2,
|
|
371
371
|
),
|
|
372
372
|
FunctionType.REGEXP_EXTRACT: FunctionConfig(
|
|
373
|
-
valid_inputs={DataType.STRING},
|
|
373
|
+
valid_inputs=[{DataType.STRING}, {DataType.STRING}, {DataType.INTEGER}],
|
|
374
374
|
output_purpose=Purpose.PROPERTY,
|
|
375
375
|
output_type=DataType.STRING,
|
|
376
|
-
arg_count=
|
|
376
|
+
arg_count=3,
|
|
377
377
|
),
|
|
378
378
|
FunctionType.REGEXP_REPLACE: FunctionConfig(
|
|
379
379
|
valid_inputs={DataType.STRING},
|
trilogy/core/graph_models.py
CHANGED
|
@@ -4,7 +4,7 @@ from trilogy.core.models.build import BuildConcept, BuildDatasource, BuildWhereC
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
def get_graph_exact_match(
|
|
7
|
-
g: nx.DiGraph, conditions: BuildWhereClause | None
|
|
7
|
+
g: nx.DiGraph, accept_partial: bool, conditions: BuildWhereClause | None
|
|
8
8
|
) -> set[str]:
|
|
9
9
|
datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
|
|
10
10
|
nx.get_node_attributes(g, "datasource")
|
|
@@ -20,6 +20,9 @@ def get_graph_exact_match(
|
|
|
20
20
|
if not conditions and not ds.non_partial_for:
|
|
21
21
|
exact.add(node)
|
|
22
22
|
continue
|
|
23
|
+
elif not conditions and accept_partial and ds.non_partial_for:
|
|
24
|
+
exact.add(node)
|
|
25
|
+
continue
|
|
23
26
|
elif conditions:
|
|
24
27
|
if not ds.non_partial_for:
|
|
25
28
|
continue
|
|
@@ -34,10 +37,11 @@ def get_graph_exact_match(
|
|
|
34
37
|
|
|
35
38
|
def prune_sources_for_conditions(
|
|
36
39
|
g: nx.DiGraph,
|
|
40
|
+
accept_partial: bool,
|
|
37
41
|
conditions: BuildWhereClause | None,
|
|
38
42
|
):
|
|
39
43
|
|
|
40
|
-
complete = get_graph_exact_match(g, conditions)
|
|
44
|
+
complete = get_graph_exact_match(g, accept_partial, conditions)
|
|
41
45
|
to_remove = []
|
|
42
46
|
for node in g.nodes:
|
|
43
47
|
if node.startswith("ds~") and node not in complete:
|
|
@@ -249,6 +249,11 @@ class Datasource(HasUUID, Namespaced, BaseModel):
|
|
|
249
249
|
address=self.address,
|
|
250
250
|
columns=[c.with_namespace(namespace) for c in self.columns],
|
|
251
251
|
where=self.where.with_namespace(namespace) if self.where else None,
|
|
252
|
+
non_partial_for=(
|
|
253
|
+
self.non_partial_for.with_namespace(namespace)
|
|
254
|
+
if self.non_partial_for
|
|
255
|
+
else None
|
|
256
|
+
),
|
|
252
257
|
)
|
|
253
258
|
return new
|
|
254
259
|
|
trilogy/core/models/execute.py
CHANGED
|
@@ -24,12 +24,12 @@ from trilogy.core.enums import (
|
|
|
24
24
|
ComparisonOperator,
|
|
25
25
|
Derivation,
|
|
26
26
|
FunctionType,
|
|
27
|
-
Granularity,
|
|
28
27
|
JoinType,
|
|
29
28
|
Modifier,
|
|
30
29
|
Purpose,
|
|
31
30
|
SourceType,
|
|
32
31
|
)
|
|
32
|
+
from trilogy.core.exceptions import InvalidSyntaxException
|
|
33
33
|
from trilogy.core.models.build import (
|
|
34
34
|
BuildCaseElse,
|
|
35
35
|
BuildCaseWhen,
|
|
@@ -438,7 +438,7 @@ class ConceptPair(BaseModel):
|
|
|
438
438
|
|
|
439
439
|
|
|
440
440
|
class CTEConceptPair(ConceptPair):
|
|
441
|
-
cte: CTE
|
|
441
|
+
cte: CTE | UnionCTE
|
|
442
442
|
|
|
443
443
|
|
|
444
444
|
class InstantiatedUnnestJoin(BaseModel):
|
|
@@ -460,6 +460,30 @@ class UnnestJoin(BaseModel):
|
|
|
460
460
|
return self.alias + "".join([str(s.address) for s in self.concepts])
|
|
461
461
|
|
|
462
462
|
|
|
463
|
+
def raise_helpful_join_validation_error(
|
|
464
|
+
concepts: List[BuildConcept],
|
|
465
|
+
left_datasource: BuildDatasource | QueryDatasource | None,
|
|
466
|
+
right_datasource: BuildDatasource | QueryDatasource | None,
|
|
467
|
+
):
|
|
468
|
+
|
|
469
|
+
if not left_datasource or not right_datasource:
|
|
470
|
+
raise InvalidSyntaxException(
|
|
471
|
+
"No mutual keys found, and not two valid datasources"
|
|
472
|
+
)
|
|
473
|
+
left_keys = [c.address for c in left_datasource.output_concepts]
|
|
474
|
+
right_keys = [c.address for c in right_datasource.output_concepts]
|
|
475
|
+
match_concepts = [c.address for c in concepts]
|
|
476
|
+
assert left_datasource
|
|
477
|
+
assert right_datasource
|
|
478
|
+
raise InvalidSyntaxException(
|
|
479
|
+
"No mutual join keys found between"
|
|
480
|
+
f" {left_datasource.identifier} and"
|
|
481
|
+
f" {right_datasource.identifier}, left_keys {left_keys},"
|
|
482
|
+
f" right_keys {right_keys},"
|
|
483
|
+
f" provided join concepts {match_concepts}"
|
|
484
|
+
)
|
|
485
|
+
|
|
486
|
+
|
|
463
487
|
class BaseJoin(BaseModel):
|
|
464
488
|
right_datasource: Union[BuildDatasource, "QueryDatasource"]
|
|
465
489
|
join_type: JoinType
|
|
@@ -479,27 +503,24 @@ class BaseJoin(BaseModel):
|
|
|
479
503
|
)
|
|
480
504
|
|
|
481
505
|
# Early returns maintained as in original code
|
|
482
|
-
if self.concept_pairs:
|
|
483
|
-
return self
|
|
484
|
-
|
|
485
|
-
if self.concepts == []:
|
|
506
|
+
if self.concept_pairs or self.concepts == []:
|
|
486
507
|
return self
|
|
487
508
|
|
|
488
|
-
#
|
|
509
|
+
# reduce concept list to just the mutual keys
|
|
489
510
|
final_concepts = []
|
|
490
|
-
assert self.left_datasource and self.right_datasource
|
|
491
|
-
|
|
492
511
|
for concept in self.concepts or []:
|
|
493
512
|
include = True
|
|
494
513
|
for ds in [self.left_datasource, self.right_datasource]:
|
|
495
514
|
synonyms = []
|
|
515
|
+
if not ds:
|
|
516
|
+
continue
|
|
496
517
|
for c in ds.output_concepts:
|
|
497
518
|
synonyms += list(c.pseudonyms)
|
|
498
519
|
if (
|
|
499
|
-
concept.address not in
|
|
520
|
+
concept.address not in ds.output_concepts
|
|
500
521
|
and concept.address not in synonyms
|
|
501
522
|
):
|
|
502
|
-
raise
|
|
523
|
+
raise InvalidSyntaxException(
|
|
503
524
|
f"Invalid join, missing {concept} on {ds.name}, have"
|
|
504
525
|
f" {[c.address for c in ds.output_concepts]}"
|
|
505
526
|
)
|
|
@@ -507,32 +528,10 @@ class BaseJoin(BaseModel):
|
|
|
507
528
|
final_concepts.append(concept)
|
|
508
529
|
|
|
509
530
|
if not final_concepts and self.concepts:
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
if all(
|
|
515
|
-
[
|
|
516
|
-
c.granularity == Granularity.SINGLE_ROW
|
|
517
|
-
for c in ds.output_concepts
|
|
518
|
-
]
|
|
519
|
-
):
|
|
520
|
-
self.concepts = []
|
|
521
|
-
return self
|
|
522
|
-
# if everything is at abstract grain, we can skip joins
|
|
523
|
-
if all([c.grain.abstract for c in ds.output_concepts]):
|
|
524
|
-
self.concepts = []
|
|
525
|
-
return self
|
|
526
|
-
|
|
527
|
-
left_keys = [c.address for c in self.left_datasource.output_concepts]
|
|
528
|
-
right_keys = [c.address for c in self.right_datasource.output_concepts]
|
|
529
|
-
match_concepts = [c.address for c in self.concepts]
|
|
530
|
-
raise SyntaxError(
|
|
531
|
-
"No mutual join keys found between"
|
|
532
|
-
f" {self.left_datasource.identifier} and"
|
|
533
|
-
f" {self.right_datasource.identifier}, left_keys {left_keys},"
|
|
534
|
-
f" right_keys {right_keys},"
|
|
535
|
-
f" provided join concepts {match_concepts}"
|
|
531
|
+
raise_helpful_join_validation_error(
|
|
532
|
+
self.concepts,
|
|
533
|
+
self.left_datasource,
|
|
534
|
+
self.right_datasource,
|
|
536
535
|
)
|
|
537
536
|
|
|
538
537
|
self.concepts = final_concepts
|
|
@@ -1087,7 +1086,7 @@ class UnionCTE(BaseModel):
|
|
|
1087
1086
|
class Join(BaseModel):
|
|
1088
1087
|
right_cte: CTE | UnionCTE
|
|
1089
1088
|
jointype: JoinType
|
|
1090
|
-
left_cte: CTE | None = None
|
|
1089
|
+
left_cte: CTE | UnionCTE | None = None
|
|
1091
1090
|
joinkey_pairs: List[CTEConceptPair] | None = None
|
|
1092
1091
|
inlined_ctes: set[str] = Field(default_factory=set)
|
|
1093
1092
|
quote: str | None = None
|
|
@@ -1096,7 +1095,7 @@ class Join(BaseModel):
|
|
|
1096
1095
|
def inline_cte(self, cte: CTE):
|
|
1097
1096
|
self.inlined_ctes.add(cte.name)
|
|
1098
1097
|
|
|
1099
|
-
def get_name(self, cte: CTE):
|
|
1098
|
+
def get_name(self, cte: CTE | UnionCTE) -> str:
|
|
1100
1099
|
if cte.identifier in self.inlined_ctes:
|
|
1101
1100
|
return cte.source.datasources[0].safe_identifier
|
|
1102
1101
|
return cte.safe_identifier
|
|
@@ -291,7 +291,7 @@ def evaluate_loop_conditions(
|
|
|
291
291
|
|
|
292
292
|
|
|
293
293
|
def check_for_early_exit(
|
|
294
|
-
complete, partial, context: LoopContext, priority_concept: BuildConcept
|
|
294
|
+
complete, partial, missing, context: LoopContext, priority_concept: BuildConcept
|
|
295
295
|
) -> bool:
|
|
296
296
|
if complete == ValidationResult.INCOMPLETE_CONDITION:
|
|
297
297
|
cond_dict = {str(node): node.preexisting_conditions for node in context.stack}
|
|
@@ -321,7 +321,7 @@ def check_for_early_exit(
|
|
|
321
321
|
)
|
|
322
322
|
else:
|
|
323
323
|
logger.info(
|
|
324
|
-
f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Not complete, continuing search"
|
|
324
|
+
f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Not complete (missing {missing}), continuing search"
|
|
325
325
|
)
|
|
326
326
|
# if we have attempted on root node, we've tried them all.
|
|
327
327
|
# inject in another search with filter concepts
|
|
@@ -412,7 +412,7 @@ def generate_loop_completion(context: LoopContext, virtual: set[str]) -> Strateg
|
|
|
412
412
|
elif context.conditions:
|
|
413
413
|
output.preexisting_conditions = context.conditions.conditional
|
|
414
414
|
logger.info(
|
|
415
|
-
f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Graph is connected, returning {type(output)} node output {[x.address for x in output.usable_outputs]} partial {[c.address for c in output.partial_concepts]} with {context.conditions}"
|
|
415
|
+
f"{depth_to_prefix(context.depth)}{LOGGER_PREFIX} Graph is connected, returning {type(output)} node output {[x.address for x in output.usable_outputs]} partial {[c.address for c in output.partial_concepts or []]} with {context.conditions}"
|
|
416
416
|
)
|
|
417
417
|
if condition_required and context.conditions and non_virtual_different:
|
|
418
418
|
logger.info(
|
|
@@ -532,7 +532,9 @@ def _search_concepts(
|
|
|
532
532
|
)
|
|
533
533
|
# assign
|
|
534
534
|
context.found = found_c
|
|
535
|
-
early_exit = check_for_early_exit(
|
|
535
|
+
early_exit = check_for_early_exit(
|
|
536
|
+
complete, partial, missing_c, context, priority_concept
|
|
537
|
+
)
|
|
536
538
|
if early_exit:
|
|
537
539
|
break
|
|
538
540
|
|
|
@@ -253,7 +253,9 @@ def resolve_weak_components(
|
|
|
253
253
|
break_flag = False
|
|
254
254
|
found = []
|
|
255
255
|
search_graph = environment_graph.copy()
|
|
256
|
-
prune_sources_for_conditions(
|
|
256
|
+
prune_sources_for_conditions(
|
|
257
|
+
search_graph, accept_partial, conditions=search_conditions
|
|
258
|
+
)
|
|
257
259
|
reduced_concept_sets: list[set[str]] = []
|
|
258
260
|
|
|
259
261
|
# loop through, removing new nodes we find
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import sys
|
|
1
2
|
from collections import defaultdict
|
|
2
3
|
from datetime import date, datetime, timedelta
|
|
3
4
|
from typing import List, Tuple, TypeVar
|
|
@@ -14,7 +15,7 @@ from trilogy.core.models.build import (
|
|
|
14
15
|
from trilogy.core.models.core import DataType
|
|
15
16
|
|
|
16
17
|
# Define a generic type that ensures start and end are the same type
|
|
17
|
-
T = TypeVar("T", int, date, datetime)
|
|
18
|
+
T = TypeVar("T", int, float, date, datetime)
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
def reduce_expression(
|
|
@@ -37,21 +38,26 @@ def reduce_expression(
|
|
|
37
38
|
elif var.datatype == DataType.DATETIME:
|
|
38
39
|
lower_check = datetime.min # type: ignore
|
|
39
40
|
upper_check = datetime.max # type: ignore
|
|
41
|
+
elif var.datatype == DataType.BOOL:
|
|
42
|
+
lower_check = False # type: ignore
|
|
43
|
+
upper_check = True # type: ignore
|
|
44
|
+
elif var.datatype == DataType.FLOAT:
|
|
45
|
+
lower_check = float("-inf") # type: ignore
|
|
46
|
+
upper_check = float("inf") # type: ignore
|
|
40
47
|
else:
|
|
41
|
-
|
|
48
|
+
return False
|
|
42
49
|
|
|
43
50
|
ranges: list[Tuple[T, T]] = []
|
|
44
51
|
for op, value in group_tuple:
|
|
45
|
-
increment: int | timedelta
|
|
52
|
+
increment: int | timedelta | float
|
|
46
53
|
if isinstance(value, date):
|
|
47
54
|
increment = timedelta(days=1)
|
|
48
55
|
elif isinstance(value, datetime):
|
|
49
56
|
increment = timedelta(seconds=1)
|
|
50
57
|
elif isinstance(value, int):
|
|
51
58
|
increment = 1
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
# increment = Decimal(0.0000000001)
|
|
59
|
+
elif isinstance(value, float):
|
|
60
|
+
increment = sys.float_info.epsilon
|
|
55
61
|
|
|
56
62
|
if op == ">":
|
|
57
63
|
ranges.append(
|
|
@@ -88,24 +94,43 @@ def reduce_expression(
|
|
|
88
94
|
value,
|
|
89
95
|
)
|
|
90
96
|
)
|
|
97
|
+
elif op == ComparisonOperator.IS:
|
|
98
|
+
ranges.append(
|
|
99
|
+
(
|
|
100
|
+
value,
|
|
101
|
+
value,
|
|
102
|
+
)
|
|
103
|
+
)
|
|
104
|
+
elif op == ComparisonOperator.NE:
|
|
105
|
+
pass
|
|
91
106
|
else:
|
|
92
|
-
|
|
107
|
+
return False
|
|
93
108
|
return is_fully_covered(lower_check, upper_check, ranges, increment)
|
|
94
109
|
|
|
95
110
|
|
|
111
|
+
TARGET_TYPES = (
|
|
112
|
+
int,
|
|
113
|
+
date,
|
|
114
|
+
float,
|
|
115
|
+
datetime,
|
|
116
|
+
bool,
|
|
117
|
+
)
|
|
118
|
+
REDUCABLE_TYPES = (int, float, date, bool, datetime, BuildFunction)
|
|
119
|
+
|
|
120
|
+
|
|
96
121
|
def simplify_conditions(
|
|
97
122
|
conditions: list[BuildComparison | BuildConditional | BuildParenthetical],
|
|
98
123
|
) -> bool:
|
|
99
124
|
# Group conditions by variable
|
|
100
125
|
grouped: dict[
|
|
101
|
-
BuildConcept, list[tuple[ComparisonOperator, datetime | int | date]]
|
|
126
|
+
BuildConcept, list[tuple[ComparisonOperator, datetime | int | date | float]]
|
|
102
127
|
] = defaultdict(list)
|
|
103
128
|
for condition in conditions:
|
|
104
129
|
if not isinstance(condition, BuildComparison):
|
|
105
130
|
return False
|
|
106
|
-
if not isinstance(
|
|
107
|
-
condition.
|
|
108
|
-
)
|
|
131
|
+
if not isinstance(condition.left, REDUCABLE_TYPES) and not isinstance(
|
|
132
|
+
condition.right, REDUCABLE_TYPES
|
|
133
|
+
):
|
|
109
134
|
return False
|
|
110
135
|
if not isinstance(condition.left, BuildConcept) and not isinstance(
|
|
111
136
|
condition.right, BuildConcept
|
|
@@ -113,15 +138,20 @@ def simplify_conditions(
|
|
|
113
138
|
return False
|
|
114
139
|
vars = [condition.left, condition.right]
|
|
115
140
|
concept = [x for x in vars if isinstance(x, BuildConcept)][0]
|
|
116
|
-
|
|
117
|
-
if isinstance(
|
|
118
|
-
if not
|
|
141
|
+
raw_comparison = [x for x in vars if not isinstance(x, BuildConcept)][0]
|
|
142
|
+
if isinstance(raw_comparison, BuildFunction):
|
|
143
|
+
if not raw_comparison.operator == FunctionType.CONSTANT:
|
|
119
144
|
return False
|
|
120
|
-
first_arg =
|
|
121
|
-
if not isinstance(first_arg,
|
|
145
|
+
first_arg = raw_comparison.arguments[0]
|
|
146
|
+
if not isinstance(first_arg, TARGET_TYPES):
|
|
122
147
|
return False
|
|
123
148
|
comparison = first_arg
|
|
124
|
-
|
|
149
|
+
else:
|
|
150
|
+
if not isinstance(raw_comparison, TARGET_TYPES):
|
|
151
|
+
return False
|
|
152
|
+
comparison = raw_comparison
|
|
153
|
+
|
|
154
|
+
if not isinstance(comparison, REDUCABLE_TYPES):
|
|
125
155
|
return False
|
|
126
156
|
|
|
127
157
|
var = concept
|
|
@@ -136,11 +166,25 @@ def simplify_conditions(
|
|
|
136
166
|
return True if all(isinstance(s, bool) and s for s in simplified) else False
|
|
137
167
|
|
|
138
168
|
|
|
169
|
+
def boolean_fully_covered(
|
|
170
|
+
start: bool,
|
|
171
|
+
end: bool,
|
|
172
|
+
ranges: List[Tuple[bool, bool]],
|
|
173
|
+
):
|
|
174
|
+
all = []
|
|
175
|
+
for r_start, r_end in ranges:
|
|
176
|
+
if r_start is True and r_end is True:
|
|
177
|
+
all.append(True)
|
|
178
|
+
elif r_start is False and r_end is False:
|
|
179
|
+
all.append(False)
|
|
180
|
+
return set(all) == {False, True}
|
|
181
|
+
|
|
182
|
+
|
|
139
183
|
def is_fully_covered(
|
|
140
184
|
start: T,
|
|
141
185
|
end: T,
|
|
142
186
|
ranges: List[Tuple[T, T]],
|
|
143
|
-
increment: int | timedelta,
|
|
187
|
+
increment: int | timedelta | float,
|
|
144
188
|
):
|
|
145
189
|
"""
|
|
146
190
|
Check if the list of range pairs fully covers the set [start, end].
|
|
@@ -153,6 +197,11 @@ def is_fully_covered(
|
|
|
153
197
|
Returns:
|
|
154
198
|
- bool: True if the ranges fully cover [start, end], False otherwise.
|
|
155
199
|
"""
|
|
200
|
+
if isinstance(start, bool) and isinstance(end, bool):
|
|
201
|
+
# convert each element of each tuple to a boolean
|
|
202
|
+
bool_ranges = [(bool(r_start), bool(r_end)) for r_start, r_end in ranges]
|
|
203
|
+
|
|
204
|
+
return boolean_fully_covered(start, end, bool_ranges)
|
|
156
205
|
# Sort ranges by their start values (and by end values for ties)
|
|
157
206
|
ranges.sort()
|
|
158
207
|
|
|
@@ -173,14 +222,14 @@ def get_union_sources(
|
|
|
173
222
|
datasources: list[BuildDatasource], concepts: list[BuildConcept]
|
|
174
223
|
) -> List[list[BuildDatasource]]:
|
|
175
224
|
candidates: list[BuildDatasource] = []
|
|
225
|
+
|
|
176
226
|
for x in datasources:
|
|
177
|
-
if
|
|
227
|
+
if any([c.address in x.output_concepts for c in concepts]):
|
|
178
228
|
if (
|
|
179
229
|
any([c.address in x.partial_concepts for c in concepts])
|
|
180
230
|
and x.non_partial_for
|
|
181
231
|
):
|
|
182
232
|
candidates.append(x)
|
|
183
|
-
|
|
184
233
|
assocs: dict[str, list[BuildDatasource]] = defaultdict(list[BuildDatasource])
|
|
185
234
|
for x in candidates:
|
|
186
235
|
if not x.non_partial_for:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from functools import reduce
|
|
2
|
-
from typing import List, Optional
|
|
2
|
+
from typing import TYPE_CHECKING, List, Optional
|
|
3
3
|
|
|
4
4
|
import networkx as nx
|
|
5
5
|
|
|
@@ -30,6 +30,9 @@ from trilogy.core.processing.nodes import (
|
|
|
30
30
|
)
|
|
31
31
|
from trilogy.core.processing.utility import padding
|
|
32
32
|
|
|
33
|
+
if TYPE_CHECKING:
|
|
34
|
+
from trilogy.core.processing.nodes.union_node import UnionNode
|
|
35
|
+
|
|
33
36
|
LOGGER_PREFIX = "[GEN_ROOT_MERGE_NODE]"
|
|
34
37
|
|
|
35
38
|
|
|
@@ -119,8 +122,12 @@ def create_pruned_concept_graph(
|
|
|
119
122
|
|
|
120
123
|
g = g.copy()
|
|
121
124
|
union_options = get_union_sources(datasources, all_concepts)
|
|
125
|
+
|
|
122
126
|
for ds_list in union_options:
|
|
123
127
|
node_address = "ds~" + "-".join([x.name for x in ds_list])
|
|
128
|
+
logger.info(
|
|
129
|
+
f"{padding(depth)}{LOGGER_PREFIX} injecting potentially relevant union datasource {node_address}"
|
|
130
|
+
)
|
|
124
131
|
common: set[BuildConcept] = set.intersection(
|
|
125
132
|
*[set(x.output_concepts) for x in ds_list]
|
|
126
133
|
)
|
|
@@ -128,7 +135,7 @@ def create_pruned_concept_graph(
|
|
|
128
135
|
for c in common:
|
|
129
136
|
g.add_edge(node_address, concept_to_node(c))
|
|
130
137
|
g.add_edge(concept_to_node(c), node_address)
|
|
131
|
-
prune_sources_for_conditions(g, conditions)
|
|
138
|
+
prune_sources_for_conditions(g, accept_partial, conditions)
|
|
132
139
|
target_addresses = set([c.address for c in all_concepts])
|
|
133
140
|
concepts: dict[str, BuildConcept] = nx.get_node_attributes(orig_g, "concept")
|
|
134
141
|
datasource_map: dict[str, BuildDatasource | list[BuildDatasource]] = (
|
|
@@ -203,14 +210,14 @@ def create_pruned_concept_graph(
|
|
|
203
210
|
for s in subgraphs
|
|
204
211
|
if subgraph_is_complete(s, target_addresses, relevant_concepts_pre, g)
|
|
205
212
|
]
|
|
206
|
-
|
|
213
|
+
# from trilogy.hooks.graph_hook import GraphHook
|
|
214
|
+
# GraphHook().query_graph_built(g)
|
|
207
215
|
if not subgraphs:
|
|
208
216
|
logger.info(
|
|
209
217
|
f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - no subgraphs after node prune"
|
|
210
218
|
)
|
|
211
219
|
return None
|
|
212
|
-
|
|
213
|
-
# GraphHook().query_graph_built(g)
|
|
220
|
+
|
|
214
221
|
if subgraphs and len(subgraphs) != 1:
|
|
215
222
|
logger.info(
|
|
216
223
|
f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - subgraphs are split - have {len(subgraphs)} from {subgraphs}"
|
|
@@ -233,6 +240,7 @@ def create_pruned_concept_graph(
|
|
|
233
240
|
def resolve_subgraphs(
|
|
234
241
|
g: nx.DiGraph,
|
|
235
242
|
relevant: list[BuildConcept],
|
|
243
|
+
accept_partial: bool,
|
|
236
244
|
conditions: BuildWhereClause | None,
|
|
237
245
|
depth: int = 0,
|
|
238
246
|
) -> dict[str, list[str]]:
|
|
@@ -251,7 +259,7 @@ def resolve_subgraphs(
|
|
|
251
259
|
ds: list(set(list(nx.all_neighbors(g, ds)))) for ds in datasources
|
|
252
260
|
}
|
|
253
261
|
partial_map = get_graph_partial_nodes(g, conditions)
|
|
254
|
-
exact_map = get_graph_exact_match(g, conditions)
|
|
262
|
+
exact_map = get_graph_exact_match(g, accept_partial, conditions)
|
|
255
263
|
grain_length = get_graph_grains(g)
|
|
256
264
|
concepts: dict[str, BuildConcept] = nx.get_node_attributes(g, "concept")
|
|
257
265
|
non_partial_map = {
|
|
@@ -386,7 +394,7 @@ def create_datasource_node(
|
|
|
386
394
|
datasource_conditions = datasource.where.conditional if datasource.where else None
|
|
387
395
|
rval = SelectNode(
|
|
388
396
|
input_concepts=[c.concept for c in datasource.columns],
|
|
389
|
-
output_concepts=all_concepts,
|
|
397
|
+
output_concepts=sorted(all_concepts, key=lambda x: x.address),
|
|
390
398
|
environment=environment,
|
|
391
399
|
parents=[],
|
|
392
400
|
depth=depth,
|
|
@@ -408,6 +416,46 @@ def create_datasource_node(
|
|
|
408
416
|
)
|
|
409
417
|
|
|
410
418
|
|
|
419
|
+
def create_union_datasource(
    datasource: list[BuildDatasource],
    all_concepts: List[BuildConcept],
    accept_partial: bool,
    environment: BuildEnvironment,
    depth: int,
    conditions: BuildWhereClause | None = None,
) -> tuple["UnionNode", bool]:
    """Build a ``UnionNode`` whose parents are one node per member datasource.

    Args:
        datasource: the datasources to combine; each is handed to
            ``create_datasource_node`` in order.
        all_concepts: concepts used as both the inputs and the outputs of the
            resulting union node, and forwarded to every parent node.
        accept_partial: forwarded unchanged to ``create_datasource_node``.
        environment: build environment forwarded to the parents and the union.
        depth: depth of the union node; parents are created at ``depth + 1``.
        conditions: optional where clause forwarded to every parent.

    Returns:
        A ``(union_node, force_group)`` tuple, where ``force_group`` is the
        ``or`` of the flags returned for the individual parents (``False``
        when there are no parents).
    """
    # Imported at call time: the module-level import of UnionNode is guarded
    # by TYPE_CHECKING and is therefore not available at runtime.
    from trilogy.core.processing.nodes.union_node import UnionNode

    logger.info(
        f"{padding(depth)}{LOGGER_PREFIX} generating union node parents with condition {conditions}"
    )

    # One (node, force_group_flag) pair per member datasource, in input order.
    built = [
        create_datasource_node(
            member,
            all_concepts,
            accept_partial,
            environment,
            depth + 1,
            conditions=conditions,
        )
        for member in datasource
    ]
    parents = [node for node, _ in built]

    force_group = False
    for _, needs_group in built:
        force_group = force_group or needs_group

    logger.info(f"{padding(depth)}{LOGGER_PREFIX} returning union node")
    union = UnionNode(
        output_concepts=all_concepts,
        input_concepts=all_concepts,
        environment=environment,
        parents=parents,
        depth=depth,
        # passed as an explicit empty list rather than left to be derived
        partial_concepts=[],
    )
    return union, force_group
|
|
457
|
+
|
|
458
|
+
|
|
411
459
|
def create_select_node(
|
|
412
460
|
ds_name: str,
|
|
413
461
|
subgraph: list[str],
|
|
@@ -452,31 +500,13 @@ def create_select_node(
|
|
|
452
500
|
)
|
|
453
501
|
|
|
454
502
|
elif isinstance(datasource, list):
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
for x in datasource:
|
|
463
|
-
subnode, fg = create_datasource_node(
|
|
464
|
-
x,
|
|
465
|
-
all_concepts,
|
|
466
|
-
accept_partial,
|
|
467
|
-
environment,
|
|
468
|
-
depth,
|
|
469
|
-
conditions=conditions,
|
|
470
|
-
)
|
|
471
|
-
parents.append(subnode)
|
|
472
|
-
force_group = force_group or fg
|
|
473
|
-
logger.info(f"{padding(depth)}{LOGGER_PREFIX} generating union node")
|
|
474
|
-
bcandidate = UnionNode(
|
|
475
|
-
output_concepts=all_concepts,
|
|
476
|
-
input_concepts=all_concepts,
|
|
477
|
-
environment=environment,
|
|
478
|
-
parents=parents,
|
|
479
|
-
depth=depth,
|
|
503
|
+
bcandidate, force_group = create_union_datasource(
|
|
504
|
+
datasource,
|
|
505
|
+
all_concepts,
|
|
506
|
+
accept_partial,
|
|
507
|
+
environment,
|
|
508
|
+
depth,
|
|
509
|
+
conditions=conditions,
|
|
480
510
|
)
|
|
481
511
|
else:
|
|
482
512
|
raise ValueError(f"Unknown datasource type {datasource}")
|
|
@@ -548,6 +578,9 @@ def gen_select_merge_node(
|
|
|
548
578
|
]
|
|
549
579
|
if accept_partial:
|
|
550
580
|
attempts.append(True)
|
|
581
|
+
logger.info(
|
|
582
|
+
f"{padding(depth)}{LOGGER_PREFIX} searching for root source graph for concepts {[c.address for c in all_concepts]} and conditions {conditions}"
|
|
583
|
+
)
|
|
551
584
|
for attempt in attempts:
|
|
552
585
|
pruned_concept_graph = create_pruned_concept_graph(
|
|
553
586
|
g,
|
|
@@ -568,7 +601,11 @@ def gen_select_merge_node(
|
|
|
568
601
|
return None
|
|
569
602
|
|
|
570
603
|
sub_nodes = resolve_subgraphs(
|
|
571
|
-
pruned_concept_graph,
|
|
604
|
+
pruned_concept_graph,
|
|
605
|
+
relevant=non_constant,
|
|
606
|
+
accept_partial=accept_partial,
|
|
607
|
+
conditions=conditions,
|
|
608
|
+
depth=depth,
|
|
572
609
|
)
|
|
573
610
|
|
|
574
611
|
logger.info(f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {sub_nodes}")
|
|
@@ -160,9 +160,7 @@ class StrategyNode:
|
|
|
160
160
|
self.whole_grain = whole_grain
|
|
161
161
|
self.parents = parents or []
|
|
162
162
|
self.resolution_cache: Optional[QueryDatasource] = None
|
|
163
|
-
|
|
164
|
-
self.output_concepts, self.parents
|
|
165
|
-
)
|
|
163
|
+
|
|
166
164
|
self.nullable_concepts = nullable_concepts or get_all_parent_nullable(
|
|
167
165
|
self.output_concepts, self.parents
|
|
168
166
|
)
|
|
@@ -188,7 +186,9 @@ class StrategyNode:
|
|
|
188
186
|
right=self.preexisting_conditions,
|
|
189
187
|
operator=BooleanOperator.AND,
|
|
190
188
|
)
|
|
191
|
-
self.
|
|
189
|
+
self.partial_concepts: list[BuildConcept] = self.derive_partials(
|
|
190
|
+
partial_concepts
|
|
191
|
+
)
|
|
192
192
|
self.validate_inputs()
|
|
193
193
|
self.log = True
|
|
194
194
|
|
|
@@ -214,7 +214,7 @@ class StrategyNode:
|
|
|
214
214
|
|
|
215
215
|
def add_parents(self, parents: list["StrategyNode"]):
|
|
216
216
|
self.parents += parents
|
|
217
|
-
self.
|
|
217
|
+
self.partial_concepts = self.derive_partials(None)
|
|
218
218
|
return self
|
|
219
219
|
|
|
220
220
|
def set_preexisting_conditions(
|
|
@@ -238,7 +238,9 @@ class StrategyNode:
|
|
|
238
238
|
self.rebuild_cache()
|
|
239
239
|
return self
|
|
240
240
|
|
|
241
|
-
def
|
|
241
|
+
def derive_partials(
|
|
242
|
+
self, partial_concepts: List[BuildConcept] | None = None
|
|
243
|
+
) -> List[BuildConcept]:
|
|
242
244
|
# validate parents exist
|
|
243
245
|
# assign partial values where needed
|
|
244
246
|
for parent in self.parents:
|
|
@@ -246,12 +248,14 @@ class StrategyNode:
|
|
|
246
248
|
raise SyntaxError("Unresolvable parent")
|
|
247
249
|
|
|
248
250
|
# TODO: make this accurate
|
|
249
|
-
if self.parents:
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
251
|
+
if self.parents and partial_concepts is None:
|
|
252
|
+
partials = get_all_parent_partial(self.output_concepts, self.parents)
|
|
253
|
+
elif partial_concepts is None:
|
|
254
|
+
partials = []
|
|
255
|
+
else:
|
|
256
|
+
partials = partial_concepts
|
|
257
|
+
self.partial_lcl = LooseBuildConceptList(concepts=partials)
|
|
258
|
+
return partials
|
|
255
259
|
|
|
256
260
|
def add_output_concepts(self, concepts: List[BuildConcept], rebuild: bool = True):
|
|
257
261
|
for concept in concepts:
|
|
@@ -19,6 +19,7 @@ class UnionNode(StrategyNode):
|
|
|
19
19
|
whole_grain: bool = False,
|
|
20
20
|
parents: List["StrategyNode"] | None = None,
|
|
21
21
|
depth: int = 0,
|
|
22
|
+
partial_concepts: List[BuildConcept] | None = None,
|
|
22
23
|
):
|
|
23
24
|
super().__init__(
|
|
24
25
|
input_concepts=input_concepts,
|
|
@@ -27,7 +28,13 @@ class UnionNode(StrategyNode):
|
|
|
27
28
|
whole_grain=whole_grain,
|
|
28
29
|
parents=parents,
|
|
29
30
|
depth=depth,
|
|
31
|
+
partial_concepts=partial_concepts,
|
|
30
32
|
)
|
|
33
|
+
if self.partial_concepts != []:
|
|
34
|
+
raise ValueError(
|
|
35
|
+
f"UnionNode should not have partial concepts, has {self.partial_concepts}, was given {partial_concepts}"
|
|
36
|
+
)
|
|
37
|
+
self.partial_concepts = []
|
|
31
38
|
|
|
32
39
|
def _resolve(self) -> QueryDatasource:
|
|
33
40
|
"""We need to ensure that any filtered values are removed from the output to avoid inappropriate references"""
|
|
@@ -40,6 +47,7 @@ class UnionNode(StrategyNode):
|
|
|
40
47
|
output_concepts=list(self.output_concepts),
|
|
41
48
|
environment=self.environment,
|
|
42
49
|
whole_grain=self.whole_grain,
|
|
43
|
-
parents=self.parents,
|
|
50
|
+
parents=[x.copy() for x in self.parents] if self.parents else None,
|
|
44
51
|
depth=self.depth,
|
|
52
|
+
partial_concepts=self.partial_concepts,
|
|
45
53
|
)
|
trilogy/core/query_processor.py
CHANGED
|
@@ -53,7 +53,7 @@ LOGGER_PREFIX = "[QUERY BUILD]"
|
|
|
53
53
|
|
|
54
54
|
|
|
55
55
|
def base_join_to_join(
|
|
56
|
-
base_join: BaseJoin | UnnestJoin, ctes: List[CTE]
|
|
56
|
+
base_join: BaseJoin | UnnestJoin, ctes: List[CTE | UnionCTE]
|
|
57
57
|
) -> Join | InstantiatedUnnestJoin:
|
|
58
58
|
"""This function converts joins at the datasource level
|
|
59
59
|
to joins at the CTE level"""
|
|
@@ -69,7 +69,9 @@ def base_join_to_join(
|
|
|
69
69
|
alias=base_join.alias,
|
|
70
70
|
)
|
|
71
71
|
|
|
72
|
-
def get_datasource_cte(
|
|
72
|
+
def get_datasource_cte(
|
|
73
|
+
datasource: BuildDatasource | QueryDatasource,
|
|
74
|
+
) -> CTE | UnionCTE:
|
|
73
75
|
eligible = set()
|
|
74
76
|
for cte in ctes:
|
|
75
77
|
if cte.source.identifier == datasource.identifier:
|
|
@@ -334,7 +336,7 @@ def datasource_to_cte(
|
|
|
334
336
|
human_id = generate_cte_name(query_datasource.identifier, name_map)
|
|
335
337
|
|
|
336
338
|
final_joins = [
|
|
337
|
-
base_join_to_join(join, [x for x in parents if isinstance(x, CTE)])
|
|
339
|
+
base_join_to_join(join, [x for x in parents if isinstance(x, (CTE, UnionCTE))])
|
|
338
340
|
for join in query_datasource.joins
|
|
339
341
|
]
|
|
340
342
|
|
trilogy/dialect/base.py
CHANGED
|
@@ -764,7 +764,10 @@ class BaseDialect:
|
|
|
764
764
|
def render_cte(self, cte: CTE | UnionCTE, auto_sort: bool = True) -> CompiledCTE:
|
|
765
765
|
if isinstance(cte, UnionCTE):
|
|
766
766
|
base_statement = f"\n{cte.operator}\n".join(
|
|
767
|
-
[
|
|
767
|
+
[
|
|
768
|
+
self.render_cte(child, auto_sort=False).statement
|
|
769
|
+
for child in cte.internal_ctes
|
|
770
|
+
]
|
|
768
771
|
)
|
|
769
772
|
if cte.order_by:
|
|
770
773
|
|
trilogy/dialect/duckdb.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import re
|
|
1
2
|
from typing import Any, Callable, Mapping
|
|
2
3
|
|
|
3
4
|
from jinja2 import Template
|
|
@@ -8,6 +9,19 @@ from trilogy.dialect.base import BaseDialect
|
|
|
8
9
|
|
|
9
10
|
WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
|
|
10
11
|
|
|
12
|
+
# Marker for "no capture group was given explicitly"; compared against
# ``str(x[2])`` in generate_regex_extract.
SENTINAL_AUTO_CAPTURE_GROUP_VALUE = "-1"


def generate_regex_extract(x: list[str]) -> str:
    """Render a ``REGEXP_EXTRACT`` call from already-rendered arguments.

    Args:
        x: three items - the rendered source expression, the rendered
            pattern, and the capture group. A group whose string form equals
            ``SENTINAL_AUTO_CAPTURE_GROUP_VALUE`` means "choose automatically".

    Returns:
        The SQL text ``REGEXP_EXTRACT(<source>,<pattern>,<group>)``.

        When the group is chosen automatically it is ``1`` if the pattern
        contains at least one capture group and ``0`` otherwise. If Python's
        ``re`` module cannot parse the pattern, group ``0`` is used instead of
        raising.
    """
    source, pattern, group = x[0], x[1], x[2]
    if str(group) != SENTINAL_AUTO_CAPTURE_GROUP_VALUE:
        # An explicit group was supplied; pass it through untouched.
        return f"REGEXP_EXTRACT({source},{pattern},{group})"

    try:
        has_capture_group = re.compile(pattern).groups > 0
    except re.error:
        # The pattern is compiled here only to count its groups. A pattern
        # that Python's regex dialect rejects should not abort rendering, so
        # fall back to the whole-match group.
        has_capture_group = False

    search = 1 if has_capture_group else 0
    return f"REGEXP_EXTRACT({source},{pattern},{search})"
|
|
24
|
+
|
|
11
25
|
|
|
12
26
|
FUNCTION_MAP = {
|
|
13
27
|
FunctionType.COUNT: lambda args: f"count({args[0]})",
|
|
@@ -37,6 +51,9 @@ FUNCTION_MAP = {
|
|
|
37
51
|
FunctionType.DATETIME_LITERAL: lambda x: f"datetime '{x}'",
|
|
38
52
|
# string
|
|
39
53
|
FunctionType.CONTAINS: lambda x: f"CONTAINS(LOWER({x[0]}), LOWER({x[1]}))",
|
|
54
|
+
# regexp
|
|
55
|
+
FunctionType.REGEXP_CONTAINS: lambda x: f"REGEXP_MATCHES({x[0]},{x[1]})",
|
|
56
|
+
FunctionType.REGEXP_EXTRACT: lambda x: generate_regex_extract(x),
|
|
40
57
|
}
|
|
41
58
|
|
|
42
59
|
# if an aggregate function is called on a source that is at the same grain as the aggregate
|
trilogy/parsing/parse_engine.py
CHANGED
|
@@ -1802,6 +1802,9 @@ class ParseToObjects(Transformer):
|
|
|
1802
1802
|
|
|
1803
1803
|
@v_args(meta=True)
def fregexp_extract(self, meta, args):
    """Create a ``REGEXP_EXTRACT`` function from the parsed arguments.

    When only two arguments were parsed, ``-1`` is appended in place as a
    third argument before the function is created.
    """
    group_omitted = len(args) == 2
    if group_omitted:
        # this is a magic value to represent the default behavior
        args.append(-1)
    return self.function_factory.create_function(
        args, FunctionType.REGEXP_EXTRACT, meta
    )
|
trilogy/parsing/trilogy.lark
CHANGED
|
@@ -279,7 +279,7 @@
|
|
|
279
279
|
_SUBSTRING.1: "substring("i
|
|
280
280
|
fsubstring: _SUBSTRING expr "," expr "," expr ")"
|
|
281
281
|
_REGEXP_EXTRACT.1: "regexp_extract("
|
|
282
|
-
fregexp_extract: _REGEXP_EXTRACT expr "," expr ")"
|
|
282
|
+
fregexp_extract: _REGEXP_EXTRACT expr "," expr ("," int_lit)? ")"
|
|
283
283
|
_REGEXP_CONTAINS.1: "regexp_contains("
|
|
284
284
|
fregexp_contains: _REGEXP_CONTAINS expr "," expr ")"
|
|
285
285
|
_REGEXP_REPLACE.1: "regexp_replace("
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|