pytrilogy 0.0.3.63__py3-none-any.whl → 0.0.3.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pytrilogy may be problematic.
- {pytrilogy-0.0.3.63.dist-info → pytrilogy-0.0.3.65.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.3.63.dist-info → pytrilogy-0.0.3.65.dist-info}/RECORD +23 -23
- trilogy/__init__.py +1 -1
- trilogy/core/graph_models.py +45 -1
- trilogy/core/models/build.py +6 -1
- trilogy/core/models/environment.py +15 -11
- trilogy/core/models/execute.py +30 -58
- trilogy/core/processing/concept_strategies_v3.py +31 -15
- trilogy/core/processing/discovery_node_factory.py +2 -3
- trilogy/core/processing/node_generators/node_merge_node.py +4 -2
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +3 -1
- trilogy/core/processing/node_generators/select_merge_node.py +65 -26
- trilogy/core/processing/node_generators/synonym_node.py +4 -2
- trilogy/core/processing/nodes/__init__.py +11 -29
- trilogy/core/statements/author.py +1 -1
- trilogy/dialect/base.py +7 -0
- trilogy/hooks/graph_hook.py +65 -12
- trilogy/parsing/common.py +2 -2
- trilogy/parsing/render.py +5 -1
- {pytrilogy-0.0.3.63.dist-info → pytrilogy-0.0.3.65.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.3.63.dist-info → pytrilogy-0.0.3.65.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.63.dist-info → pytrilogy-0.0.3.65.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.63.dist-info → pytrilogy-0.0.3.65.dist-info}/top_level.txt +0 -0
{pytrilogy-0.0.3.63.dist-info → pytrilogy-0.0.3.65.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
-pytrilogy-0.0.3.
-trilogy/__init__.py,sha256=
+pytrilogy-0.0.3.65.dist-info/licenses/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
+trilogy/__init__.py,sha256=5PSTzzhWYk76xlzO_XJek4S4HFPOp41QHzTT_y75mqk,303
 trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/constants.py,sha256=lv_aJWP6dn6e2aF4BAE72jbnNtceFddfqtiDSsvzno0,1692
 trilogy/engine.py,sha256=OK2RuqCIUId6yZ5hfF8J1nxGP0AJqHRZiafcowmW0xc,1728
@@ -17,27 +17,27 @@ trilogy/core/environment_helpers.py,sha256=VvPIiFemqaLLpIpLIqprfu63K7muZ1YzNg7UZ
 trilogy/core/ergonomics.py,sha256=e-7gE29vPLFdg0_A1smQ7eOrUwKl5VYdxRSTddHweRA,1631
 trilogy/core/exceptions.py,sha256=JPYyBcit3T_pRtlHdtKSeVJkIyWUTozW2aaut25A2xI,673
 trilogy/core/functions.py,sha256=poVfAwet1xdxTkC7WL38UmGRDpUVO9iSMNWSagl9_r4,29302
-trilogy/core/graph_models.py,sha256=
+trilogy/core/graph_models.py,sha256=wIT-oBchHWE46GLDkgN5K7EzhOBEo8LfaeWV5G5cYcE,3302
 trilogy/core/internal.py,sha256=iicDBlC6nM8d7e7jqzf_ZOmpUsW8yrr2AA8AqEiLx-s,1577
 trilogy/core/optimization.py,sha256=ojpn-p79lr03SSVQbbw74iPCyoYpDYBmj1dbZ3oXCjI,8860
 trilogy/core/query_processor.py,sha256=QiE_w5HgheT4GLZFnaLssJ4plf4voK0TeTd6N3jhR6A,20188
 trilogy/core/utility.py,sha256=3VC13uSQWcZNghgt7Ot0ZTeEmNqs__cx122abVq9qhM,410
 trilogy/core/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/core/models/author.py,sha256=8XbIsQr6cQrgo9uzee5qRoYiMdEG7yKF4FiiWImW7U0,77490
-trilogy/core/models/build.py,sha256=
+trilogy/core/models/build.py,sha256=80v9rxwl41O8_7uIJoHK4tnTUfgR6u8EPrwHg4ySqO4,63323
 trilogy/core/models/build_environment.py,sha256=s_C9xAHuD3yZ26T15pWVBvoqvlp2LdZ8yjsv2_HdXLk,5363
 trilogy/core/models/core.py,sha256=EMAuWTngoNVGCdfNrAY7_k6g528iodNQLwPRVip-8DA,10980
 trilogy/core/models/datasource.py,sha256=6RjJUd2u4nYmEwFBpJlM9LbHVYDv8iHJxqiBMZqUrwI,9422
-trilogy/core/models/environment.py,sha256=
-trilogy/core/models/execute.py,sha256=
+trilogy/core/models/environment.py,sha256=7bkxUob5pNgvK7Om-qvlJgsDiCh5iSPlMHI7tN_OZhU,27717
+trilogy/core/models/execute.py,sha256=94CZVY_EdW675n8SihDnhaGTV0dq7BAAsl2Anf1mPmk,41815
 trilogy/core/optimizations/__init__.py,sha256=YH2-mGXZnVDnBcWVi8vTbrdw7Qs5TivG4h38rH3js_I,290
 trilogy/core/optimizations/base_optimization.py,sha256=gzDOKImoFn36k7XBD3ysEYDnbnb6vdVIztUfFQZsGnM,513
 trilogy/core/optimizations/inline_datasource.py,sha256=2sWNRpoRInnTgo9wExVT_r9RfLAQHI57reEV5cGHUcg,4329
 trilogy/core/optimizations/predicate_pushdown.py,sha256=g4AYE8Aw_iMlAh68TjNXGP754NTurrDduFECkUjoBnc,9399
 trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/core/processing/concept_strategies_v3.py,sha256=
+trilogy/core/processing/concept_strategies_v3.py,sha256=uD_Bzy2l30TJ5-6v0tn9dO-vI6zFRgtHsIHUvge3Sps,22536
 trilogy/core/processing/discovery_loop.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/core/processing/discovery_node_factory.py,sha256=
+trilogy/core/processing/discovery_node_factory.py,sha256=X3-ywUvGbbcwwWtMqKXsPc6xyh3o41eiLRuByXGCvY4,14915
 trilogy/core/processing/discovery_utility.py,sha256=hF3aUbRHHZFeFT5aBjE6TuSeU60I90gzmj512QXG_t8,4856
 trilogy/core/processing/discovery_validation.py,sha256=Ek9jviFgimLMUMYLXBChUQmOD94ihhwQ3NDVe6RTdWg,4930
 trilogy/core/processing/graph_utils.py,sha256=8QUVrkE9j-9C1AyrCb1nQEh8daCe0u1HuXl-Te85lag,1205
@@ -49,18 +49,18 @@ trilogy/core/processing/node_generators/filter_node.py,sha256=0hdfiS2I-Jvr6P-il3
 trilogy/core/processing/node_generators/group_node.py,sha256=nIfiMrJQEksUfqAeeA3X5PS1343y4lmPTipYuCa-rvs,6141
 trilogy/core/processing/node_generators/group_to_node.py,sha256=jKcNCDOY6fNblrdZwaRU0sbUSr9H0moQbAxrGgX6iGA,3832
 trilogy/core/processing/node_generators/multiselect_node.py,sha256=GWV5yLmKTe1yyPhN60RG1Rnrn4ktfn9lYYXi_FVU4UI,7061
-trilogy/core/processing/node_generators/node_merge_node.py,sha256
+trilogy/core/processing/node_generators/node_merge_node.py,sha256=-sVhRHB3NqNIOk_e7LLVhW17Rjcf82hxaHjEvpF5Q-w,16858
 trilogy/core/processing/node_generators/recursive_node.py,sha256=l5zdh0dURKwmAy8kK4OpMtZfyUEQRk6N-PwSWIyBpSM,2468
 trilogy/core/processing/node_generators/rowset_node.py,sha256=2BiSsegbRF9csJ_Xl8P_CxIm4dAAb7dF29u6v_Odr-A,6709
-trilogy/core/processing/node_generators/select_merge_node.py,sha256=
+trilogy/core/processing/node_generators/select_merge_node.py,sha256=3GDGi1tNIfuKO_FMrNCfp-G1c3lxdRuuufcmomYLt4s,21446
 trilogy/core/processing/node_generators/select_node.py,sha256=3dvw0d53eUtCRCUPN6J48I3qBEX1Wha7saQ_ndPu6_I,1777
-trilogy/core/processing/node_generators/synonym_node.py,sha256=
+trilogy/core/processing/node_generators/synonym_node.py,sha256=CN2swdGPEP_Irx4GykHp4gyLCK0dWd2vX7PYJUGxw7w,3548
 trilogy/core/processing/node_generators/union_node.py,sha256=VNo6Oey4p8etU9xrOh2oTT2lIOTvY6PULUPRvVa2uxU,2877
 trilogy/core/processing/node_generators/unnest_node.py,sha256=ueOQtoTf2iJHO09RzWHDFQ5iKZq2fVhGf2KAF2U2kU8,2677
 trilogy/core/processing/node_generators/window_node.py,sha256=GP3Hvkbb0TDA6ef7W7bmvQEHVH-NRIfBT_0W4fcH3g4,6529
 trilogy/core/processing/node_generators/select_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/core/processing/node_generators/select_helpers/datasource_injection.py,sha256=
-trilogy/core/processing/nodes/__init__.py,sha256=
+trilogy/core/processing/node_generators/select_helpers/datasource_injection.py,sha256=Dw0pjdhuJX0p-18CjelLrIJoevAPKzZOTN9uSLNPOF4,6553
+trilogy/core/processing/nodes/__init__.py,sha256=zTge1EzwzEydlcMliIFO_TT7h7lS8l37lyZuQDir1h0,5487
 trilogy/core/processing/nodes/base_node.py,sha256=p6yljFNLQsXz277c5wTATMNqsKUbsdP_3e7--tezBMw,17691
 trilogy/core/processing/nodes/filter_node.py,sha256=5VtRfKbCORx0dV-vQfgy3gOEkmmscL9f31ExvlODwvY,2461
 trilogy/core/processing/nodes/group_node.py,sha256=4EbOur1wSsOpPvP6znHih126o6A-TWbBXyvhiw5B0rs,10505
@@ -71,12 +71,12 @@ trilogy/core/processing/nodes/union_node.py,sha256=fDFzLAUh5876X6_NM7nkhoMvHEdGJ
 trilogy/core/processing/nodes/unnest_node.py,sha256=oLKMMNMx6PLDPlt2V5neFMFrFWxET8r6XZElAhSNkO0,2181
 trilogy/core/processing/nodes/window_node.py,sha256=JXJ0iVRlSEM2IBr1TANym2RaUf_p5E_l2sNykRzXWDo,1710
 trilogy/core/statements/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/core/statements/author.py,sha256=
+trilogy/core/statements/author.py,sha256=6cGCuKERNkH22T6iTsgoNp5CcIFwknF3WX-UmegbUPA,15409
 trilogy/core/statements/build.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/core/statements/common.py,sha256=KxEmz2ySySyZ6CTPzn0fJl5NX2KOk1RPyuUSwWhnK1g,759
 trilogy/core/statements/execute.py,sha256=rqfuoMuXPcH7L7TmE1dSiZ_K_A1ohB8whVMfGimZBOk,1294
 trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/dialect/base.py,sha256=
+trilogy/dialect/base.py,sha256=fM5tPJA5yi6tTHUTWpHldh4RApJUmrosDorbrRQibe4,43613
 trilogy/dialect/bigquery.py,sha256=6ghCqy-k7UioIJc1EEQ7gRo_PHaO8Vm7yYbiQ-kgpzs,3629
 trilogy/dialect/common.py,sha256=hhzuMTFW9QQIP7TKLT9BlJy6lw2R03a68jKQ-7t4-2c,6070
 trilogy/dialect/config.py,sha256=olnyeVU5W5T6b9-dMeNAnvxuPlyc2uefb7FRME094Ec,3834
@@ -89,16 +89,16 @@ trilogy/dialect/snowflake.py,sha256=LQIcHuyuGZXbxrv6sH17aLXLzw7yFVuRoE9M4doNk5k,
 trilogy/dialect/sql_server.py,sha256=z2Vg7Qvw83rbGiEFIvHHLqVWJTWiz2xs76kpQj4HdTU,3131
 trilogy/hooks/__init__.py,sha256=T3SF3phuUDPLXKGRVE_Lf9mzuwoXWyaLolncR_1kY30,144
 trilogy/hooks/base_hook.py,sha256=I_l-NBMNC7hKTDx1JgHZPVOOCvLQ36m2oIGaR5EUMXY,1180
-trilogy/hooks/graph_hook.py,sha256=
+trilogy/hooks/graph_hook.py,sha256=5BfR7Dt0bgEsCLgwjowgCsVkboGYfVJGOz8g9mqpnos,4756
 trilogy/hooks/query_debugger.py,sha256=1npRjww94sPV5RRBBlLqMJRaFkH9vhEY6o828MeoEcw,5583
 trilogy/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/parsing/common.py,sha256=
+trilogy/parsing/common.py,sha256=yuKN3fQEtftRMZlJb0ESUX4TLOVFcAE0vw2CfImYG1A,29980
 trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
 trilogy/parsing/exceptions.py,sha256=Xwwsv2C9kSNv2q-HrrKC1f60JNHShXcCMzstTSEbiCw,154
 trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 trilogy/parsing/parse_engine.py,sha256=O7aM5nZ4SjKlqO2x8XWefI1BMCW06jYYLhABU4k1HCI,72430
-trilogy/parsing/render.py,sha256=
+trilogy/parsing/render.py,sha256=gGCFj2ue0UoaU2MR6qHGMAHXkYRMkTmHjnBowdcgFMY,19603
 trilogy/parsing/trilogy.lark,sha256=x9D1BXtE1E9Kxatx5Kt7xCaid8zgedabwca_B7j7L7o,14331
 trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/scripts/trilogy.py,sha256=1L0XrH4mVHRt1C9T1HnaDv2_kYEfbWTb5_-cBBke79w,3774
@@ -110,8 +110,8 @@ trilogy/std/money.preql,sha256=XWwvAV3WxBsHX9zfptoYRnBigcfYwrYtBHXTME0xJuQ,2082
 trilogy/std/net.preql,sha256=-bMV6dyofskl4Kvows-iQ4JCxjVUwsZOeWCy8JO5Ftw,135
 trilogy/std/ranking.preql,sha256=LDoZrYyz4g3xsII9XwXfmstZD-_92i1Eox1UqkBIfi8,83
 trilogy/std/report.preql,sha256=LbV-XlHdfw0jgnQ8pV7acG95xrd1-p65fVpiIc-S7W4,202
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
-pytrilogy-0.0.3.
+pytrilogy-0.0.3.65.dist-info/METADATA,sha256=EzLG1grru3E83dGL_4EkZemWzM8QDjCe-S08QQNVAmk,9095
+pytrilogy-0.0.3.65.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pytrilogy-0.0.3.65.dist-info/entry_points.txt,sha256=ewBPU2vLnVexZVnB-NrVj-p3E-4vukg83Zk8A55Wp2w,56
+pytrilogy-0.0.3.65.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
+pytrilogy-0.0.3.65.dist-info/RECORD,,
trilogy/__init__.py
CHANGED
trilogy/core/graph_models.py
CHANGED
@@ -1,6 +1,50 @@
 import networkx as nx
 
-from trilogy.core.models.build import BuildConcept, BuildDatasource
+from trilogy.core.models.build import BuildConcept, BuildDatasource, BuildWhereClause
+
+
+def get_graph_exact_match(
+    g: nx.DiGraph, conditions: BuildWhereClause | None
+) -> set[str]:
+    datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
+        nx.get_node_attributes(g, "datasource")
+    )
+    exact: set[str] = set()
+    for node in g.nodes:
+        if node in datasources:
+            ds = datasources[node]
+            if isinstance(ds, list):
+                exact.add(node)
+                continue
+
+            if not conditions and not ds.non_partial_for:
+                exact.add(node)
+                continue
+            elif conditions:
+                if not ds.non_partial_for:
+                    continue
+                if ds.non_partial_for and conditions == ds.non_partial_for:
+                    exact.add(node)
+                    continue
+            else:
+                continue
+
+    return exact
+
+
+def prune_sources_for_conditions(
+    g: nx.DiGraph,
+    conditions: BuildWhereClause | None,
+):
+
+    complete = get_graph_exact_match(g, conditions)
+    to_remove = []
+    for node in g.nodes:
+        if node.startswith("ds~") and node not in complete:
+            to_remove.append(node)
+
+    for node in to_remove:
+        g.remove_node(node)
 
 
 def concept_to_node(input: BuildConcept) -> str:
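The two helpers added above drive condition-aware source pruning: datasource nodes (prefixed "ds~") whose non_partial_for clause does not exactly match the active where-clause are dropped before path resolution. A self-contained toy analogue of that pruning step (illustrative sketch only, not the package code):

import networkx as nx

def prune_unmatched_sources(g: nx.DiGraph, exact: set[str]) -> None:
    # Mirror of the pruning idea: drop every datasource node ("ds~" prefix)
    # that is not in the exact-match set computed from the conditions.
    to_remove = [n for n in g.nodes if n.startswith("ds~") and n not in exact]
    g.remove_nodes_from(to_remove)

g = nx.DiGraph()
g.add_edge("ds~orders_full", "c~order_id")      # hypothetical node names
g.add_edge("ds~orders_partial", "c~order_id")
prune_unmatched_sources(g, exact={"ds~orders_full"})
assert "ds~orders_partial" not in g  # the non-matching source was pruned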
trilogy/core/models/build.py
CHANGED
@@ -1586,7 +1586,10 @@ class Factory:
 
         return BuildFunction.model_construct(
             operator=base.operator,
-            arguments=[
+            arguments=[
+                rval,
+                *[self.handle_constant(self.build(c)) for c in raw_args[1:]],
+            ],
             output_datatype=base.output_datatype,
             output_purpose=base.output_purpose,
             valid_inputs=base.valid_inputs,
@@ -2042,4 +2045,6 @@ class Factory:
             and base.lineage.operator == FunctionType.CONSTANT
         ):
             return BuildParamaterizedConceptReference(concept=base)
+        elif isinstance(base, ConceptRef):
+            return self.handle_constant(self.build(base))
         return base
trilogy/core/models/environment.py
CHANGED
@@ -603,7 +603,7 @@ class Environment(BaseModel):
         # too hacky for maintainability
         if current_derivation not in (Derivation.ROOT, Derivation.CONSTANT):
             logger.info(
-                f"A datasource has been added which will persist derived concept {new_persisted_concept.address}"
+                f"A datasource has been added which will persist derived concept {new_persisted_concept.address} with derivation {current_derivation}"
             )
         persisted = f"{PERSISTED_CONCEPT_PREFIX}_" + new_persisted_concept.name
         # override the current concept source to reflect that it's now coming from a datasource
@@ -622,17 +622,21 @@ class Environment(BaseModel):
             meta=meta,
             force=True,
         )
+        base = {
+            "lineage": None,
+            "metadata": new_persisted_concept.metadata.model_copy(
+                update={"concept_source": ConceptSource.PERSIST_STATEMENT}
+            ),
+            "derivation": Derivation.ROOT,
+            "purpose": new_persisted_concept.purpose,
+        }
+        # purpose is used in derivation calculation
+        # which should be fixed, but we'll do in a followup
+        # so override here
+        if new_persisted_concept.purpose == Purpose.CONSTANT:
+            base["purpose"] = Purpose.KEY
         new_persisted_concept = new_persisted_concept.model_copy(
-            deep=True,
-            update={
-                "lineage": None,
-                "metadata": new_persisted_concept.metadata.model_copy(
-                    update={
-                        "concept_source": ConceptSource.PERSIST_STATEMENT
-                    }
-                ),
-                "derivation": Derivation.ROOT,
-            },
+            deep=True, update=base
         )
         self.add_concept(
             new_persisted_concept,
trilogy/core/models/execute.py
CHANGED
@@ -56,6 +56,12 @@ LOGGER_PREFIX = "[MODELS_EXECUTE]"
 DATASOURCE_TYPES = (BuildDatasource, BuildDatasource)
 
 
+class InlinedCTE(BaseModel):
+    original_alias: str
+    new_alias: str
+    new_base: str
+
+
 class CTE(BaseModel):
     name: str
     source: "QueryDatasource"
@@ -78,6 +84,7 @@ class CTE(BaseModel):
     limit: Optional[int] = None
     base_name_override: Optional[str] = None
     base_alias_override: Optional[str] = None
+    inlined_ctes: dict[str, InlinedCTE] = Field(default_factory=dict)
 
     @field_validator("join_derived_concepts")
     def validate_join_derived_concepts(cls, v):
@@ -104,62 +111,6 @@ class CTE(BaseModel):
     def validate_output_columns(cls, v):
         return unique(v, "address")
 
-    def inline_constant(self, concept: BuildConcept):
-        if not concept.derivation == Derivation.CONSTANT:
-            return False
-        if not isinstance(concept.lineage, BuildFunction):
-            return False
-        if not concept.lineage.operator == FunctionType.CONSTANT:
-            return False
-        # remove the constant
-        removed: set = set()
-        if concept.address in self.source_map:
-            removed = removed.union(self.source_map[concept.address])
-            del self.source_map[concept.address]
-
-        if self.condition:
-            self.condition = self.condition.inline_constant(concept)
-        # if we've entirely removed the need to join to someplace to get the concept
-        # drop the join as well.
-        for removed_cte in removed:
-            still_required = any(
-                [
-                    removed_cte in x
-                    for x in self.source_map.values()
-                    or self.existence_source_map.values()
-                ]
-            )
-            if not still_required:
-                self.joins = [
-                    join
-                    for join in self.joins
-                    if not isinstance(join, Join)
-                    or (
-                        isinstance(join, Join)
-                        and (
-                            join.right_cte.name != removed_cte
-                            and any(
-                                [
-                                    x.cte.name != removed_cte
-                                    for x in (join.joinkey_pairs or [])
-                                ]
-                            )
-                        )
-                    )
-                ]
-                for join in self.joins:
-                    if isinstance(join, UnnestJoin) and concept in join.concepts:
-                        join.rendering_required = False
-
-                self.parent_ctes = [
-                    x for x in self.parent_ctes if x.name != removed_cte
-                ]
-                if removed_cte == self.base_name_override:
-                    candidates = [x.name for x in self.parent_ctes]
-                    self.base_name_override = candidates[0] if candidates else None
-                    self.base_alias_override = candidates[0] if candidates else None
-        return True
-
     @property
     def comment(self) -> str:
         base = f"Target: {str(self.grain)}. Group: {self.group_to_grain}"
@@ -240,7 +191,18 @@ class CTE(BaseModel):
                 ]
             elif v == parent.safe_identifier:
                 self.source_map[k] = [ds_being_inlined.safe_identifier]
-
+        for k, v in self.existence_source_map.items():
+            if isinstance(v, list):
+                self.existence_source_map[k] = [
+                    (
+                        ds_being_inlined.safe_identifier
+                        if x == parent.safe_identifier
+                        else x
+                    )
+                    for x in v
+                ]
+            elif v == parent.safe_identifier:
+                self.existence_source_map[k] = [ds_being_inlined.safe_identifier]
         # zip in any required values for lookups
         for k in ds_being_inlined.output_lcl.addresses:
             if k in self.source_map and self.source_map[k]:
@@ -251,6 +213,11 @@ class CTE(BaseModel):
             ]
         if force_group:
             self.group_to_grain = True
+        self.inlined_ctes[ds_being_inlined.safe_identifier] = InlinedCTE(
+            original_alias=parent.name,
+            new_alias=ds_being_inlined.safe_identifier,
+            new_base=ds_being_inlined.safe_location,
+        )
         return True
 
     def __add__(self, other: "CTE" | "UnionCTE"):
@@ -303,6 +270,10 @@ class CTE(BaseModel):
             **self.existence_source_map,
             **other.existence_source_map,
         }
+        self.inlined_ctes = {
+            **self.inlined_ctes,
+            **other.inlined_ctes,
+        }
 
         return self
 
@@ -672,7 +643,7 @@ class QueryDatasource(BaseModel):
             and CONFIG.validate_missing
         ):
             raise SyntaxError(
-                f"
+                f"Missing source map entry for {concept.address} on {key} with pseudonyms {concept.pseudonyms}, have map: {v}"
             )
         return v
 
@@ -1057,6 +1028,7 @@ class UnionCTE(BaseModel):
     hidden_concepts: set[str] = Field(default_factory=set)
     partial_concepts: list[BuildConcept] = Field(default_factory=list)
     existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
+    inlined_ctes: Dict[str, InlinedCTE] = Field(default_factory=dict)
 
     @computed_field  # type: ignore
     @property
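The new InlinedCTE model above records, for each parent CTE collapsed into its backing datasource, the alias mapping and the physical location to select from; the dialect layer consults this map when rendering existence subselects. A minimal usage sketch (field names come from the diff; the values are hypothetical):

from pydantic import BaseModel

class InlinedCTE(BaseModel):
    original_alias: str  # name of the CTE that was inlined away
    new_alias: str       # safe identifier of the replacing datasource
    new_base: str        # physical table/location to select from

record = InlinedCTE(
    original_alias="orders_cte",    # hypothetical
    new_alias="orders_ds",          # hypothetical
    new_base="warehouse.orders",    # hypothetical
)
assert record.new_base == "warehouse.orders"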
trilogy/core/processing/concept_strategies_v3.py
CHANGED
@@ -54,11 +54,7 @@ def generate_candidates_restrictive(
     exhausted: set[str],
     depth: int,
     conditions: BuildWhereClause | None = None,
-) ->
-    # if it's single row, joins are irrelevant. Fetch without keys.
-    if priority_concept.granularity == Granularity.SINGLE_ROW:
-        return []
-
+) -> tuple[list[BuildConcept], BuildWhereClause | None]:
     local_candidates = [
         x
         for x in list(candidates)
@@ -71,8 +67,16 @@ def generate_candidates_restrictive(
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Injecting additional conditional row arguments as all remaining concepts are roots or constant"
         )
-
-
+        # otherwise, we can ignore the conditions now that we've injected inputs
+        return (
+            unique(list(conditions.row_arguments) + local_candidates, "address"),
+            None,
+        )
+    # if it's single row, joins are irrelevant. Fetch without keys.
+    if priority_concept.granularity == Granularity.SINGLE_ROW:
+        return [], conditions
+
+    return local_candidates, conditions
 
 
 def append_existence_check(
@@ -104,9 +108,7 @@ def append_existence_check(
     )
     assert parent, "Could not resolve existence clause"
     node.add_parents([parent])
-    logger.info(
-        f"{LOGGER_PREFIX} fetching existence clause inputs {[str(c) for c in subselect]}"
-    )
+    logger.info(f"{LOGGER_PREFIX} found {[str(c) for c in subselect]}")
    node.add_existence_concepts([*subselect])
 
 
@@ -440,7 +442,19 @@ def _search_concepts(
     accept_partial: bool = False,
     conditions: BuildWhereClause | None = None,
 ) -> StrategyNode | None:
+    # check for direct materialization first
+    candidate = history.gen_select_node(
+        mandatory_list,
+        environment,
+        g,
+        depth + 1,
+        fail_if_not_found=False,
+        accept_partial=accept_partial,
+        conditions=conditions,
+    )
 
+    if candidate:
+        return candidate
     context = initialize_loop_context(
         mandatory_list=mandatory_list,
         environment=environment,
@@ -460,19 +474,21 @@ def _search_concepts(
     )
 
     local_conditions = evaluate_loop_conditions(context, priority_concept)
-    logger.info(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)} derivation {priority_concept.derivation} granularity {priority_concept.granularity} with conditions {local_conditions}"
-    )
 
     candidates = [
         c for c in context.mandatory_list if c.address != priority_concept.address
     ]
-
+    # the local conditions list may be override if we end up injecting conditions
+    candidate_list, local_conditions = generate_candidates_restrictive(
         priority_concept,
         candidates,
         context.skip,
         depth=depth,
-        conditions=
+        conditions=local_conditions,
+    )
+
+    logger.info(
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} priority concept is {str(priority_concept)} derivation {priority_concept.derivation} granularity {priority_concept.granularity} with conditions {local_conditions}"
     )
 
     logger.info(
trilogy/core/processing/discovery_node_factory.py
CHANGED
@@ -438,15 +438,14 @@ def generate_node(
     )
 
     # Try materialized concept first
+    # this is worth checking every loop iteration
     candidate = history.gen_select_node(
-        concept,
-        local_optional,
+        [concept] + local_optional,
         environment,
         g,
         depth + 1,
         fail_if_not_found=False,
         accept_partial=accept_partial,
-        accept_partial_optional=False,
         conditions=conditions,
     )
 
trilogy/core/processing/node_generators/node_merge_node.py
CHANGED
@@ -6,7 +6,7 @@ from networkx.algorithms import approximation as ax
 from trilogy.constants import logger
 from trilogy.core.enums import Derivation
 from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
-from trilogy.core.graph_models import concept_to_node
+from trilogy.core.graph_models import concept_to_node, prune_sources_for_conditions
 from trilogy.core.models.build import BuildConcept, BuildConditional, BuildWhereClause
 from trilogy.core.models.build_environment import BuildEnvironment
 from trilogy.core.processing.nodes import History, MergeNode, StrategyNode
@@ -222,10 +222,12 @@ def resolve_weak_components(
     environment_graph: nx.DiGraph,
     filter_downstream: bool = True,
     accept_partial: bool = False,
+    search_conditions: BuildWhereClause | None = None,
 ) -> list[list[BuildConcept]] | None:
     break_flag = False
     found = []
     search_graph = environment_graph.copy()
+    prune_sources_for_conditions(search_graph, conditions=search_conditions)
     reduced_concept_sets: list[set[str]] = []
 
     # loop through, removing new nodes we find
@@ -239,7 +241,6 @@ def resolve_weak_components(
             if "__preql_internal" not in c.address
         ]
     )
-    logger.debug(f"Resolving weak components for {node_list} in {search_graph.nodes}")
     synonyms: set[str] = set()
     for x in all_concepts:
         synonyms = synonyms.union(x.pseudonyms)
@@ -407,6 +408,7 @@ def gen_merge_node(
         g,
         filter_downstream=filter_downstream,
         accept_partial=accept_partial,
+        search_conditions=search_conditions,
     )
     if not weak_resolve:
         logger.info(
trilogy/core/processing/node_generators/select_helpers/datasource_injection.py
CHANGED
@@ -169,7 +169,9 @@ def is_fully_covered(
     return current_end >= end
 
 
-def get_union_sources(
+def get_union_sources(
+    datasources: list[BuildDatasource], concepts: list[BuildConcept]
+) -> List[list[BuildDatasource]]:
     candidates: list[BuildDatasource] = []
     for x in datasources:
         if all([c.address in x.output_concepts for c in concepts]):
trilogy/core/processing/node_generators/select_merge_node.py
CHANGED
@@ -5,7 +5,11 @@ import networkx as nx
 
 from trilogy.constants import logger
 from trilogy.core.enums import Derivation
-from trilogy.core.graph_models import
+from trilogy.core.graph_models import (
+    concept_to_node,
+    get_graph_exact_match,
+    prune_sources_for_conditions,
+)
 from trilogy.core.models.build import (
     BuildConcept,
     BuildDatasource,
@@ -57,26 +61,6 @@ def get_graph_partial_nodes(
     return partial
 
 
-def get_graph_exact_match(
-    g: nx.DiGraph, conditions: BuildWhereClause | None
-) -> set[str]:
-    datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
-        nx.get_node_attributes(g, "datasource")
-    )
-    exact: set[str] = set()
-    for node in g.nodes:
-        if node in datasources:
-            ds = datasources[node]
-            if not isinstance(ds, list):
-                if ds.non_partial_for and conditions == ds.non_partial_for:
-                    exact.add(node)
-                    continue
-                else:
-                    continue
-
-    return exact
-
-
 def get_graph_grains(g: nx.DiGraph) -> dict[str, list[str]]:
     datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
         nx.get_node_attributes(g, "datasource")
@@ -95,6 +79,34 @@ def get_graph_grains(g: nx.DiGraph) -> dict[str, list[str]]:
     return grain_length
 
 
+def subgraph_is_complete(
+    nodes: list[str], targets: set[str], mapping: dict[str, str], g: nx.DiGraph
+) -> bool:
+    mapped = set([mapping.get(n, n) for n in nodes])
+    passed = all([t in mapped for t in targets])
+    if not passed:
+        logger.info(
+            f"Subgraph {nodes} is not complete, missing targets {targets} - mapped {mapped}"
+        )
+        return False
+    # check if all concepts have a datasource edge
+    has_ds_edge = {
+        mapping.get(n, n): any(x.startswith("ds~") for x in nx.neighbors(g, n))
+        for n in nodes
+        if n.startswith("c~")
+    }
+    has_ds_edge = {k: False for k in targets}
+    # check at least one instance of concept has a datasource edge
+    for n in nodes:
+        if n.startswith("c~"):
+            neighbors = nx.neighbors(g, n)
+            for neighbor in neighbors:
+                if neighbor.startswith("ds~"):
+                    has_ds_edge[mapping.get(n, n)] = True
+                    break
+    return all(has_ds_edge.values()) and passed
+
+
 def create_pruned_concept_graph(
     g: nx.DiGraph,
     all_concepts: List[BuildConcept],
@@ -104,6 +116,7 @@ def create_pruned_concept_graph(
     depth: int = 0,
 ) -> nx.DiGraph:
     orig_g = g
+
     g = g.copy()
     union_options = get_union_sources(datasources, all_concepts)
     for ds_list in union_options:
@@ -114,7 +127,8 @@ def create_pruned_concept_graph(
         g.add_node(node_address, datasource=ds_list)
         for c in common:
             g.add_edge(node_address, concept_to_node(c))
-
+            g.add_edge(concept_to_node(c), node_address)
+    prune_sources_for_conditions(g, conditions)
     target_addresses = set([c.address for c in all_concepts])
     concepts: dict[str, BuildConcept] = nx.get_node_attributes(orig_g, "concept")
     datasource_map: dict[str, BuildDatasource | list[BuildDatasource]] = (
@@ -126,8 +140,7 @@ def create_pruned_concept_graph(
         # filter out synonyms
         if (x := concepts.get(n, None)) and x.address in target_addresses
     }
-
-    # GraphHook().query_graph_built(g)
+
     relevant_concepts: list[str] = list(relevant_concepts_pre.keys())
     relevent_datasets: list[str] = []
     if not accept_partial:
@@ -149,6 +162,7 @@ def create_pruned_concept_graph(
             to_remove.append(edge)
     for edge in to_remove:
         g.remove_edge(*edge)
+
    for n in g.nodes():
         if not n.startswith("ds~"):
             continue
@@ -181,8 +195,15 @@ def create_pruned_concept_graph(
             if n not in relevent_datasets and n not in relevant_concepts
         ]
     )
-
+    # from trilogy.hooks.graph_hook import GraphHook
+    # GraphHook().query_graph_built(g)
     subgraphs = list(nx.connected_components(g.to_undirected()))
+    subgraphs = [
+        s
+        for s in subgraphs
+        if subgraph_is_complete(s, target_addresses, relevant_concepts_pre, g)
+    ]
+
     if not subgraphs:
         logger.info(
             f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - no subgraphs after node prune"
@@ -486,6 +507,24 @@ def gen_select_merge_node(
     non_constant = [c for c in all_concepts if c.derivation != Derivation.CONSTANT]
     constants = [c for c in all_concepts if c.derivation == Derivation.CONSTANT]
     if not non_constant and constants:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} only constant inputs to discovery ({constants}), returning constant node directly"
+        )
+        for x in constants:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} {x} {x.lineage} {x.derivation}"
+            )
+        if conditions:
+            if not all(
+                [x.derivation == Derivation.CONSTANT for x in conditions.row_arguments]
+            ):
+                logger.info(
+                    f"{padding(depth)}{LOGGER_PREFIX} conditions being passed in to constant node {conditions}, but not all concepts are constants."
+                )
+                return None
+            else:
+                constants += conditions.row_arguments
+
         return ConstantNode(
             output_concepts=constants,
             input_concepts=[],
@@ -494,7 +533,7 @@ def gen_select_merge_node(
             depth=depth,
             partial_concepts=[],
             force_group=False,
-
+            conditions=conditions.conditional if conditions else None,
         )
     for attempt in [False, True]:
         pruned_concept_graph = create_pruned_concept_graph(
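The subgraph_is_complete filter above keeps only candidate subgraphs in which every target concept is present and touches at least one datasource node. A toy analogue of the check (illustrative sketch, not the package code):

import networkx as nx

def is_complete(nodes: set[str], targets: set[str], g: nx.Graph) -> bool:
    # Every target must appear in the subgraph...
    if not targets <= nodes:
        return False
    # ...and each target concept must have at least one datasource neighbor.
    return all(
        any(n.startswith("ds~") for n in g.neighbors(t)) for t in targets
    )

g = nx.Graph()
g.add_edge("ds~orders", "c~order_id")  # hypothetical node names
assert is_complete({"ds~orders", "c~order_id"}, {"c~order_id"}, g)
assert not is_complete({"c~order_id"}, {"c~order_id"}, nx.Graph([("c~order_id", "c~other")]))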
trilogy/core/processing/node_generators/synonym_node.py
CHANGED
@@ -50,7 +50,9 @@ def gen_synonym_node(
 
     logger.info(f"{local_prefix} Generating Synonym Node with {len(synonyms)} synonyms")
     sorted_keys = sorted(synonyms.keys())
-    combinations_list
+    combinations_list: list[tuple[BuildConcept, ...]] = list(
+        itertools.product(*(synonyms[obj] for obj in sorted_keys))
+    )
 
     def similarity_sort_key(combo):
         addresses = [x.address for x in combo]
@@ -83,7 +85,7 @@ def gen_synonym_node(
         f"{local_prefix} checking combination {fingerprint} with {len(combo)} concepts"
     )
     attempt: StrategyNode | None = source_concepts(
-        combo,
+        list(combo),
         history=history,
         environment=environment,
         depth=depth,
trilogy/core/processing/nodes/__init__.py
CHANGED
@@ -124,51 +124,31 @@ class History(BaseModel):
         in self.started
     )
 
-    def _select_concepts_to_lookup(
-        self,
-        main: BuildConcept,
-        search: list[BuildConcept],
-        accept_partial: bool,
-        fail_if_not_found: bool,
-        accept_partial_optional: bool,
-        conditions: BuildWhereClause | None = None,
-    ) -> str:
-        return (
-            str(main.address)
-            + "|"
-            + "-".join([c.address for c in search])
-            + str(accept_partial)
-            + str(fail_if_not_found)
-            + str(accept_partial_optional)
-            + str(conditions)
-        )
-
     def gen_select_node(
         self,
-
-        local_optional: list[BuildConcept],
+        concepts: list[BuildConcept],
         environment: BuildEnvironment,
         g,
         depth: int,
         fail_if_not_found: bool = False,
         accept_partial: bool = False,
-        accept_partial_optional: bool = False,
         conditions: BuildWhereClause | None = None,
     ) -> StrategyNode | None:
         from trilogy.core.processing.node_generators.select_node import gen_select_node
 
-        fingerprint = self.
-
-            local_optional,
+        fingerprint = self._concepts_to_lookup(
+            concepts,
             accept_partial,
-            fail_if_not_found,
-            accept_partial_optional=accept_partial_optional,
             conditions=conditions,
         )
         if fingerprint in self.select_history:
-
+            rval = self.select_history[fingerprint]
+            if rval:
+                # all nodes must be copied before returning
+                return rval.copy()
+            return rval
         gen = gen_select_node(
-
+            concepts,
             environment,
             g,
             depth + 1,
@@ -177,6 +157,8 @@ class History(BaseModel):
             conditions=conditions,
         )
         self.select_history[fingerprint] = gen
+        if gen:
+            return gen.copy()
         return gen
 
 
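History.gen_select_node now returns a copy of any cached strategy node rather than the shared instance; if the cache handed back the same mutable object, one caller's edits would leak into every later cache hit. The hazard in miniature (toy classes, not the package code):

import copy

class Node:
    def __init__(self) -> None:
        self.parents: list["Node"] = []

    def copy(self) -> "Node":
        return copy.deepcopy(self)

cache: dict[str, Node] = {"fingerprint": Node()}

hit = cache["fingerprint"].copy()  # copy on read, as the diff now does
hit.parents.append(Node())         # the caller mutates its copy freely
assert not cache["fingerprint"].parents  # the cached original stays pristine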
trilogy/dialect/base.py
CHANGED
@@ -58,6 +58,7 @@ from trilogy.core.query_processor import process_copy, process_persist, process_
 from trilogy.core.statements.author import (
     ConceptDeclarationStatement,
     CopyStatement,
+    FunctionDeclaration,
     ImportStatement,
     MergeStatementV2,
     MultiSelectStatement,
@@ -579,7 +580,12 @@ class BaseDialect:
                 target = INVALID_REFERENCE_STRING(
                     f"Missing source CTE for {e.right.address}"
                 )
+            assert cte, "CTE must be provided for inlined CTEs"
+            if target in cte.inlined_ctes:
+                info = cte.inlined_ctes[target]
+                return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {info.new_base} as {target} where {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} is not null)"
             return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {target} where {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} is not null)"
+
         elif isinstance(
             e.right,
             (ListWrapper, TupleWrapper, BuildParenthetical, list),
@@ -980,6 +986,7 @@ class BaseDialect:
                     ImportStatement,
                     RowsetDerivationStatement,
                     Datasource,
+                    FunctionDeclaration,
                 ),
             ):
                 continue
trilogy/hooks/graph_hook.py
CHANGED
@@ -22,7 +22,6 @@ class GraphHook(BaseHook):
             pass
         except ImportError:
             raise ImportError("GraphHook requires matplotlib and scipy to be installed")
-
     # https://github.com/python/cpython/issues/125235#issuecomment-2412948604
 
     def query_graph_built(
@@ -39,8 +38,10 @@ class GraphHook(BaseHook):
         for node in nodes:
             if "__preql_internal" in node:
                 graph.remove_node(node)
+
         if remove_isolates:
             graph.remove_nodes_from(list(nx.isolates(graph)))
+
         color_map = []
         highlight_nodes = highlight_nodes or []
         for node in graph:
@@ -50,9 +51,10 @@ class GraphHook(BaseHook):
                 color_map.append("blue")
             else:
                 color_map.append("green")
-
+
         pos = nx.spring_layout(graph)
         kwargs = {}
+
         if target:
             edge_colors = []
             descendents = nx.descendants(graph, target)
@@ -66,21 +68,72 @@ class GraphHook(BaseHook):
             else:
                 edge_colors.append("black")
             kwargs["edge_color"] = edge_colors
+
+        # Draw the graph without labels first
         nx.draw(
             graph,
             pos=pos,
             node_color=color_map,
             connectionstyle="arc3, rad = 0.1",
+            with_labels=False,  # Important: don't draw labels with nx.draw
             **kwargs
-        )
-
+        )
+
+        # Draw labels with manual spacing
+        self._draw_labels_with_manual_spacing(graph, pos)
+
+        plt.show()
+
+    def _draw_labels_with_manual_spacing(self, graph, pos):
+        import numpy as np
+
         pos_labels = {}
-
-
-
-
-
-
+        node_positions = list(pos.values())
+
+        # Calculate average distance between nodes to determine spacing
+        if len(node_positions) > 1:
+            distances = []
+            for i, (x1, y1) in enumerate(node_positions):
+                for j, (x2, y2) in enumerate(node_positions[i + 1 :], i + 1):
+                    dist = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
+                    distances.append(dist)
+
+            avg_distance = np.mean(distances)
+            min_spacing = max(
+                0.1, avg_distance * 0.3
+            )  # Minimum spacing as fraction of average distance
+        else:
+            min_spacing = 0.1
+
+        # Simple spacing algorithm - offset labels that are too close
+        for i, node in enumerate(graph.nodes()):
+            x, y = pos[node]
+
+            # Check for nearby labels and adjust position
+            adjusted_x, adjusted_y = x, y
+            for j, other_node in enumerate(
+                list(graph.nodes())[:i]
+            ):  # Only check previous nodes
+                other_x, other_y = pos_labels.get(other_node, pos[other_node])
+                distance = np.sqrt(
+                    (adjusted_x - other_x) ** 2 + (adjusted_y - other_y) ** 2
+                )
+
+                if distance < min_spacing:
+                    # Calculate offset direction
+                    if distance > 0:
+                        offset_x = (adjusted_x - other_x) / distance * min_spacing
+                        offset_y = (adjusted_y - other_y) / distance * min_spacing
+                    else:
+                        # If nodes are at exact same position, use random offset
+                        angle = np.random.random() * 2 * np.pi
+                        offset_x = np.cos(angle) * min_spacing
+                        offset_y = np.sin(angle) * min_spacing
+
+                    adjusted_x = other_x + offset_x
+                    adjusted_y = other_y + offset_y
+
+            pos_labels[node] = (adjusted_x, adjusted_y)
+
+        # Draw the labels at adjusted positions
         nx.draw_networkx_labels(graph, pos=pos_labels, font_size=10)
-        plt.show()
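The spacing pass added above nudges a label away from any earlier label closer than min_spacing, along the line between them (falling back to a random direction when points coincide). The core offset rule in isolation (toy function, not the package code):

import math

def push_apart(anchor: tuple[float, float],
               label: tuple[float, float],
               min_spacing: float) -> tuple[float, float]:
    # If label sits within min_spacing of anchor, move it out to exactly
    # min_spacing along the anchor->label direction.
    dx, dy = label[0] - anchor[0], label[1] - anchor[1]
    dist = math.hypot(dx, dy)
    if dist == 0 or dist >= min_spacing:
        return label  # coincident points need the random-angle fallback instead
    scale = min_spacing / dist
    return (anchor[0] + dx * scale, anchor[1] + dy * scale)

assert push_apart((0.0, 0.0), (0.05, 0.0), 0.1) == (0.1, 0.0)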
trilogy/parsing/common.py
CHANGED
@@ -86,7 +86,7 @@ def process_function_arg(
         if concept.metadata and meta:
             concept.metadata.line_number = meta.line
         environment.add_concept(concept, meta=meta)
-        return concept
+        return concept.reference
     elif isinstance(
         arg,
         (ListWrapper, MapWrapper),
@@ -103,7 +103,7 @@ def process_function_arg(
         if concept.metadata and meta:
             concept.metadata.line_number = meta.line
         environment.add_concept(concept, meta=meta)
-        return concept
+        return concept.reference
     elif isinstance(arg, Concept):
         return arg.reference
     elif isinstance(arg, ConceptRef):
trilogy/parsing/render.py
CHANGED
@@ -506,7 +506,11 @@ class Renderer:
             return f"{args[0]} % {args[1]}"
         if arg.operator == FunctionType.PARENTHETICAL:
             return f"({args[0]})"
-
+        if arg.operator == FunctionType.GROUP:
+            arg_string = ", ".join(args[1:])
+            if len(args) == 1:
+                return f"group({args[0]})"
+            return f"group({args[0]}) by {arg_string}"
         inputs = ",".join(args)
 
         if arg.operator == FunctionType.CONSTANT:
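The new GROUP branch above renders the first argument as the grouped expression and any remaining arguments as the by clause. Its logic in isolation (mirrors the diff; standalone for illustration):

def render_group(args: list[str]) -> str:
    # group(<expr>) when there are no keys; group(<expr>) by k1, k2 otherwise
    if len(args) == 1:
        return f"group({args[0]})"
    return f"group({args[0]}) by {', '.join(args[1:])}"

assert render_group(["revenue"]) == "group(revenue)"
assert render_group(["revenue", "region", "year"]) == "group(revenue) by region, year"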
{pytrilogy-0.0.3.63.dist-info → pytrilogy-0.0.3.65.dist-info}/WHEEL
File without changes
{pytrilogy-0.0.3.63.dist-info → pytrilogy-0.0.3.65.dist-info}/entry_points.txt
File without changes
{pytrilogy-0.0.3.63.dist-info → pytrilogy-0.0.3.65.dist-info}/licenses/LICENSE.md
File without changes
{pytrilogy-0.0.3.63.dist-info → pytrilogy-0.0.3.65.dist-info}/top_level.txt
File without changes