clickzetta-semantic-model-generator 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {clickzetta_semantic_model_generator-1.0.2.dist-info → clickzetta_semantic_model_generator-1.0.4.dist-info}/METADATA +5 -5
- clickzetta_semantic_model_generator-1.0.4.dist-info/RECORD +38 -0
- semantic_model_generator/clickzetta_utils/clickzetta_connector.py +100 -48
- semantic_model_generator/clickzetta_utils/env_vars.py +7 -2
- semantic_model_generator/clickzetta_utils/utils.py +44 -2
- semantic_model_generator/data_processing/cte_utils.py +44 -14
- semantic_model_generator/generate_model.py +711 -239
- semantic_model_generator/llm/dashscope_client.py +4 -2
- semantic_model_generator/llm/enrichment.py +144 -57
- semantic_model_generator/llm/progress_tracker.py +16 -15
- semantic_model_generator/relationships/__init__.py +2 -0
- semantic_model_generator/relationships/discovery.py +181 -16
- semantic_model_generator/tests/clickzetta_connector_test.py +3 -7
- semantic_model_generator/tests/cte_utils_test.py +15 -14
- semantic_model_generator/tests/generate_model_classification_test.py +12 -2
- semantic_model_generator/tests/llm_enrichment_test.py +152 -46
- semantic_model_generator/tests/relationship_discovery_test.py +70 -3
- semantic_model_generator/tests/relationships_filters_test.py +166 -30
- semantic_model_generator/tests/utils_test.py +1 -1
- semantic_model_generator/validate/keywords.py +453 -53
- semantic_model_generator/validate/schema.py +4 -2
- clickzetta_semantic_model_generator-1.0.2.dist-info/RECORD +0 -38
- {clickzetta_semantic_model_generator-1.0.2.dist-info → clickzetta_semantic_model_generator-1.0.4.dist-info}/LICENSE +0 -0
- {clickzetta_semantic_model_generator-1.0.2.dist-info → clickzetta_semantic_model_generator-1.0.4.dist-info}/WHEEL +0 -0
@@ -7,9 +7,24 @@ def test_suggest_filters_builds_in_clause_and_time_filter() -> None:
|
|
7
7
|
id_=0,
|
8
8
|
name="ORDERS",
|
9
9
|
columns=[
|
10
|
-
Column(
|
11
|
-
|
12
|
-
|
10
|
+
Column(
|
11
|
+
id_=0,
|
12
|
+
column_name="status",
|
13
|
+
column_type="STRING",
|
14
|
+
values=["OPEN", "CLOSED", "OPEN"],
|
15
|
+
),
|
16
|
+
Column(
|
17
|
+
id_=1,
|
18
|
+
column_name="order_date",
|
19
|
+
column_type="TIMESTAMP",
|
20
|
+
values=["2024-01-01", "2024-01-15"],
|
21
|
+
),
|
22
|
+
Column(
|
23
|
+
id_=2,
|
24
|
+
column_name="created_at",
|
25
|
+
column_type="STRING",
|
26
|
+
values=["2024-01-05 10:00:00"],
|
27
|
+
),
|
13
28
|
],
|
14
29
|
)
|
15
30
|
|
@@ -26,7 +41,12 @@ def test_infer_relationships_uses_pk_candidate() -> None:
|
|
26
41
|
id_=0,
|
27
42
|
name="CUSTOMERS",
|
28
43
|
columns=[
|
29
|
-
Column(
|
44
|
+
Column(
|
45
|
+
id_=0,
|
46
|
+
column_name="customer_id",
|
47
|
+
column_type="INT",
|
48
|
+
values=["1", "2", "3"],
|
49
|
+
),
|
30
50
|
Column(id_=1, column_name="customer_name", column_type="STRING"),
|
31
51
|
],
|
32
52
|
)
|
@@ -34,15 +54,35 @@ def test_infer_relationships_uses_pk_candidate() -> None:
|
|
34
54
|
id_=1,
|
35
55
|
name="ORDERS",
|
36
56
|
columns=[
|
37
|
-
Column(
|
38
|
-
|
57
|
+
Column(
|
58
|
+
id_=0,
|
59
|
+
column_name="order_id",
|
60
|
+
column_type="INT",
|
61
|
+
values=["10", "11", "12"],
|
62
|
+
),
|
63
|
+
Column(
|
64
|
+
id_=1,
|
65
|
+
column_name="customer_id",
|
66
|
+
column_type="INT",
|
67
|
+
values=["1", "2", "1"],
|
68
|
+
),
|
39
69
|
],
|
40
70
|
)
|
41
71
|
|
42
72
|
relationships = generate_model._infer_relationships(
|
43
73
|
[
|
44
|
-
(
|
45
|
-
|
74
|
+
(
|
75
|
+
FQNParts(
|
76
|
+
database="QUICK_START", schema_name="MCP_DEMO", table="CUSTOMERS"
|
77
|
+
),
|
78
|
+
customers_table,
|
79
|
+
),
|
80
|
+
(
|
81
|
+
FQNParts(
|
82
|
+
database="QUICK_START", schema_name="MCP_DEMO", table="ORDERS"
|
83
|
+
),
|
84
|
+
orders_table,
|
85
|
+
),
|
46
86
|
]
|
47
87
|
)
|
48
88
|
|
@@ -61,23 +101,46 @@ def test_infer_relationships_matches_synonym_keys() -> None:
|
|
61
101
|
id_=0,
|
62
102
|
name="ORDERS",
|
63
103
|
columns=[
|
64
|
-
Column(
|
65
|
-
|
104
|
+
Column(
|
105
|
+
id_=0,
|
106
|
+
column_name="o_orderkey",
|
107
|
+
column_type="INT",
|
108
|
+
values=["1", "2", "3"],
|
109
|
+
),
|
110
|
+
Column(
|
111
|
+
id_=1,
|
112
|
+
column_name="o_custkey",
|
113
|
+
column_type="INT",
|
114
|
+
values=["10", "20", "30"],
|
115
|
+
),
|
66
116
|
],
|
67
117
|
)
|
68
118
|
lineitem_table = Table(
|
69
119
|
id_=1,
|
70
120
|
name="LINEITEM",
|
71
121
|
columns=[
|
72
|
-
Column(
|
73
|
-
|
122
|
+
Column(
|
123
|
+
id_=0,
|
124
|
+
column_name="l_orderkey",
|
125
|
+
column_type="INT",
|
126
|
+
values=["1", "1", "2"],
|
127
|
+
),
|
128
|
+
Column(
|
129
|
+
id_=1,
|
130
|
+
column_name="l_linenumber",
|
131
|
+
column_type="INT",
|
132
|
+
values=["1", "2", "1"],
|
133
|
+
),
|
74
134
|
],
|
75
135
|
)
|
76
136
|
|
77
137
|
relationships = generate_model._infer_relationships(
|
78
138
|
[
|
79
139
|
(FQNParts(database="CAT", schema_name="SCH", table="ORDERS"), orders_table),
|
80
|
-
(
|
140
|
+
(
|
141
|
+
FQNParts(database="CAT", schema_name="SCH", table="LINEITEM"),
|
142
|
+
lineitem_table,
|
143
|
+
),
|
81
144
|
]
|
82
145
|
)
|
83
146
|
|
@@ -95,22 +158,40 @@ def test_infer_relationships_handles_part_supplier() -> None:
|
|
95
158
|
id_=0,
|
96
159
|
name="PART",
|
97
160
|
columns=[
|
98
|
-
Column(
|
161
|
+
Column(
|
162
|
+
id_=0,
|
163
|
+
column_name="p_partkey",
|
164
|
+
column_type="INT",
|
165
|
+
values=["1", "2", "3"],
|
166
|
+
),
|
99
167
|
],
|
100
168
|
)
|
101
169
|
partsupp_table = Table(
|
102
170
|
id_=1,
|
103
171
|
name="PARTSUPP",
|
104
172
|
columns=[
|
105
|
-
Column(
|
106
|
-
|
173
|
+
Column(
|
174
|
+
id_=0,
|
175
|
+
column_name="ps_partkey",
|
176
|
+
column_type="INT",
|
177
|
+
values=["1", "1", "2"],
|
178
|
+
),
|
179
|
+
Column(
|
180
|
+
id_=1,
|
181
|
+
column_name="ps_suppkey",
|
182
|
+
column_type="INT",
|
183
|
+
values=["10", "20", "30"],
|
184
|
+
),
|
107
185
|
],
|
108
186
|
)
|
109
187
|
|
110
188
|
relationships = generate_model._infer_relationships(
|
111
189
|
[
|
112
190
|
(FQNParts(database="CAT", schema_name="SCH", table="PART"), part_table),
|
113
|
-
(
|
191
|
+
(
|
192
|
+
FQNParts(database="CAT", schema_name="SCH", table="PARTSUPP"),
|
193
|
+
partsupp_table,
|
194
|
+
),
|
114
195
|
]
|
115
196
|
)
|
116
197
|
|
@@ -128,15 +209,30 @@ def test_infer_relationships_orders_customer() -> None:
|
|
128
209
|
id_=0,
|
129
210
|
name="ORDERS",
|
130
211
|
columns=[
|
131
|
-
Column(
|
132
|
-
|
212
|
+
Column(
|
213
|
+
id_=0,
|
214
|
+
column_name="o_orderkey",
|
215
|
+
column_type="INT",
|
216
|
+
values=["1", "2", "3"],
|
217
|
+
),
|
218
|
+
Column(
|
219
|
+
id_=1,
|
220
|
+
column_name="o_custkey",
|
221
|
+
column_type="INT",
|
222
|
+
values=["10", "20", "30"],
|
223
|
+
),
|
133
224
|
],
|
134
225
|
)
|
135
226
|
customer_table = Table(
|
136
227
|
id_=1,
|
137
228
|
name="CUSTOMER",
|
138
229
|
columns=[
|
139
|
-
Column(
|
230
|
+
Column(
|
231
|
+
id_=0,
|
232
|
+
column_name="c_custkey",
|
233
|
+
column_type="INT",
|
234
|
+
values=["10", "20", "30"],
|
235
|
+
),
|
140
236
|
Column(id_=1, column_name="c_name", column_type="STRING"),
|
141
237
|
],
|
142
238
|
)
|
@@ -144,7 +240,10 @@ def test_infer_relationships_orders_customer() -> None:
|
|
144
240
|
relationships = generate_model._infer_relationships(
|
145
241
|
[
|
146
242
|
(FQNParts(database="CAT", schema_name="SCH", table="ORDERS"), orders_table),
|
147
|
-
(
|
243
|
+
(
|
244
|
+
FQNParts(database="CAT", schema_name="SCH", table="CUSTOMER"),
|
245
|
+
customer_table,
|
246
|
+
),
|
148
247
|
]
|
149
248
|
)
|
150
249
|
|
@@ -162,23 +261,44 @@ def test_infer_relationships_lineitem_supplier() -> None:
|
|
162
261
|
id_=0,
|
163
262
|
name="LINEITEM",
|
164
263
|
columns=[
|
165
|
-
Column(
|
166
|
-
|
264
|
+
Column(
|
265
|
+
id_=0,
|
266
|
+
column_name="l_orderkey",
|
267
|
+
column_type="INT",
|
268
|
+
values=["1", "2", "3"],
|
269
|
+
),
|
270
|
+
Column(
|
271
|
+
id_=1,
|
272
|
+
column_name="l_suppkey",
|
273
|
+
column_type="INT",
|
274
|
+
values=["100", "101", "102"],
|
275
|
+
),
|
167
276
|
],
|
168
277
|
)
|
169
278
|
supplier_table = Table(
|
170
279
|
id_=1,
|
171
280
|
name="SUPPLIER",
|
172
281
|
columns=[
|
173
|
-
Column(
|
282
|
+
Column(
|
283
|
+
id_=0,
|
284
|
+
column_name="s_suppkey",
|
285
|
+
column_type="INT",
|
286
|
+
values=["100", "101", "102"],
|
287
|
+
),
|
174
288
|
Column(id_=1, column_name="s_name", column_type="STRING"),
|
175
289
|
],
|
176
290
|
)
|
177
291
|
|
178
292
|
relationships = generate_model._infer_relationships(
|
179
293
|
[
|
180
|
-
(
|
181
|
-
|
294
|
+
(
|
295
|
+
FQNParts(database="CAT", schema_name="SCH", table="LINEITEM"),
|
296
|
+
lineitem_table,
|
297
|
+
),
|
298
|
+
(
|
299
|
+
FQNParts(database="CAT", schema_name="SCH", table="SUPPLIER"),
|
300
|
+
supplier_table,
|
301
|
+
),
|
182
302
|
]
|
183
303
|
)
|
184
304
|
|
@@ -196,7 +316,13 @@ def test_infer_relationships_handles_suffix_based_foreign_keys() -> None:
|
|
196
316
|
id_=0,
|
197
317
|
name="DIM_DATE",
|
198
318
|
columns=[
|
199
|
-
Column(
|
319
|
+
Column(
|
320
|
+
id_=0,
|
321
|
+
column_name="date_id",
|
322
|
+
column_type="INT",
|
323
|
+
values=["20240101", "20240102"],
|
324
|
+
is_primary_key=True,
|
325
|
+
),
|
200
326
|
Column(id_=1, column_name="date_value", column_type="DATE"),
|
201
327
|
],
|
202
328
|
)
|
@@ -204,15 +330,25 @@ def test_infer_relationships_handles_suffix_based_foreign_keys() -> None:
|
|
204
330
|
id_=1,
|
205
331
|
name="FACT_SALES",
|
206
332
|
columns=[
|
207
|
-
Column(
|
208
|
-
|
333
|
+
Column(
|
334
|
+
id_=0, column_name="order_id", column_type="INT", values=["10", "11"]
|
335
|
+
),
|
336
|
+
Column(
|
337
|
+
id_=1,
|
338
|
+
column_name="order_date_id",
|
339
|
+
column_type="INT",
|
340
|
+
values=["20240101", "20240102"],
|
341
|
+
),
|
209
342
|
],
|
210
343
|
)
|
211
344
|
|
212
345
|
relationships = generate_model._infer_relationships(
|
213
346
|
[
|
214
347
|
(FQNParts(database="CAT", schema_name="SCH", table="DIM_DATE"), dim_date),
|
215
|
-
(
|
348
|
+
(
|
349
|
+
FQNParts(database="CAT", schema_name="SCH", table="FACT_SALES"),
|
350
|
+
fact_sales,
|
351
|
+
),
|
216
352
|
]
|
217
353
|
)
|
218
354
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import pytest
|
2
2
|
|
3
|
-
from semantic_model_generator.data_processing.data_types import FQNParts
|
4
3
|
from semantic_model_generator.clickzetta_utils.utils import create_fqn_table
|
4
|
+
from semantic_model_generator.data_processing.data_types import FQNParts
|
5
5
|
|
6
6
|
|
7
7
|
def test_fqn_creation():
|