clickzetta-semantic-model-generator 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {clickzetta_semantic_model_generator-1.0.2.dist-info → clickzetta_semantic_model_generator-1.0.4.dist-info}/METADATA +5 -5
  2. clickzetta_semantic_model_generator-1.0.4.dist-info/RECORD +38 -0
  3. semantic_model_generator/clickzetta_utils/clickzetta_connector.py +100 -48
  4. semantic_model_generator/clickzetta_utils/env_vars.py +7 -2
  5. semantic_model_generator/clickzetta_utils/utils.py +44 -2
  6. semantic_model_generator/data_processing/cte_utils.py +44 -14
  7. semantic_model_generator/generate_model.py +711 -239
  8. semantic_model_generator/llm/dashscope_client.py +4 -2
  9. semantic_model_generator/llm/enrichment.py +144 -57
  10. semantic_model_generator/llm/progress_tracker.py +16 -15
  11. semantic_model_generator/relationships/__init__.py +2 -0
  12. semantic_model_generator/relationships/discovery.py +181 -16
  13. semantic_model_generator/tests/clickzetta_connector_test.py +3 -7
  14. semantic_model_generator/tests/cte_utils_test.py +15 -14
  15. semantic_model_generator/tests/generate_model_classification_test.py +12 -2
  16. semantic_model_generator/tests/llm_enrichment_test.py +152 -46
  17. semantic_model_generator/tests/relationship_discovery_test.py +70 -3
  18. semantic_model_generator/tests/relationships_filters_test.py +166 -30
  19. semantic_model_generator/tests/utils_test.py +1 -1
  20. semantic_model_generator/validate/keywords.py +453 -53
  21. semantic_model_generator/validate/schema.py +4 -2
  22. clickzetta_semantic_model_generator-1.0.2.dist-info/RECORD +0 -38
  23. {clickzetta_semantic_model_generator-1.0.2.dist-info → clickzetta_semantic_model_generator-1.0.4.dist-info}/LICENSE +0 -0
  24. {clickzetta_semantic_model_generator-1.0.2.dist-info → clickzetta_semantic_model_generator-1.0.4.dist-info}/WHEEL +0 -0
@@ -7,9 +7,24 @@ def test_suggest_filters_builds_in_clause_and_time_filter() -> None:
7
7
  id_=0,
8
8
  name="ORDERS",
9
9
  columns=[
10
- Column(id_=0, column_name="status", column_type="STRING", values=["OPEN", "CLOSED", "OPEN"]),
11
- Column(id_=1, column_name="order_date", column_type="TIMESTAMP", values=["2024-01-01", "2024-01-15"]),
12
- Column(id_=2, column_name="created_at", column_type="STRING", values=["2024-01-05 10:00:00"]),
10
+ Column(
11
+ id_=0,
12
+ column_name="status",
13
+ column_type="STRING",
14
+ values=["OPEN", "CLOSED", "OPEN"],
15
+ ),
16
+ Column(
17
+ id_=1,
18
+ column_name="order_date",
19
+ column_type="TIMESTAMP",
20
+ values=["2024-01-01", "2024-01-15"],
21
+ ),
22
+ Column(
23
+ id_=2,
24
+ column_name="created_at",
25
+ column_type="STRING",
26
+ values=["2024-01-05 10:00:00"],
27
+ ),
13
28
  ],
14
29
  )
15
30
 
@@ -26,7 +41,12 @@ def test_infer_relationships_uses_pk_candidate() -> None:
26
41
  id_=0,
27
42
  name="CUSTOMERS",
28
43
  columns=[
29
- Column(id_=0, column_name="customer_id", column_type="INT", values=["1", "2", "3"]),
44
+ Column(
45
+ id_=0,
46
+ column_name="customer_id",
47
+ column_type="INT",
48
+ values=["1", "2", "3"],
49
+ ),
30
50
  Column(id_=1, column_name="customer_name", column_type="STRING"),
31
51
  ],
32
52
  )
@@ -34,15 +54,35 @@ def test_infer_relationships_uses_pk_candidate() -> None:
34
54
  id_=1,
35
55
  name="ORDERS",
36
56
  columns=[
37
- Column(id_=0, column_name="order_id", column_type="INT", values=["10", "11", "12"]),
38
- Column(id_=1, column_name="customer_id", column_type="INT", values=["1", "2", "1"]),
57
+ Column(
58
+ id_=0,
59
+ column_name="order_id",
60
+ column_type="INT",
61
+ values=["10", "11", "12"],
62
+ ),
63
+ Column(
64
+ id_=1,
65
+ column_name="customer_id",
66
+ column_type="INT",
67
+ values=["1", "2", "1"],
68
+ ),
39
69
  ],
40
70
  )
41
71
 
42
72
  relationships = generate_model._infer_relationships(
43
73
  [
44
- (FQNParts(database="QUICK_START", schema_name="MCP_DEMO", table="CUSTOMERS"), customers_table),
45
- (FQNParts(database="QUICK_START", schema_name="MCP_DEMO", table="ORDERS"), orders_table),
74
+ (
75
+ FQNParts(
76
+ database="QUICK_START", schema_name="MCP_DEMO", table="CUSTOMERS"
77
+ ),
78
+ customers_table,
79
+ ),
80
+ (
81
+ FQNParts(
82
+ database="QUICK_START", schema_name="MCP_DEMO", table="ORDERS"
83
+ ),
84
+ orders_table,
85
+ ),
46
86
  ]
47
87
  )
48
88
 
@@ -61,23 +101,46 @@ def test_infer_relationships_matches_synonym_keys() -> None:
61
101
  id_=0,
62
102
  name="ORDERS",
63
103
  columns=[
64
- Column(id_=0, column_name="o_orderkey", column_type="INT", values=["1", "2", "3"]),
65
- Column(id_=1, column_name="o_custkey", column_type="INT", values=["10", "20", "30"]),
104
+ Column(
105
+ id_=0,
106
+ column_name="o_orderkey",
107
+ column_type="INT",
108
+ values=["1", "2", "3"],
109
+ ),
110
+ Column(
111
+ id_=1,
112
+ column_name="o_custkey",
113
+ column_type="INT",
114
+ values=["10", "20", "30"],
115
+ ),
66
116
  ],
67
117
  )
68
118
  lineitem_table = Table(
69
119
  id_=1,
70
120
  name="LINEITEM",
71
121
  columns=[
72
- Column(id_=0, column_name="l_orderkey", column_type="INT", values=["1", "1", "2"]),
73
- Column(id_=1, column_name="l_linenumber", column_type="INT", values=["1", "2", "1"]),
122
+ Column(
123
+ id_=0,
124
+ column_name="l_orderkey",
125
+ column_type="INT",
126
+ values=["1", "1", "2"],
127
+ ),
128
+ Column(
129
+ id_=1,
130
+ column_name="l_linenumber",
131
+ column_type="INT",
132
+ values=["1", "2", "1"],
133
+ ),
74
134
  ],
75
135
  )
76
136
 
77
137
  relationships = generate_model._infer_relationships(
78
138
  [
79
139
  (FQNParts(database="CAT", schema_name="SCH", table="ORDERS"), orders_table),
80
- (FQNParts(database="CAT", schema_name="SCH", table="LINEITEM"), lineitem_table),
140
+ (
141
+ FQNParts(database="CAT", schema_name="SCH", table="LINEITEM"),
142
+ lineitem_table,
143
+ ),
81
144
  ]
82
145
  )
83
146
 
@@ -95,22 +158,40 @@ def test_infer_relationships_handles_part_supplier() -> None:
95
158
  id_=0,
96
159
  name="PART",
97
160
  columns=[
98
- Column(id_=0, column_name="p_partkey", column_type="INT", values=["1", "2", "3"]),
161
+ Column(
162
+ id_=0,
163
+ column_name="p_partkey",
164
+ column_type="INT",
165
+ values=["1", "2", "3"],
166
+ ),
99
167
  ],
100
168
  )
101
169
  partsupp_table = Table(
102
170
  id_=1,
103
171
  name="PARTSUPP",
104
172
  columns=[
105
- Column(id_=0, column_name="ps_partkey", column_type="INT", values=["1", "1", "2"]),
106
- Column(id_=1, column_name="ps_suppkey", column_type="INT", values=["10", "20", "30"]),
173
+ Column(
174
+ id_=0,
175
+ column_name="ps_partkey",
176
+ column_type="INT",
177
+ values=["1", "1", "2"],
178
+ ),
179
+ Column(
180
+ id_=1,
181
+ column_name="ps_suppkey",
182
+ column_type="INT",
183
+ values=["10", "20", "30"],
184
+ ),
107
185
  ],
108
186
  )
109
187
 
110
188
  relationships = generate_model._infer_relationships(
111
189
  [
112
190
  (FQNParts(database="CAT", schema_name="SCH", table="PART"), part_table),
113
- (FQNParts(database="CAT", schema_name="SCH", table="PARTSUPP"), partsupp_table),
191
+ (
192
+ FQNParts(database="CAT", schema_name="SCH", table="PARTSUPP"),
193
+ partsupp_table,
194
+ ),
114
195
  ]
115
196
  )
116
197
 
@@ -128,15 +209,30 @@ def test_infer_relationships_orders_customer() -> None:
128
209
  id_=0,
129
210
  name="ORDERS",
130
211
  columns=[
131
- Column(id_=0, column_name="o_orderkey", column_type="INT", values=["1", "2", "3"]),
132
- Column(id_=1, column_name="o_custkey", column_type="INT", values=["10", "20", "30"]),
212
+ Column(
213
+ id_=0,
214
+ column_name="o_orderkey",
215
+ column_type="INT",
216
+ values=["1", "2", "3"],
217
+ ),
218
+ Column(
219
+ id_=1,
220
+ column_name="o_custkey",
221
+ column_type="INT",
222
+ values=["10", "20", "30"],
223
+ ),
133
224
  ],
134
225
  )
135
226
  customer_table = Table(
136
227
  id_=1,
137
228
  name="CUSTOMER",
138
229
  columns=[
139
- Column(id_=0, column_name="c_custkey", column_type="INT", values=["10", "20", "30"]),
230
+ Column(
231
+ id_=0,
232
+ column_name="c_custkey",
233
+ column_type="INT",
234
+ values=["10", "20", "30"],
235
+ ),
140
236
  Column(id_=1, column_name="c_name", column_type="STRING"),
141
237
  ],
142
238
  )
@@ -144,7 +240,10 @@ def test_infer_relationships_orders_customer() -> None:
144
240
  relationships = generate_model._infer_relationships(
145
241
  [
146
242
  (FQNParts(database="CAT", schema_name="SCH", table="ORDERS"), orders_table),
147
- (FQNParts(database="CAT", schema_name="SCH", table="CUSTOMER"), customer_table),
243
+ (
244
+ FQNParts(database="CAT", schema_name="SCH", table="CUSTOMER"),
245
+ customer_table,
246
+ ),
148
247
  ]
149
248
  )
150
249
 
@@ -162,23 +261,44 @@ def test_infer_relationships_lineitem_supplier() -> None:
162
261
  id_=0,
163
262
  name="LINEITEM",
164
263
  columns=[
165
- Column(id_=0, column_name="l_orderkey", column_type="INT", values=["1", "2", "3"]),
166
- Column(id_=1, column_name="l_suppkey", column_type="INT", values=["100", "101", "102"]),
264
+ Column(
265
+ id_=0,
266
+ column_name="l_orderkey",
267
+ column_type="INT",
268
+ values=["1", "2", "3"],
269
+ ),
270
+ Column(
271
+ id_=1,
272
+ column_name="l_suppkey",
273
+ column_type="INT",
274
+ values=["100", "101", "102"],
275
+ ),
167
276
  ],
168
277
  )
169
278
  supplier_table = Table(
170
279
  id_=1,
171
280
  name="SUPPLIER",
172
281
  columns=[
173
- Column(id_=0, column_name="s_suppkey", column_type="INT", values=["100", "101", "102"]),
282
+ Column(
283
+ id_=0,
284
+ column_name="s_suppkey",
285
+ column_type="INT",
286
+ values=["100", "101", "102"],
287
+ ),
174
288
  Column(id_=1, column_name="s_name", column_type="STRING"),
175
289
  ],
176
290
  )
177
291
 
178
292
  relationships = generate_model._infer_relationships(
179
293
  [
180
- (FQNParts(database="CAT", schema_name="SCH", table="LINEITEM"), lineitem_table),
181
- (FQNParts(database="CAT", schema_name="SCH", table="SUPPLIER"), supplier_table),
294
+ (
295
+ FQNParts(database="CAT", schema_name="SCH", table="LINEITEM"),
296
+ lineitem_table,
297
+ ),
298
+ (
299
+ FQNParts(database="CAT", schema_name="SCH", table="SUPPLIER"),
300
+ supplier_table,
301
+ ),
182
302
  ]
183
303
  )
184
304
 
@@ -196,7 +316,13 @@ def test_infer_relationships_handles_suffix_based_foreign_keys() -> None:
196
316
  id_=0,
197
317
  name="DIM_DATE",
198
318
  columns=[
199
- Column(id_=0, column_name="date_id", column_type="INT", values=["20240101", "20240102"], is_primary_key=True),
319
+ Column(
320
+ id_=0,
321
+ column_name="date_id",
322
+ column_type="INT",
323
+ values=["20240101", "20240102"],
324
+ is_primary_key=True,
325
+ ),
200
326
  Column(id_=1, column_name="date_value", column_type="DATE"),
201
327
  ],
202
328
  )
@@ -204,15 +330,25 @@ def test_infer_relationships_handles_suffix_based_foreign_keys() -> None:
204
330
  id_=1,
205
331
  name="FACT_SALES",
206
332
  columns=[
207
- Column(id_=0, column_name="order_id", column_type="INT", values=["10", "11"]),
208
- Column(id_=1, column_name="order_date_id", column_type="INT", values=["20240101", "20240102"]),
333
+ Column(
334
+ id_=0, column_name="order_id", column_type="INT", values=["10", "11"]
335
+ ),
336
+ Column(
337
+ id_=1,
338
+ column_name="order_date_id",
339
+ column_type="INT",
340
+ values=["20240101", "20240102"],
341
+ ),
209
342
  ],
210
343
  )
211
344
 
212
345
  relationships = generate_model._infer_relationships(
213
346
  [
214
347
  (FQNParts(database="CAT", schema_name="SCH", table="DIM_DATE"), dim_date),
215
- (FQNParts(database="CAT", schema_name="SCH", table="FACT_SALES"), fact_sales),
348
+ (
349
+ FQNParts(database="CAT", schema_name="SCH", table="FACT_SALES"),
350
+ fact_sales,
351
+ ),
216
352
  ]
217
353
  )
218
354
 
@@ -1,7 +1,7 @@
1
1
  import pytest
2
2
 
3
- from semantic_model_generator.data_processing.data_types import FQNParts
4
3
  from semantic_model_generator.clickzetta_utils.utils import create_fqn_table
4
+ from semantic_model_generator.data_processing.data_types import FQNParts
5
5
 
6
6
 
7
7
  def test_fqn_creation():