clickzetta-semantic-model-generator 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {clickzetta_semantic_model_generator-1.0.1.dist-info → clickzetta_semantic_model_generator-1.0.3.dist-info}/METADATA +5 -5
- {clickzetta_semantic_model_generator-1.0.1.dist-info → clickzetta_semantic_model_generator-1.0.3.dist-info}/RECORD +22 -19
- semantic_model_generator/clickzetta_utils/clickzetta_connector.py +91 -33
- semantic_model_generator/clickzetta_utils/env_vars.py +7 -2
- semantic_model_generator/data_processing/cte_utils.py +1 -1
- semantic_model_generator/generate_model.py +588 -224
- semantic_model_generator/llm/dashscope_client.py +4 -2
- semantic_model_generator/llm/enrichment.py +144 -57
- semantic_model_generator/llm/progress_tracker.py +16 -15
- semantic_model_generator/relationships/__init__.py +15 -0
- semantic_model_generator/relationships/discovery.py +202 -0
- semantic_model_generator/tests/clickzetta_connector_test.py +3 -7
- semantic_model_generator/tests/cte_utils_test.py +1 -1
- semantic_model_generator/tests/generate_model_classification_test.py +12 -2
- semantic_model_generator/tests/llm_enrichment_test.py +152 -46
- semantic_model_generator/tests/relationship_discovery_test.py +114 -0
- semantic_model_generator/tests/relationships_filters_test.py +166 -30
- semantic_model_generator/tests/utils_test.py +1 -1
- semantic_model_generator/validate/keywords.py +453 -53
- semantic_model_generator/validate/schema.py +4 -2
- {clickzetta_semantic_model_generator-1.0.1.dist-info → clickzetta_semantic_model_generator-1.0.3.dist-info}/LICENSE +0 -0
- {clickzetta_semantic_model_generator-1.0.1.dist-info → clickzetta_semantic_model_generator-1.0.3.dist-info}/WHEEL +0 -0
@@ -1,57 +1,457 @@
|
|
1
1
|
# ClickZetta SQL reserved words mirrored from clickzetta.zettapark._internal.reserved_words.
|
2
2
|
# Reference: https://yunqi.tech/documents/sql-reference
|
3
3
|
CZ_RESERVED_WORDS = {
|
4
|
-
"ABS",
|
5
|
-
"
|
6
|
-
"
|
7
|
-
"
|
8
|
-
"
|
9
|
-
"
|
10
|
-
"
|
11
|
-
"
|
12
|
-
"
|
13
|
-
"
|
14
|
-
"
|
15
|
-
"
|
16
|
-
"
|
17
|
-
"
|
18
|
-
"
|
19
|
-
"
|
20
|
-
"
|
21
|
-
"
|
22
|
-
"
|
23
|
-
"
|
24
|
-
"
|
25
|
-
"
|
26
|
-
"
|
27
|
-
"
|
28
|
-
"
|
29
|
-
"
|
30
|
-
"
|
31
|
-
"
|
32
|
-
"
|
33
|
-
"
|
34
|
-
"
|
35
|
-
"
|
36
|
-
"
|
37
|
-
"
|
38
|
-
"
|
39
|
-
"
|
40
|
-
"
|
41
|
-
"
|
42
|
-
"
|
43
|
-
"
|
44
|
-
"
|
45
|
-
"
|
46
|
-
"
|
47
|
-
"
|
48
|
-
"
|
49
|
-
"
|
50
|
-
"
|
51
|
-
"
|
52
|
-
"
|
53
|
-
"
|
54
|
-
"
|
55
|
-
"
|
56
|
-
"
|
4
|
+
"ABS",
|
5
|
+
"ABSENT",
|
6
|
+
"ABSOLUTE",
|
7
|
+
"ACOS",
|
8
|
+
"ACTION",
|
9
|
+
"ADD",
|
10
|
+
"ALL",
|
11
|
+
"ALLOCATE",
|
12
|
+
"ALTER",
|
13
|
+
"AND",
|
14
|
+
"ANY",
|
15
|
+
"ARE",
|
16
|
+
"ARRAY",
|
17
|
+
"ARRAY_AGG",
|
18
|
+
"ARRAY_MAX_CARDINALITY",
|
19
|
+
"AS",
|
20
|
+
"ASC",
|
21
|
+
"ASENSITIVE",
|
22
|
+
"ASIN",
|
23
|
+
"ASSERTION",
|
24
|
+
"ASYMMETRIC",
|
25
|
+
"AT",
|
26
|
+
"ATAN",
|
27
|
+
"ATOMIC",
|
28
|
+
"AUTHORIZATION",
|
29
|
+
"AVG",
|
30
|
+
"BEGIN",
|
31
|
+
"BEGIN_FRAME",
|
32
|
+
"BEGIN_PARTITION",
|
33
|
+
"BETWEEN",
|
34
|
+
"BIGINT",
|
35
|
+
"BINARY",
|
36
|
+
"BIT",
|
37
|
+
"BIT_LENGTH",
|
38
|
+
"BLOB",
|
39
|
+
"BOOLEAN",
|
40
|
+
"BOTH",
|
41
|
+
"BY",
|
42
|
+
"CALL",
|
43
|
+
"CALLED",
|
44
|
+
"CARDINALITY",
|
45
|
+
"CASCADE",
|
46
|
+
"CASCADED",
|
47
|
+
"CASE",
|
48
|
+
"CAST",
|
49
|
+
"CATALOG",
|
50
|
+
"CEIL",
|
51
|
+
"CEILING",
|
52
|
+
"CHAR",
|
53
|
+
"CHARACTER",
|
54
|
+
"CHARACTER_LENGTH",
|
55
|
+
"CHAR_LENGTH",
|
56
|
+
"CHECK",
|
57
|
+
"CLASSIFIER",
|
58
|
+
"CLOB",
|
59
|
+
"CLOSE",
|
60
|
+
"COALESCE",
|
61
|
+
"COLLATE",
|
62
|
+
"COLLECT",
|
63
|
+
"COLUMN",
|
64
|
+
"COMMIT",
|
65
|
+
"CONDITION",
|
66
|
+
"CONNECT",
|
67
|
+
"CONNECTION",
|
68
|
+
"CONSTRAINT",
|
69
|
+
"CONTAINS",
|
70
|
+
"CONVERT",
|
71
|
+
"COPY",
|
72
|
+
"CORR",
|
73
|
+
"CORRESPONDING",
|
74
|
+
"COS",
|
75
|
+
"COSH",
|
76
|
+
"COUNT",
|
77
|
+
"COVAR_POP",
|
78
|
+
"COVAR_SAMP",
|
79
|
+
"CREATE",
|
80
|
+
"CROSS",
|
81
|
+
"CUBE",
|
82
|
+
"CUME_DIST",
|
83
|
+
"CURRENT",
|
84
|
+
"CURRENT_CATALOG",
|
85
|
+
"CURRENT_DATE",
|
86
|
+
"CURRENT_DEFAULT_TRANSFORM_GROUP",
|
87
|
+
"CURRENT_PATH",
|
88
|
+
"CURRENT_ROLE",
|
89
|
+
"CURRENT_ROW",
|
90
|
+
"CURRENT_SCHEMA",
|
91
|
+
"CURRENT_TIME",
|
92
|
+
"CURRENT_TIMESTAMP",
|
93
|
+
"CURRENT_TRANSFORM_GROUP_FOR_TYPE",
|
94
|
+
"CURRENT_USER",
|
95
|
+
"CURSOR",
|
96
|
+
"CYCLE",
|
97
|
+
"DATALINK",
|
98
|
+
"DATE",
|
99
|
+
"DAY",
|
100
|
+
"DEALLOCATE",
|
101
|
+
"DEC",
|
102
|
+
"DECFLOAT",
|
103
|
+
"DECIMAL",
|
104
|
+
"DECLARE",
|
105
|
+
"DEFAULT",
|
106
|
+
"DEFINE",
|
107
|
+
"DELETE",
|
108
|
+
"DENSE_RANK",
|
109
|
+
"DEREF",
|
110
|
+
"DESCRIBE",
|
111
|
+
"DETERMINISTIC",
|
112
|
+
"DIAGNOSTICS",
|
113
|
+
"DISCONNECT",
|
114
|
+
"DISTINCT",
|
115
|
+
"DLNEWCOPY",
|
116
|
+
"DLPREVIOUSCOPY",
|
117
|
+
"DLURLCOMPLETE",
|
118
|
+
"DLURLCOMPLETEONLY",
|
119
|
+
"DLURLCOMPLETEWRITE",
|
120
|
+
"DLURLPATH",
|
121
|
+
"DLURLPATHONLY",
|
122
|
+
"DLURLPATHWRITE",
|
123
|
+
"DLURLSCHEME",
|
124
|
+
"DLURLSERVER",
|
125
|
+
"DLVALUE",
|
126
|
+
"DOMAIN",
|
127
|
+
"DOUBLE",
|
128
|
+
"DROP",
|
129
|
+
"DYNAMIC",
|
130
|
+
"EACH",
|
131
|
+
"ELEMENT",
|
132
|
+
"ELSE",
|
133
|
+
"EMPTY",
|
134
|
+
"END",
|
135
|
+
"END-EXEC",
|
136
|
+
"END_FRAME",
|
137
|
+
"END_PARTITION",
|
138
|
+
"EQUALS",
|
139
|
+
"ESCAPE",
|
140
|
+
"EVERY",
|
141
|
+
"EXCEPT",
|
142
|
+
"EXCEPTION",
|
143
|
+
"EXEC",
|
144
|
+
"EXECUTE",
|
145
|
+
"EXISTS",
|
146
|
+
"EXP",
|
147
|
+
"EXTERNAL",
|
148
|
+
"EXTRACT",
|
149
|
+
"FALSE",
|
150
|
+
"FETCH",
|
151
|
+
"FILTER",
|
152
|
+
"FIRST",
|
153
|
+
"FIRST_VALUE",
|
154
|
+
"FLOAT",
|
155
|
+
"FLOOR",
|
156
|
+
"FOR",
|
157
|
+
"FOREIGN",
|
158
|
+
"FRAME_ROW",
|
159
|
+
"FREE",
|
160
|
+
"FROM",
|
161
|
+
"FULL",
|
162
|
+
"FUNCTION",
|
163
|
+
"FUSION",
|
164
|
+
"GET",
|
165
|
+
"GLOBAL",
|
166
|
+
"GO",
|
167
|
+
"GOTO",
|
168
|
+
"GRANT",
|
169
|
+
"GROUP",
|
170
|
+
"GROUPING",
|
171
|
+
"GROUPS",
|
172
|
+
"HAVING",
|
173
|
+
"HOLD",
|
174
|
+
"HOUR",
|
175
|
+
"IDENTITY",
|
176
|
+
"IMMEDIATE",
|
177
|
+
"IMPORT",
|
178
|
+
"IN",
|
179
|
+
"INCREMENTAL_DELETE_DELTA",
|
180
|
+
"INDICATOR",
|
181
|
+
"INITIAL",
|
182
|
+
"INITIALLY",
|
183
|
+
"INNER",
|
184
|
+
"INOUT",
|
185
|
+
"INPUT",
|
186
|
+
"INSENSITIVE",
|
187
|
+
"INSERT",
|
188
|
+
"INT",
|
189
|
+
"INTEGER",
|
190
|
+
"INTERSECT",
|
191
|
+
"INTERSECTION",
|
192
|
+
"INTERVAL",
|
193
|
+
"INTO",
|
194
|
+
"IS",
|
195
|
+
"ISOLATION",
|
196
|
+
"JOIN",
|
197
|
+
"JSON_ARRAY",
|
198
|
+
"JSON_ARRAYAGG",
|
199
|
+
"JSON_EXISTS",
|
200
|
+
"JSON_OBJECT",
|
201
|
+
"JSON_OBJECTAGG",
|
202
|
+
"JSON_QUERY",
|
203
|
+
"JSON_TABLE",
|
204
|
+
"JSON_TABLE_PRIMITIVE",
|
205
|
+
"JSON_VALUE",
|
206
|
+
"KEY",
|
207
|
+
"LAG",
|
208
|
+
"LANGUAGE",
|
209
|
+
"LARGE",
|
210
|
+
"LAST",
|
211
|
+
"LAST_VALUE",
|
212
|
+
"LATERAL",
|
213
|
+
"LEAD",
|
214
|
+
"LEADING",
|
215
|
+
"LEFT",
|
216
|
+
"LEVEL",
|
217
|
+
"LIKE",
|
218
|
+
"LIKE_REGEX",
|
219
|
+
"LISTAGG",
|
220
|
+
"LN",
|
221
|
+
"LOCAL",
|
222
|
+
"LOCALTIME",
|
223
|
+
"LOCALTIMESTAMP",
|
224
|
+
"LOG",
|
225
|
+
"LOG10",
|
226
|
+
"LOWER",
|
227
|
+
"MATCH",
|
228
|
+
"MATCHES",
|
229
|
+
"MATCH_NUMBER",
|
230
|
+
"MATCH_RECOGNIZE",
|
231
|
+
"MAX",
|
232
|
+
"MEASURES",
|
233
|
+
"MEMBER",
|
234
|
+
"MERGE",
|
235
|
+
"METHOD",
|
236
|
+
"MIN",
|
237
|
+
"MINUTE",
|
238
|
+
"MOD",
|
239
|
+
"MODIFIES",
|
240
|
+
"MODULE",
|
241
|
+
"MONTH",
|
242
|
+
"MULTISET",
|
243
|
+
"NAMES",
|
244
|
+
"NATIONAL",
|
245
|
+
"NATURAL",
|
246
|
+
"NCHAR",
|
247
|
+
"NCLOB",
|
248
|
+
"NEW",
|
249
|
+
"NEXT",
|
250
|
+
"NO",
|
251
|
+
"NONE",
|
252
|
+
"NORMALIZE",
|
253
|
+
"NOT",
|
254
|
+
"NTH_VALUE",
|
255
|
+
"NTILE",
|
256
|
+
"NULL",
|
257
|
+
"NULLIF",
|
258
|
+
"NUMERIC",
|
259
|
+
"OCCURRENCES_REGEX",
|
260
|
+
"OCTET_LENGTH",
|
261
|
+
"OF",
|
262
|
+
"OFFSET",
|
263
|
+
"OLD",
|
264
|
+
"OMIT",
|
265
|
+
"ON",
|
266
|
+
"ONE",
|
267
|
+
"ONLY",
|
268
|
+
"OPEN",
|
269
|
+
"OPTION",
|
270
|
+
"OR",
|
271
|
+
"ORDER",
|
272
|
+
"OUT",
|
273
|
+
"OUTER",
|
274
|
+
"OUTPUT",
|
275
|
+
"OVER",
|
276
|
+
"OVERLAPS",
|
277
|
+
"OVERLAY",
|
278
|
+
"PAD",
|
279
|
+
"PARAMETER",
|
280
|
+
"PARTIAL",
|
281
|
+
"PARTITION",
|
282
|
+
"PATTERN",
|
283
|
+
"PER",
|
284
|
+
"PERCENT",
|
285
|
+
"PERCENTILE_CONT",
|
286
|
+
"PERCENTILE_DISC",
|
287
|
+
"PERCENT_RANK",
|
288
|
+
"PERIOD",
|
289
|
+
"PERMUTE",
|
290
|
+
"PORTION",
|
291
|
+
"POSITION",
|
292
|
+
"POSITION_REGEX",
|
293
|
+
"POWER",
|
294
|
+
"PRECEDES",
|
295
|
+
"PRECISION",
|
296
|
+
"PREPARE",
|
297
|
+
"PRESERVE",
|
298
|
+
"PRIMARY",
|
299
|
+
"PRIOR",
|
300
|
+
"PRIVILEGES",
|
301
|
+
"PROCEDURE",
|
302
|
+
"PTF",
|
303
|
+
"PUBLIC",
|
304
|
+
"RANGE",
|
305
|
+
"RANK",
|
306
|
+
"READ",
|
307
|
+
"READS",
|
308
|
+
"REAL",
|
309
|
+
"RECURSIVE",
|
310
|
+
"REF",
|
311
|
+
"REFERENCES",
|
312
|
+
"REFERENCING",
|
313
|
+
"REGR_AVGX",
|
314
|
+
"REGR_AVGY",
|
315
|
+
"REGR_COUNT",
|
316
|
+
"REGR_INTERCEPT",
|
317
|
+
"REGR_R2",
|
318
|
+
"REGR_SLOPE",
|
319
|
+
"REGR_SXX",
|
320
|
+
"REGR_SXY",
|
321
|
+
"REGR_SYY",
|
322
|
+
"RELATIVE",
|
323
|
+
"RELEASE",
|
324
|
+
"RESTRICT",
|
325
|
+
"RESULT",
|
326
|
+
"RETURN",
|
327
|
+
"RETURNS",
|
328
|
+
"REVOKE",
|
329
|
+
"RIGHT",
|
330
|
+
"ROLLBACK",
|
331
|
+
"ROLLUP",
|
332
|
+
"ROW",
|
333
|
+
"ROWS",
|
334
|
+
"ROW_NUMBER",
|
335
|
+
"RUNNING",
|
336
|
+
"SAVEPOINT",
|
337
|
+
"SCHEMA",
|
338
|
+
"SCOPE",
|
339
|
+
"SCROLL",
|
340
|
+
"SEARCH",
|
341
|
+
"SECOND",
|
342
|
+
"SECTION",
|
343
|
+
"SEEK",
|
344
|
+
"SELECT",
|
345
|
+
"SENSITIVE",
|
346
|
+
"SESSION",
|
347
|
+
"SESSION_USER",
|
348
|
+
"SET",
|
349
|
+
"SHOW",
|
350
|
+
"SIMILAR",
|
351
|
+
"SIN",
|
352
|
+
"SINH",
|
353
|
+
"SIZE",
|
354
|
+
"SKIP",
|
355
|
+
"SMALLINT",
|
356
|
+
"SOME",
|
357
|
+
"SPACE",
|
358
|
+
"SPECIFIC",
|
359
|
+
"SPECIFICTYPE",
|
360
|
+
"SQL",
|
361
|
+
"SQLCODE",
|
362
|
+
"SQLERROR",
|
363
|
+
"SQLEXCEPTION",
|
364
|
+
"SQLSTATE",
|
365
|
+
"SQLWARNING",
|
366
|
+
"SQRT",
|
367
|
+
"START",
|
368
|
+
"STATIC",
|
369
|
+
"STDDEV_POP",
|
370
|
+
"STDDEV_SAMP",
|
371
|
+
"STREAM",
|
372
|
+
"SUBMULTISET",
|
373
|
+
"SUBSET",
|
374
|
+
"SUBSTRING",
|
375
|
+
"SUBSTRING_REGEX",
|
376
|
+
"SUCCEEDS",
|
377
|
+
"SUM",
|
378
|
+
"SYMMETRIC",
|
379
|
+
"SYSTEM",
|
380
|
+
"SYSTEM_TIME",
|
381
|
+
"SYSTEM_USER",
|
382
|
+
"TABLE",
|
383
|
+
"TABLESAMPLE",
|
384
|
+
"TAN",
|
385
|
+
"TANH",
|
386
|
+
"TEMPORARY",
|
387
|
+
"THEN",
|
388
|
+
"TIME",
|
389
|
+
"TIMESTAMP",
|
390
|
+
"TIMEZONE_HOUR",
|
391
|
+
"TIMEZONE_MINUTE",
|
392
|
+
"TO",
|
393
|
+
"TRAILING",
|
394
|
+
"TRANSACTION",
|
395
|
+
"TRANSLATE",
|
396
|
+
"TRANSLATE_REGEX",
|
397
|
+
"TRANSLATION",
|
398
|
+
"TREAT",
|
399
|
+
"TRIGGER",
|
400
|
+
"TRIM",
|
401
|
+
"TRIM_ARRAY",
|
402
|
+
"TRUE",
|
403
|
+
"TRUNCATE",
|
404
|
+
"UESCAPE",
|
405
|
+
"UNION",
|
406
|
+
"UNIQUE",
|
407
|
+
"UNKNOWN",
|
408
|
+
"UNMATCHED",
|
409
|
+
"UNNEST",
|
410
|
+
"UPDATE",
|
411
|
+
"UPPER",
|
412
|
+
"USAGE",
|
413
|
+
"USER",
|
414
|
+
"USING",
|
415
|
+
"VALUE",
|
416
|
+
"VALUES",
|
417
|
+
"VALUE_OF",
|
418
|
+
"VARBINARY",
|
419
|
+
"VARCHAR",
|
420
|
+
"VARYING",
|
421
|
+
"VAR_POP",
|
422
|
+
"VAR_SAMP",
|
423
|
+
"VERSIONING",
|
424
|
+
"VIEW",
|
425
|
+
"WHEN",
|
426
|
+
"WHENEVER",
|
427
|
+
"WHERE",
|
428
|
+
"WIDTH_BUCKET",
|
429
|
+
"WINDOW",
|
430
|
+
"WITH",
|
431
|
+
"WITHIN",
|
432
|
+
"WITHOUT",
|
433
|
+
"WORK",
|
434
|
+
"WRITE",
|
435
|
+
"XML",
|
436
|
+
"XMLAGG",
|
437
|
+
"XMLATTRIBUTES",
|
438
|
+
"XMLBINARY",
|
439
|
+
"XMLCAST",
|
440
|
+
"XMLCOMMENT",
|
441
|
+
"XMLCONCAT",
|
442
|
+
"XMLDOCUMENT",
|
443
|
+
"XMLELEMENT",
|
444
|
+
"XMLEXISTS",
|
445
|
+
"XMLFOREST",
|
446
|
+
"XMLITERATE",
|
447
|
+
"XMLNAMESPACES",
|
448
|
+
"XMLPARSE",
|
449
|
+
"XMLPI",
|
450
|
+
"XMLQUERY",
|
451
|
+
"XMLSERIALIZE",
|
452
|
+
"XMLTABLE",
|
453
|
+
"XMLTEXT",
|
454
|
+
"XMLVALIDATE",
|
455
|
+
"YEAR",
|
456
|
+
"ZONE",
|
57
457
|
}
|
@@ -22,9 +22,9 @@ from strictyaml import (
|
|
22
22
|
YAMLValidationError,
|
23
23
|
)
|
24
24
|
|
25
|
+
from semantic_model_generator.data_processing.cte_utils import ClickzettaDialect
|
25
26
|
from semantic_model_generator.protos import semantic_model_pb2
|
26
27
|
from semantic_model_generator.validate.keywords import CZ_RESERVED_WORDS
|
27
|
-
from semantic_model_generator.data_processing.cte_utils import ClickzettaDialect
|
28
28
|
|
29
29
|
scalar_type_map = {
|
30
30
|
FieldDescriptor.TYPE_BOOL: Bool,
|
@@ -53,7 +53,9 @@ class IdField(Str): # type: ignore
|
|
53
53
|
"name can only contain letters, underscores, decimal digits (0-9), and dollar signs ($).",
|
54
54
|
)
|
55
55
|
if chunk.contents.upper() in CZ_RESERVED_WORDS:
|
56
|
-
chunk.expecting_but_found(
|
56
|
+
chunk.expecting_but_found(
|
57
|
+
"", "name cannot be a ClickZetta reserved keyword"
|
58
|
+
)
|
57
59
|
return chunk.contents
|
58
60
|
|
59
61
|
|
File without changes
|
File without changes
|