@clickzetta/cz-cli-darwin-arm64 0.3.92 → 0.3.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
  3. package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
  4. package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
  5. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
  6. package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
  7. package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
  8. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
  9. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
  10. package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
  11. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
  12. package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
  13. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
  14. package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
  15. package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
  16. package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
  17. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
  18. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
  19. package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
  20. package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
  21. package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
  22. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
  23. package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
  24. package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
  25. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
  26. package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
  27. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
  28. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
  29. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
  30. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
  31. package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
  32. package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
  33. package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
  34. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
  35. package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
  36. package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
  37. package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
  38. package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
  39. package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
  40. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
  41. package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
  42. package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
  43. package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
  44. package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
  45. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
  46. package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
  47. package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
  48. package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
  49. package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
  50. package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
  51. package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
  52. package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
  53. package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
  54. package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
  55. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
  56. package/package.json +1 -1
  57. package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
  58. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  59. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
  60. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
  61. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
  62. package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
  63. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  64. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  65. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  66. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  67. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  68. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  69. /package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
@@ -1,279 +0,0 @@
1
- # DML 完整语法参考
2
-
3
- > 基于 ClickZetta Lakehouse 产品文档整理,含与 Snowflake / Spark SQL 的差异标注
4
-
5
- ---
6
-
7
- ## ⚠️ 隐式类型转换规则(INSERT / UPDATE 通用)
8
-
9
- **ClickZetta 对写入操作(INSERT/UPDATE)禁止将字符串隐式转换为 DATE/TIMESTAMP/BOOLEAN/JSON/数值类型,也禁止整数隐式转换为 BOOLEAN,必须显式 CAST;数值类型之间(如 INT→BIGINT、FLOAT→DOUBLE)的隐式转换仍然允许。**
10
- 但 SELECT/WHERE/表达式中允许隐式转换。
11
-
12
- ### 完整规则表(已验证)
13
-
14
- | 目标列类型 | 写入值 | INSERT/UPDATE | WHERE/SELECT |
15
- |---|---|---|---|
16
- | `DATE` | `'2024-01-15'`(字符串) | ❌ 报错 | ✅ 允许 |
17
- | `TIMESTAMP` | `'2024-01-15 12:00:00'`(字符串) | ❌ 报错 | ✅ 允许 |
18
- | `BOOLEAN` | `'true'` / `'false'`(字符串) | ❌ 报错 | ✅ 允许 |
19
- | `BOOLEAN` | `1` / `0`(整数) | ❌ 报错 | ✅ 允许 |
20
- | `JSON` | `'{"k":1}'`(字符串) | ❌ 报错 | ✅ 允许 |
21
- | `INT` / `BIGINT` | `'123'`(字符串) | ❌ 报错 | ✅ 允许 |
22
- | `BIGINT` | `100`(INT) | ✅ 允许 | ✅ 允许 |
23
- | `DOUBLE` | `1.5`(FLOAT) | ✅ 允许 | ✅ 允许 |
24
- | `BIGINT` | `1.5`(FLOAT) | ✅ 允许(截断) | ✅ 允许 |
25
-
26
- ### 各类型正确写法
27
-
28
- ```sql
29
- -- DATE(以下写法等价)
30
- INSERT INTO t VALUES (CAST('2024-01-15' AS DATE));
31
- INSERT INTO t VALUES (DATE '2024-01-15');
32
- INSERT INTO t VALUES (TO_DATE('2024-01-15'));
33
- INSERT INTO t VALUES (DATE('2024-01-15')); -- 函数形式,也支持
34
-
35
- -- TIMESTAMP(以下写法等价)
36
- INSERT INTO t VALUES (CAST('2024-01-15 12:00:00' AS TIMESTAMP));
37
- INSERT INTO t VALUES (TIMESTAMP '2024-01-15 12:00:00');
38
- INSERT INTO t VALUES (TO_TIMESTAMP('2024-01-15 12:00:00'));
39
- INSERT INTO t VALUES (TIMESTAMP('2024-01-15 12:00:00')); -- 函数形式,也支持
40
- INSERT INTO t VALUES (CURRENT_TIMESTAMP());
41
- INSERT INTO t VALUES (CURRENT_DATE() - INTERVAL 7 DAY);
42
-
43
- -- BOOLEAN(只接受 TRUE/FALSE 字面量或 CAST)
44
- INSERT INTO t VALUES (TRUE);
45
- INSERT INTO t VALUES (FALSE);
46
- INSERT INTO t VALUES (CAST(1 AS BOOLEAN));
47
- INSERT INTO t VALUES (CAST('true' AS BOOLEAN));
48
-
49
- -- JSON(必须用 PARSE_JSON 或 CAST)
50
- INSERT INTO t VALUES (PARSE_JSON('{"key":"value"}'));
51
- INSERT INTO t VALUES (CAST('{"key":"value"}' AS JSON));
52
-
53
- -- INT/BIGINT(字符串必须 CAST)
54
- INSERT INTO t VALUES (CAST('123' AS INT));
55
- INSERT INTO t VALUES (CAST('456' AS BIGINT));
56
- ```
57
-
58
- ### UPDATE 同样适用
59
-
60
- ```sql
61
- -- ❌ UPDATE 同样不允许字符串→DATE、整数→BOOLEAN 等隐式转换
62
- UPDATE orders SET dt = '2024-06-01' WHERE id = 1; -- 报错
63
- UPDATE orders SET flag = 0 WHERE id = 1; -- 报错
64
-
65
- -- ✅ 必须显式转换
66
- UPDATE orders SET dt = CAST('2024-06-01' AS DATE) WHERE id = 1;
67
- UPDATE orders SET flag = CAST(0 AS BOOLEAN) WHERE id = 1;
68
- ```
69
-
70
- ### WHERE 中字符串可以隐式比较
71
-
72
- ```sql
73
- -- ✅ WHERE 中允许字符串与日期/数字比较
74
- SELECT * FROM orders WHERE dt = '2024-01-15';
75
- SELECT * FROM orders WHERE dt >= '2024-01-01' AND dt < '2025-01-01';
76
- SELECT * FROM orders WHERE id = '123';
77
- ```
78
-
79
- **与 Snowflake / Spark 差异:**
80
- - Snowflake / Spark:INSERT/UPDATE 时字符串可隐式转为日期/布尔/数字类型
81
- - ClickZetta:写入时**必须显式转换**,查询时可隐式比较
82
-
83
- > **同样适用于 RESTORE TABLE**:`RESTORE TABLE t TO TIMESTAMP AS OF '2024-01-15'` 会报错,必须用 `CAST('2024-01-15 10:00:00' AS TIMESTAMP)` 或完整毫秒时间戳字符串。
84
-
85
- ---
86
-
87
- ## INSERT
88
-
89
- ```sql
90
- -- 追加(单行)
91
- INSERT INTO orders VALUES (1, 101, 100.0, 'pending');
92
- INSERT INTO orders (id, customer_id, amount) VALUES (1, 101, 100.0);
93
-
94
- -- 追加(多行)
95
- INSERT INTO orders VALUES
96
- (1, 101, 100.0, 'pending'),
97
- (2, 102, 200.0, 'completed');
98
-
99
- -- 从查询追加
100
- INSERT INTO orders SELECT * FROM staging_orders WHERE status = 'new';
101
-
102
- -- 覆盖整表
103
- INSERT OVERWRITE TABLE orders SELECT * FROM new_orders;
104
-
105
- -- 覆盖指定分区(静态分区)
106
- INSERT OVERWRITE TABLE orders PARTITION (dt = '2024-01-01')
107
- SELECT id, amount FROM staging WHERE dt = '2024-01-01';
108
-
109
- -- 动态分区(自动根据数据值分区)
110
- INSERT INTO orders PARTITION (dt)
111
- SELECT id, amount, dt FROM staging;
112
-
113
- -- 不推荐大量数据用 VALUES,适合测试
114
- ```
115
-
116
- **与 Snowflake 差异:**
117
- - Snowflake 的整表覆盖写法为 `INSERT OVERWRITE INTO t ...`(没有 `TABLE` 关键字),也可用 `TRUNCATE` + `INSERT` 或 `MERGE` 替代;迁移时改写为 ClickZetta 的 `INSERT OVERWRITE TABLE t ...` 形式
118
- - Snowflake 无 `PARTITION` 子句(Snowflake 用 CLUSTER BY 自动管理)
119
- - ClickZetta 支持 Hive 风格动态分区
120
-
121
- **与 Spark SQL 差异:**
122
- - 语法基本相同,ClickZetta 完全兼容 Spark INSERT 语法
123
-
124
- ---
125
-
126
- ## UPDATE
127
-
128
- ```sql
129
- -- 基本更新
130
- UPDATE orders SET status = 'cancelled' WHERE id = 123;
131
-
132
- -- 多列更新
133
- UPDATE orders
134
- SET status = 'completed', updated_at = current_timestamp()
135
- WHERE id = 123;
136
-
137
- -- 子查询更新
138
- UPDATE orders
139
- SET amount = amount * 1.1
140
- WHERE customer_id IN (
141
- SELECT id FROM customers WHERE tier = 'VIP'
142
- );
143
-
144
- -- 带 ORDER BY + LIMIT(分批更新)
145
- UPDATE orders
146
- SET status = 'archived'
147
- WHERE created_at < '2020-01-01'
148
- ORDER BY created_at ASC
149
- LIMIT 10000;
150
- ```
151
-
152
- **与 Snowflake 差异:**
153
- - Snowflake `UPDATE ... FROM` 语法(JOIN 更新)→ ClickZetta 用子查询替代
154
- - ClickZetta 额外支持 `ORDER BY + LIMIT`(Snowflake 不支持)
155
-
156
- **与 Spark SQL 差异:**
157
- - Spark SQL 不支持 `UPDATE`(Delta Lake 支持);ClickZetta 原生支持
158
-
159
- ---
160
-
161
- ## DELETE
162
-
163
- ```sql
164
- -- 基本删除
165
- DELETE FROM orders WHERE id = 123;
166
-
167
- -- 条件删除
168
- DELETE FROM orders WHERE created_at < '2020-01-01';
169
-
170
- -- 子查询删除
171
- DELETE FROM orders
172
- WHERE order_id IN (
173
- SELECT order_id FROM order_details WHERE status = 'cancelled'
174
- );
175
-
176
- -- 删除所有行(等价于 TRUNCATE,但会记录版本)
177
- DELETE FROM orders WHERE 1 = 1;
178
- ```
179
-
180
- **与 Snowflake 差异:**
181
- - 语法基本相同
182
-
183
- **与 Spark SQL 差异:**
184
- - Spark SQL 不支持 `DELETE`(Delta Lake 支持);ClickZetta 原生支持
185
-
186
- ---
187
-
188
- ## MERGE INTO(UPSERT)
189
-
190
- ```sql
191
- -- 标准 MERGE(⚠️ 多个 WHEN MATCHED 时,UPDATE 必须在 DELETE 之前)
192
- MERGE INTO target t
193
- USING source s ON t.id = s.id
194
- WHEN MATCHED AND s.is_deleted = 0 THEN UPDATE SET -- UPDATE 在前
195
- t.amount = s.amount,
196
- t.status = s.status,
197
- t.updated_at = current_timestamp()
198
- WHEN MATCHED AND s.is_deleted = 1 THEN DELETE -- DELETE 在后
199
- WHEN NOT MATCHED THEN INSERT (id, amount, status, created_at)
200
- VALUES (s.id, s.amount, s.status, current_timestamp());
201
-
202
- -- 多个 WHEN MATCHED(UPDATE 必须在 DELETE 前)
203
- MERGE INTO target t
204
- USING source s ON t.id = s.id
205
- WHEN MATCHED AND s.action = 'update' THEN UPDATE SET t.amount = s.amount
206
- WHEN MATCHED AND s.action = 'delete' THEN DELETE
207
- WHEN NOT MATCHED THEN INSERT VALUES (s.id, s.amount);
208
-
209
- -- 从子查询 MERGE
210
- MERGE INTO orders t
211
- USING (
212
- SELECT id, SUM(amount) AS total FROM line_items GROUP BY id
213
- ) s ON t.id = s.id
214
- WHEN MATCHED THEN UPDATE SET t.total = s.total
215
- WHEN NOT MATCHED THEN INSERT (id, total) VALUES (s.id, s.total);
216
- ```
217
-
218
- **⚠️ ClickZetta MERGE 限制:**
219
- 1. `WHEN NOT MATCHED` 只能有**一个**(Snowflake 支持多个)
220
- 2. 多个 `WHEN MATCHED` 时,`UPDATE` 必须在 `DELETE` 之前
221
- 3. 一个源行不能匹配多个目标行(否则报错)
222
-
223
- **与 Snowflake 差异:**
224
- - Snowflake 支持多个 `WHEN NOT MATCHED`;ClickZetta 只支持一个
225
- - Snowflake `MERGE ... WHEN NOT MATCHED BY SOURCE THEN DELETE`;ClickZetta 不支持
226
- - 语法结构基本相同
227
-
228
- **与 Spark SQL 差异:**
229
- - Spark SQL(Delta Lake)支持 `WHEN NOT MATCHED BY SOURCE`;ClickZetta 不支持
230
- - 语法结构基本相同
231
-
232
- ---
233
-
234
- ## COPY INTO(批量导入/导出)
235
-
236
- ```sql
237
- -- 从 Volume 导入
238
- COPY INTO orders
239
- FROM VOLUME my_oss_volume
240
- USING CSV
241
- OPTIONS('header' = 'true', 'sep' = ',')
242
- SUBDIRECTORY 'data/2024/';
243
-
244
- -- 从 Volume 导入(Parquet)
245
- COPY INTO orders
246
- FROM VOLUME my_oss_volume
247
- USING PARQUET
248
- FILES('part-00001.parquet', 'part-00002.parquet');
249
-
250
- -- 正则匹配文件
251
- COPY INTO orders
252
- FROM VOLUME my_oss_volume
253
- USING PARQUET
254
- REGEXP '.*2024-0[1-6].parquet';
255
-
256
- -- 覆盖导入
257
- COPY OVERWRITE INTO orders
258
- FROM VOLUME my_oss_volume
259
- USING CSV OPTIONS('header' = 'true');
260
-
261
- -- 导出到 Volume
262
- COPY INTO VOLUME my_oss_volume
263
- SUBDIRECTORY 'export/orders/'
264
- FROM orders
265
- USING PARQUET;
266
-
267
- -- 导出查询结果
268
- COPY INTO VOLUME my_oss_volume
269
- SUBDIRECTORY 'export/2024/'
270
- FROM (SELECT * FROM orders WHERE YEAR(created_at) = 2024)
271
- USING CSV OPTIONS('header' = 'true');
272
- ```
273
-
274
- **与 Snowflake 差异:**
275
- - Snowflake `COPY INTO t FROM @stage/path/file.csv` → ClickZetta `COPY INTO t FROM VOLUME v USING CSV`
276
- - Snowflake Stage 用 `@` 前缀;ClickZetta Volume 用对象名
277
- - Snowflake `COPY INTO @stage FROM t` → ClickZetta `COPY INTO VOLUME v FROM t`
278
- - Snowflake 支持 `PATTERN = '.*\.csv'`;ClickZetta 用 `REGEXP`
279
- - Snowflake `FILE_FORMAT = (TYPE = CSV)` → ClickZetta `USING CSV OPTIONS(...)`
@@ -1,372 +0,0 @@
1
- # 函数完整参考
2
-
3
- > 含与 Snowflake / Spark SQL 的差异标注
4
-
5
- ---
6
-
7
- ## 数值函数
8
-
9
- ```sql
10
- ABS(x) -- 绝对值
11
- CEIL(x) / CEILING(x) -- 向上取整
12
- FLOOR(x) -- 向下取整
13
- ROUND(x, d) -- 四舍五入,d位小数
14
- TRUNCATE(x, d) -- 截断,d位小数
15
- MOD(x, y) / x % y -- 取模
16
- POWER(x, y) / POW(x, y) -- 幂运算
17
- SQRT(x) -- 平方根
18
- EXP(x) -- e^x
19
- LN(x) / LOG(x) -- 自然对数
20
- LOG(base, x) -- 指定底数对数
21
- LOG2(x) / LOG10(x) -- 以2/10为底
22
- SIGN(x) -- 符号(-1/0/1)
23
- GREATEST(a, b, c, ...) -- 最大值
24
- LEAST(a, b, c, ...) -- 最小值
25
- RANDOM() / RAND() -- 0-1随机数
26
- PI() -- π
27
- SIN(x) / COS(x) / TAN(x) -- 三角函数
28
- ASIN(x) / ACOS(x) / ATAN(x) -- 反三角函数
29
- ATAN2(y, x) -- 反正切
30
- DEGREES(x) / RADIANS(x) -- 角度/弧度转换
31
- -- ⚠️ FACTORIAL 不支持,用 EXP(SUM(LN(n))) 替代
32
- -- ⚠️ BIN(x) 不支持,用 CONV(x, 10, 2) 替代
33
- HEX(x) -- 转十六进制字符串
34
- UNHEX(s) -- 十六进制转字符串
35
- CONV(x, from_base, to_base) -- 进制转换(如 CONV(10,10,2) 得 '1010')
36
- ```
37
-
38
- **与 Snowflake 差异:**
39
- - Snowflake `SQUARE(x)` → ClickZetta `POWER(x, 2)`
40
- - Snowflake `HAVERSINE(lat1, lon1, lat2, lon2)` → ClickZetta 不支持
41
- - Snowflake `WIDTH_BUCKET` → ClickZetta 不支持
42
-
43
- ---
44
-
45
- ## 字符串函数
46
-
47
- ```sql
48
- -- 基本操作
49
- LENGTH(s) / CHAR_LENGTH(s) -- 字符长度
50
- OCTET_LENGTH(s) -- 字节长度
51
- UPPER(s) / LOWER(s) -- 大小写转换
52
- INITCAP(s) -- 首字母大写
53
- TRIM(s) / LTRIM(s) / RTRIM(s) -- 去空格
54
- TRIM(BOTH 'x' FROM s) -- 去指定字符
55
- LPAD(s, n, pad) / RPAD(s, n, pad) -- 填充
56
- REPEAT(s, n) -- 重复
57
- REVERSE(s) -- 反转
58
- SPACE(n) -- n个空格
59
-
60
- -- 拼接
61
- CONCAT(s1, s2, ...) -- 拼接(NULL 传播)
62
- CONCAT_WS(sep, s1, s2, ...) -- 带分隔符拼接(跳过 NULL)
63
- s1 || s2 -- 拼接运算符
64
-
65
- -- 截取
66
- SUBSTR(s, pos) / SUBSTRING(s, pos)
67
- SUBSTR(s, pos, len) / SUBSTRING(s, pos, len)
68
- LEFT(s, n) / RIGHT(s, n)
69
- MID(s, pos, len) -- 同 SUBSTR
70
-
71
- -- 查找
72
- INSTR(s, substr) -- 查找位置(1-based,0表示未找到)
73
- LOCATE(substr, s) -- 同 INSTR,参数顺序不同
74
- LOCATE(substr, s, pos) -- 从pos开始查找
75
- POSITION(substr IN s) -- ✅ 支持,返回子串位置(1-based)
76
- FIND_IN_SET(s, list) -- 在逗号分隔列表中查找
77
-
78
- -- 替换
79
- REPLACE(s, old, new) -- 替换所有
80
- TRANSLATE(s, from_chars, to_chars) -- 字符级替换
81
- -- ⚠️ OVERLAY 语法不支持,用 CONCAT(LEFT(s,pos-1), new, SUBSTR(s,pos+len)) 替代
82
-
83
- -- 正则
84
- REGEXP_EXTRACT(s, pattern, group) -- 提取匹配组
85
- REGEXP_EXTRACT_ALL(s, pattern) -- 提取所有匹配
86
- REGEXP_REPLACE(s, pattern, repl) -- 正则替换
87
- REGEXP_LIKE(s, pattern) -- 正则匹配(返回布尔)
88
- RLIKE(s, pattern) -- 同 REGEXP_LIKE
89
- s RLIKE pattern -- 运算符形式
90
- REGEXP_COUNT(s, pattern) -- 匹配次数
91
- REGEXP_SUBSTR(s, pattern) -- 提取第一个匹配
92
-
93
- -- 分割
94
- SPLIT(s, delimiter) -- 按分隔符分割,返回 ARRAY
95
- SPLIT_PART(s, delimiter, n) -- 取第n个分割部分(1-based)
96
-
97
- -- 格式化
98
- FORMAT_STRING(fmt, args...) -- printf 风格(如 FORMAT_STRING('%d items', 5) → '5 items')
99
- -- ⚠️ FORMAT(number, decimals) 数字千分位格式化不支持,用 ROUND + CAST 替代
100
-
101
- -- 编码
102
- BASE64(s) / UNBASE64(s) -- Base64 编解码
103
- MD5(s) -- MD5 哈希
104
- SHA1(s) / SHA2(s, bits) -- SHA 哈希
105
- CRC32(s) -- CRC32
106
- ENCODE(s, charset) / DECODE(s, charset) -- 字符集编解码
107
-
108
- -- 其他
109
- ASCII(s) -- 首字符 ASCII 码
110
- CHAR(n) -- ASCII 码转字符
111
- -- ⚠️ SOUNDEX 不支持
112
- -- ⚠️ LEVENSHTEIN 不支持,用 Python UDF 或 ZettaPark 替代
113
- HAMMING_DISTANCE(s1, s2) -- 汉明距离(字符串)
114
- ```
115
-
116
- **与 Snowflake 差异:**
117
- - Snowflake `CHARINDEX(substr, s)` → ClickZetta `INSTR(s, substr)` 或 `LOCATE(substr, s)`(参数顺序不同!)
118
- - Snowflake `EDITDISTANCE(s1, s2)` → ClickZetta 不支持 LEVENSHTEIN,需用 Python UDF
119
- - Snowflake `STRTOK(s, delim, n)` → ClickZetta `SPLIT_PART(s, delim, n)`
120
- - Snowflake `ILIKE(s, pattern)` → ClickZetta `ILIKE` ✅ 也支持!
121
- - Snowflake `CONTAINS(s, substr)` → ClickZetta `INSTR(s, substr) > 0`
122
- - Snowflake `STARTSWITH(s, prefix)` → ClickZetta `s LIKE 'prefix%'` 或 `STARTSWITH(s, prefix)`
123
- - Snowflake `ENDSWITH(s, suffix)` → ClickZetta `s LIKE '%suffix'` 或 `ENDSWITH(s, suffix)`
124
-
125
- ---
126
-
127
- ## 日期时间函数
128
-
129
- ```sql
130
- -- 获取当前时间
131
- CURRENT_DATE() -- 当前日期
132
- CURRENT_TIMESTAMP() / NOW() -- 当前时间戳(带时区)
133
- CURRENT_TIME() -- 当前时间
134
- LOCALTIMESTAMP() -- 本地时间戳
135
-
136
- -- 提取部分
137
- YEAR(dt) / MONTH(dt) / DAY(dt)
138
- HOUR(dt) / MINUTE(dt) / SECOND(dt)
139
- DAYOFWEEK(dt) -- 1=周日, 7=周六
140
- DAYOFMONTH(dt) -- 同 DAY
141
- DAYOFYEAR(dt) -- 年中第几天
142
- WEEKOFYEAR(dt) -- 年中第几周
143
- QUARTER(dt) -- 季度(1-4)
144
- EXTRACT(YEAR FROM dt) -- 标准SQL提取
145
- -- ⚠️ DATE_PART('year', dt) 不支持,用 EXTRACT 或 YEAR(dt) 替代
146
-
147
- -- 日期加减
148
- DATE_ADD(dt, n) -- 加n天
149
- DATE_SUB(dt, n) -- 减n天
150
- dt + INTERVAL n DAY -- 加n天(标准SQL)
151
- dt - INTERVAL n DAY -- 减n天
152
- dt + INTERVAL '1-2' YEAR TO MONTH -- 加1年2个月
153
- ADDDATE(dt, n) -- 同 DATE_ADD
154
- SUBDATE(dt, n) -- 同 DATE_SUB
155
- ADD_MONTHS(dt, n) -- 加n个月
156
- MONTHS_BETWEEN(dt1, dt2) -- 月份差
157
-
158
- -- 日期差
159
- DATEDIFF(end_dt, start_dt) -- 两参数形式:返回天数差(end在前)
160
- DATEDIFF(unit, start_dt, end_dt) -- 三参数形式:指定单位(day/hour/month等),与 Snowflake 兼容
161
- TIMESTAMPDIFF(unit, dt1, dt2) -- 指定单位的差值
162
-
163
- -- 截断
164
- DATE_TRUNC('year', dt) -- 截断到年
165
- DATE_TRUNC('month', dt) -- 截断到月
166
- DATE_TRUNC('day', dt) -- 截断到天
167
- DATE_TRUNC('hour', dt) -- 截断到小时
168
- DATE_TRUNC('week', dt) -- 截断到周(周一)
169
- TRUNC(dt, 'MM') -- Oracle 风格截断
170
-
171
- -- 格式化
172
- DATE_FORMAT(dt, 'yyyy-MM-dd') -- 格式化为字符串
173
- DATE_FORMAT(dt, 'yyyy-MM-dd HH:mm:ss')
174
- TO_CHAR(dt, 'YYYY-MM-DD') -- 同 DATE_FORMAT
175
-
176
- -- 转换
177
- TO_DATE('2024-01-01') -- 字符串转日期
178
- TO_DATE('2024-01-01', 'yyyy-MM-dd')
179
- TO_TIMESTAMP('2024-01-01 12:00:00')
180
- TO_TIMESTAMP('2024-01-01', 'yyyy-MM-dd')
181
- CAST('2024-01-01' AS DATE)
182
- CAST('2024-01-01 12:00:00' AS TIMESTAMP)
183
- FROM_UNIXTIME(unix_ts) -- Unix时间戳转时间戳
184
- FROM_UNIXTIME(unix_ts, fmt) -- 转格式化字符串
185
- UNIX_TIMESTAMP() -- 当前Unix时间戳
186
- UNIX_TIMESTAMP(dt) -- 日期转Unix时间戳
187
- UNIX_TIMESTAMP(s, fmt) -- 字符串转Unix时间戳
188
-
189
- -- 其他
190
- LAST_DAY(dt) -- 月末日期
191
- NEXT_DAY(dt, 'Monday') -- 下一个指定星期几
192
- MAKE_DATE(year, month, day) -- 构造日期(注意:是 MAKE_DATE 不是 MAKEDATE)
193
- ADD_MONTHS(dt, n) -- 加n个月
194
- MONTHS_BETWEEN(dt1, dt2) -- 月份差
195
- TIMESTAMPDIFF(unit, dt1, dt2) -- 指定单位的差值(如 TIMESTAMPDIFF(MONTH, ...))
196
- FROM_UTC_TIMESTAMP(ts, tz) -- UTC 转指定时区
197
- TO_UTC_TIMESTAMP(ts, tz) -- 指定时区转 UTC
198
- -- ⚠️ CONVERT_TZ(dt, from_tz, to_tz) 不支持,用 FROM_UTC_TIMESTAMP/TO_UTC_TIMESTAMP 替代
199
- -- ⚠️ MAKEDATE(year, dayofyear) 不支持,用 MAKE_DATE(year, month, day) 替代
200
- -- ⚠️ MAKETIME / PERIOD_ADD / PERIOD_DIFF 不支持
201
- ```
202
-
203
- **与 Snowflake 差异:**
204
- - Snowflake `DATEADD(day, n, dt)` → ClickZetta `DATEADD(day, n, dt)` ✅ 也支持;或用 `DATE_ADD(dt, n)` / `dt + INTERVAL n DAY`
205
- - Snowflake `DATEDIFF(day, start, end)` → ClickZetta `DATEDIFF(day, start, end)` ✅ 三参数形式也支持;或用 `DATEDIFF(end, start)` 两参数形式(返回天数)
206
- - Snowflake `DATE_TRUNC('day', dt)` → ClickZetta 相同
207
- - Snowflake `TO_DATE(s)` → ClickZetta 相同
208
- - Snowflake `CONVERT_TIMEZONE(from, to, ts)` → ClickZetta `FROM_UTC_TIMESTAMP` / `TO_UTC_TIMESTAMP`
209
- - Snowflake `CONVERT_TIMEZONE(tz, dt)`(两参数形式)→ ClickZetta 不支持 `CONVERT_TZ`,源时间为 UTC 时用 `FROM_UTC_TIMESTAMP(dt, tz)` 替代
210
- - Snowflake `SYSDATE()` / `GETDATE()` → ClickZetta `CURRENT_TIMESTAMP()` / `NOW()`
211
- - Snowflake `TIMESTAMPADD(unit, n, dt)` → ClickZetta `dt + INTERVAL n unit`
212
-
213
- **与 Spark SQL 差异:**
214
- - 大部分函数相同,ClickZetta 兼容 Spark 日期函数
215
-
216
- ---
217
-
218
- ## 条件函数
219
-
220
- ```sql
221
- -- IF
222
- IF(condition, true_val, false_val)
223
-
224
- -- CASE WHEN
225
- CASE WHEN cond1 THEN val1
226
- WHEN cond2 THEN val2
227
- ELSE default_val
228
- END
229
-
230
- -- 简单 CASE
231
- CASE status
232
- WHEN 'A' THEN 'Active'
233
- WHEN 'I' THEN 'Inactive'
234
- ELSE 'Unknown'
235
- END
236
-
237
- -- NULL 处理
238
- COALESCE(a, b, c) -- 第一个非NULL值
239
- NVL(a, b) -- a为NULL时返回b(同 IFNULL)
240
- IFNULL(a, b) -- 同 NVL
241
- NULLIF(a, b) -- a=b时返回NULL,否则返回a
242
- NVL2(a, b, c) -- a非NULL返回b,否则返回c
243
- ISNULL(a) -- 是否为NULL(返回布尔)
244
- ISNOTNULL(a) -- 是否非NULL
245
-
246
- -- DECODE(Oracle/Hive 风格)
247
- DECODE(expr, val1, res1, val2, res2, ..., default)
248
-
249
- -- 类型检查
250
- TYPEOF(expr) -- 返回类型名称字符串
251
- ```
252
-
253
- **与 Snowflake 差异:**
254
- - Snowflake `IFF(cond, a, b)` → ClickZetta `IF(cond, a, b)`
255
- - Snowflake `ZEROIFNULL(x)` → ClickZetta `COALESCE(x, 0)` 或 `NVL(x, 0)`
256
- - Snowflake `NULLIFZERO(x)` → ClickZetta `NULLIF(x, 0)`
257
- - Snowflake `BOOLAND(a, b)` / `BOOLOR(a, b)` → ClickZetta `a AND b` / `a OR b`
258
-
259
- ---
260
-
261
- ## 聚合函数
262
-
263
- ```sql
264
- -- 基本聚合
265
- COUNT(*) / COUNT(col) / COUNT(DISTINCT col)
266
- SUM(col) / AVG(col) / MAX(col) / MIN(col)
267
- STDDEV(col) / STDDEV_POP(col) / STDDEV_SAMP(col)
268
- VARIANCE(col) / VAR_POP(col) / VAR_SAMP(col)
269
-
270
- -- 布尔聚合
271
- BOOL_OR(cond) -- 任意一个为真
272
- BOOL_AND(cond) -- 全部为真
273
- EVERY(cond) -- 同 BOOL_AND
274
-
275
- -- 字符串聚合
276
- GROUP_CONCAT(col ORDER BY col SEPARATOR ',') -- 替代 Snowflake LISTAGG
277
- GROUP_CONCAT(DISTINCT col SEPARATOR ',')
278
-
279
- -- 数组聚合
280
- ARRAY_AGG(col) -- 收集为数组(含NULL)
281
- COLLECT_LIST(col) -- 同 ARRAY_AGG
282
- COLLECT_SET(col) -- 去重收集
283
-
284
- -- 近似聚合
285
- APPROX_COUNT_DISTINCT(col) -- 近似去重计数(HyperLogLog)
286
- APPROX_PERCENTILE(col, p) -- 近似百分位数
287
-
288
- -- 统计聚合
289
- CORR(x, y) -- 相关系数
290
- COVAR_POP(x, y) / COVAR_SAMP(x, y) -- 协方差
291
- -- ⚠️ REGR_SLOPE / REGR_INTERCEPT 不支持
292
- -- 替代:CORR(y,x) * STDDEV(y) / STDDEV(x) 计算斜率
293
-
294
- -- 有序集合聚合
295
- PERCENTILE(col, p) -- 精确百分位数
296
- PERCENTILE_APPROX(col, p) -- 近似百分位数
297
- MEDIAN(col) -- 中位数
298
- ```
299
-
300
- **与 Snowflake 差异:**
301
- - Snowflake `LISTAGG(col, ',') WITHIN GROUP (ORDER BY col)` → ClickZetta `GROUP_CONCAT(col ORDER BY col SEPARATOR ',')`
302
- - Snowflake `ARRAY_AGG(col) WITHIN GROUP (ORDER BY col)` → ClickZetta 用 `ARRAY_AGG(col)`,但不支持 `WITHIN GROUP` 排序子句
303
- - Snowflake `OBJECT_AGG(key, value)` → ClickZetta `MAP_AGG(key, value)`
304
- - Snowflake `BITAND_AGG / BITOR_AGG / BITXOR_AGG` → ClickZetta `BIT_AND / BIT_OR / BIT_XOR`
305
-
306
- ---
307
-
308
- ## 类型转换函数
309
-
310
- ```sql
311
- -- 显式转换
312
- CAST(expr AS target_type)
313
- expr::target_type -- 简写语法
314
-
315
- -- 安全转换(失败返回NULL而非报错)
316
- TRY_CAST(expr AS target_type)
317
-
318
- -- 字符串转换
319
- TO_NUMBER(s) / TO_DECIMAL(s)
320
- TO_DOUBLE(s)
321
- TO_BOOLEAN(s) -- 'true'/'false'/'1'/'0'
322
-
323
- -- 示例
324
- CAST('123' AS INT)
325
- CAST(123 AS STRING)
326
- CAST('2024-01-01' AS DATE)
327
- CAST('[1,2,3]' AS VECTOR(3)) -- 字符串转向量
328
- TRY_CAST('abc' AS INT) -- 返回 NULL
329
- ```
330
-
331
- **与 Snowflake 差异:**
332
- - Snowflake `TRY_TO_NUMBER / TRY_TO_DATE` → ClickZetta `TRY_CAST`
333
- - Snowflake `TO_VARIANT(x)` → ClickZetta `PARSE_JSON(TO_JSON(x))`
334
-
335
- ---
336
-
337
- ## 系统/上下文函数
338
-
339
- ```sql
340
- CURRENT_USER() -- 当前用户名
341
- CURRENT_WORKSPACE() -- 当前工作空间
342
- CURRENT_SCHEMA() -- 当前 Schema
343
- CURRENT_VCLUSTER() -- 当前计算集群
344
- CURRENT_INSTANCE_ID() -- 当前实例ID
345
- VERSION() -- 版本信息
346
- ```
347
-
348
- **与 Snowflake 差异:**
349
- - Snowflake `CURRENT_DATABASE()` → ClickZetta `CURRENT_WORKSPACE()`
350
- - Snowflake `CURRENT_WAREHOUSE()` → ClickZetta `CURRENT_VCLUSTER()`
351
- - Snowflake `CURRENT_ROLE()` → ClickZetta 无直接对应
352
-
353
- ---
354
-
355
- ## 向量函数
356
-
357
- ```sql
358
- -- 距离计算
359
- L2_DISTANCE(v1, v2) -- 欧几里得距离(越小越相似)
360
- COSINE_DISTANCE(v1, v2) -- 余弦距离(越小越相似)
361
- DOT_PRODUCT(v1, v2) -- 点积(越大越相似,需归一化)
362
- HAMMING_DISTANCE(v1, v2) -- 汉明距离(二值向量)
363
- JACCARD_DISTANCE(v1, v2) -- 雅卡德距离
364
-
365
- -- 向量操作
366
- BINARY_QUANTIZE(v) -- float向量二值化
367
- VECTOR(v1, v2, ...) -- 构建向量
368
-
369
- -- 构建向量
370
- SELECT VECTOR(0.1, 0.2, 0.3, 0.4);
371
- SELECT CAST('[0.1, 0.2, 0.3]' AS VECTOR(3));
372
- ```