@clickzetta/cz-cli-linux-x64 0.3.4 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/bin/cz-cli +0 -0
  2. package/package.json +1 -1
  3. package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
  4. package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
  5. package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
  6. package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
  7. package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
  8. package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
  9. package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
  10. package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
  11. package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
  12. package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
  13. package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
  14. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -293
  15. package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
  16. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
  17. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -457
  18. package/bin/skills/clickzetta-concepts/SKILL.md +0 -282
  19. package/bin/skills/clickzetta-concepts/references/brands-and-endpoints.md +0 -79
  20. package/bin/skills/clickzetta-concepts/references/object-model.md +0 -311
  21. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -165
  22. package/bin/skills/clickzetta-data-lifecycle/SKILL.md +0 -211
  23. package/bin/skills/clickzetta-data-lifecycle/references/lifecycle-reference.md +0 -175
  24. package/bin/skills/clickzetta-data-recovery/SKILL.md +0 -215
  25. package/bin/skills/clickzetta-data-recovery/evals/evals.json +0 -35
  26. package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
  27. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
  28. package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
  29. package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
  30. package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
  31. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
  32. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
  33. package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
  34. package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
  35. package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -540
  36. package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -259
  37. package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
  38. package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -112
  39. package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -257
  40. package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
  41. package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
  42. package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
  43. package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
  44. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  45. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -429
  46. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -268
  47. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
  48. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
  49. package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -120
  50. package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
  51. package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
  52. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
  53. package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -156
  54. package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
  55. package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
  56. package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
  57. package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
  58. package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
  59. package/bin/skills/clickzetta-information-schema/SKILL.md +0 -367
  60. package/bin/skills/clickzetta-information-schema/references/instance-views-reference.md +0 -276
  61. package/bin/skills/clickzetta-information-schema/references/metering-views-reference.md +0 -137
  62. package/bin/skills/clickzetta-information-schema/references/views-reference.md +0 -271
  63. package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
  64. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
  65. package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
  66. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -639
  67. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -324
  68. package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
  69. package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
  70. package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
  71. package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
  72. package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
  73. package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
  74. package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
  75. package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
  76. package/bin/skills/clickzetta-metadata-query/SKILL.md +0 -298
  77. package/bin/skills/clickzetta-metadata-query/references/show-desc-reference.md +0 -326
  78. package/bin/skills/clickzetta-monitoring/SKILL.md +0 -199
  79. package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
  80. package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
  81. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -427
  82. package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
  83. package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
  84. package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
  85. package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
  86. package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
  87. package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
  88. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -197
  89. package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
  90. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
  91. package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
  92. package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
  93. package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
  94. package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -379
  95. package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
  96. package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -185
  97. package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
  98. package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -222
  99. package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -125
  100. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -172
  101. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  102. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  103. package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
  104. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  105. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  106. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
  107. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  108. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  109. package/bin/skills/clickzetta-studio-overview/SKILL.md +0 -170
  110. package/bin/skills/clickzetta-studio-overview/references/studio-modules.md +0 -173
  111. package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -206
  112. package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
  113. package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
  114. package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
  115. package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -292
  116. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -199
  117. package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
  118. package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
@@ -1,372 +0,0 @@
1
- # 函数完整参考
2
-
3
- > 含与 Snowflake / Spark SQL 的差异标注
4
-
5
- ---
6
-
7
- ## 数值函数
8
-
9
- ```sql
10
- ABS(x) -- 绝对值
11
- CEIL(x) / CEILING(x) -- 向上取整
12
- FLOOR(x) -- 向下取整
13
- ROUND(x, d) -- 四舍五入,d位小数
14
- TRUNCATE(x, d) -- 截断,d位小数
15
- MOD(x, y) / x % y -- 取模
16
- POWER(x, y) / POW(x, y) -- 幂运算
17
- SQRT(x) -- 平方根
18
- EXP(x) -- e^x
19
- LN(x) / LOG(x) -- 自然对数
20
- LOG(base, x) -- 指定底数对数
21
- LOG2(x) / LOG10(x) -- 以2/10为底
22
- SIGN(x) -- 符号(-1/0/1)
23
- GREATEST(a, b, c, ...) -- 最大值
24
- LEAST(a, b, c, ...) -- 最小值
25
- RANDOM() / RAND() -- 0-1随机数
26
- PI() -- π
27
- SIN(x) / COS(x) / TAN(x) -- 三角函数
28
- ASIN(x) / ACOS(x) / ATAN(x) -- 反三角函数
29
- ATAN2(y, x) -- 反正切
30
- DEGREES(x) / RADIANS(x) -- 角度/弧度转换
31
- -- ⚠️ FACTORIAL 不支持,用 EXP(SUM(LN(i))) 替代(i 为取值 1..n 的序列列,对其聚合;结果为浮点数,需要整数时加 ROUND)
32
- -- ⚠️ BIN(x) 不支持,用 CONV(x, 10, 2) 替代
33
- HEX(x) -- 转十六进制字符串
34
- UNHEX(s) -- 十六进制转字符串
35
- CONV(x, from_base, to_base) -- 进制转换(如 CONV(10,10,2) 得 '1010')
36
- ```
37
-
38
- **与 Snowflake 差异:**
39
- - Snowflake `SQUARE(x)` → ClickZetta `POWER(x, 2)`
40
- - Snowflake `HAVERSINE(lat1, lon1, lat2, lon2)` → ClickZetta 不支持
41
- - Snowflake `WIDTH_BUCKET` → ClickZetta 不支持
42
-
43
- ---
44
-
45
- ## 字符串函数
46
-
47
- ```sql
48
- -- 基本操作
49
- LENGTH(s) / CHAR_LENGTH(s) -- 字符长度
50
- OCTET_LENGTH(s) -- 字节长度
51
- UPPER(s) / LOWER(s) -- 大小写转换
52
- INITCAP(s) -- 首字母大写
53
- TRIM(s) / LTRIM(s) / RTRIM(s) -- 去空格
54
- TRIM(BOTH 'x' FROM s) -- 去指定字符
55
- LPAD(s, n, pad) / RPAD(s, n, pad) -- 填充
56
- REPEAT(s, n) -- 重复
57
- REVERSE(s) -- 反转
58
- SPACE(n) -- n个空格
59
-
60
- -- 拼接
61
- CONCAT(s1, s2, ...) -- 拼接(NULL 传播)
62
- CONCAT_WS(sep, s1, s2, ...) -- 带分隔符拼接(跳过 NULL)
63
- s1 || s2 -- 拼接运算符
64
-
65
- -- 截取
66
- SUBSTR(s, pos) / SUBSTRING(s, pos)
67
- SUBSTR(s, pos, len) / SUBSTRING(s, pos, len)
68
- LEFT(s, n) / RIGHT(s, n)
69
- MID(s, pos, len) -- 同 SUBSTR
70
-
71
- -- 查找
72
- INSTR(s, substr) -- 查找位置(1-based,0表示未找到)
73
- LOCATE(substr, s) -- 同 INSTR,参数顺序不同
74
- LOCATE(substr, s, pos) -- 从pos开始查找
75
- POSITION(substr IN s) -- ✅ 支持,返回子串位置(1-based)
76
- FIND_IN_SET(s, list) -- 在逗号分隔列表中查找
77
-
78
- -- 替换
79
- REPLACE(s, old, new) -- 替换所有
80
- TRANSLATE(s, from_chars, to_chars) -- 字符级替换
81
- -- ⚠️ OVERLAY 语法不支持,用 CONCAT(LEFT(s,pos-1), new, SUBSTR(s,pos+len)) 替代
82
-
83
- -- 正则
84
- REGEXP_EXTRACT(s, pattern, group) -- 提取匹配组
85
- REGEXP_EXTRACT_ALL(s, pattern) -- 提取所有匹配
86
- REGEXP_REPLACE(s, pattern, repl) -- 正则替换
87
- REGEXP_LIKE(s, pattern) -- 正则匹配(返回布尔)
88
- RLIKE(s, pattern) -- 同 REGEXP_LIKE
89
- s RLIKE pattern -- 运算符形式
90
- REGEXP_COUNT(s, pattern) -- 匹配次数
91
- REGEXP_SUBSTR(s, pattern) -- 提取第一个匹配
92
-
93
- -- 分割
94
- SPLIT(s, delimiter) -- 按分隔符分割,返回 ARRAY
95
- SPLIT_PART(s, delimiter, n) -- 取第n个分割部分(1-based)
96
-
97
- -- 格式化
98
- FORMAT_STRING(fmt, args...) -- printf 风格(如 FORMAT_STRING('%d items', 5) → '5 items')
99
- -- ⚠️ FORMAT(number, decimals) 数字千分位格式化不支持,用 ROUND + CAST 替代
100
-
101
- -- 编码
102
- BASE64(s) / UNBASE64(s) -- Base64 编解码
103
- MD5(s) -- MD5 哈希
104
- SHA1(s) / SHA2(s, bits) -- SHA 哈希
105
- CRC32(s) -- CRC32
106
- ENCODE(s, charset) / DECODE(s, charset) -- 字符集编解码
107
-
108
- -- 其他
109
- ASCII(s) -- 首字符 ASCII 码
110
- CHAR(n) -- ASCII 码转字符
111
- -- ⚠️ SOUNDEX 不支持
112
- -- ⚠️ LEVENSHTEIN 不支持,用 Python UDF 或 ZettaPark 替代
113
- HAMMING_DISTANCE(s1, s2) -- 汉明距离(字符串)
114
- ```
115
-
116
- **与 Snowflake 差异:**
117
- - Snowflake `CHARINDEX(substr, s)` → ClickZetta `INSTR(s, substr)` 或 `LOCATE(substr, s)`(参数顺序不同!)
118
- - Snowflake `EDITDISTANCE(s1, s2)` → ClickZetta 不支持 LEVENSHTEIN,需用 Python UDF
119
- - Snowflake `STRTOK(s, delim, n)` → ClickZetta `SPLIT_PART(s, delim, n)`
120
- - Snowflake `ILIKE(s, pattern)` → ClickZetta `ILIKE` ✅ 也支持!
121
- - Snowflake `CONTAINS(s, substr)` → ClickZetta `INSTR(s, substr) > 0`
122
- - Snowflake `STARTSWITH(s, prefix)` → ClickZetta `s LIKE 'prefix%'` 或 `STARTSWITH(s, prefix)`
123
- - Snowflake `ENDSWITH(s, suffix)` → ClickZetta `s LIKE '%suffix'` 或 `ENDSWITH(s, suffix)`
124
-
125
- ---
126
-
127
- ## 日期时间函数
128
-
129
- ```sql
130
- -- 获取当前时间
131
- CURRENT_DATE() -- 当前日期
132
- CURRENT_TIMESTAMP() / NOW() -- 当前时间戳(带时区)
133
- CURRENT_TIME() -- 当前时间
134
- LOCALTIMESTAMP() -- 本地时间戳
135
-
136
- -- 提取部分
137
- YEAR(dt) / MONTH(dt) / DAY(dt)
138
- HOUR(dt) / MINUTE(dt) / SECOND(dt)
139
- DAYOFWEEK(dt) -- 1=周日, 7=周六
140
- DAYOFMONTH(dt) -- 同 DAY
141
- DAYOFYEAR(dt) -- 年中第几天
142
- WEEKOFYEAR(dt) -- 年中第几周
143
- QUARTER(dt) -- 季度(1-4)
144
- EXTRACT(YEAR FROM dt) -- 标准SQL提取
145
- -- ⚠️ DATE_PART('year', dt) 不支持,用 EXTRACT 或 YEAR(dt) 替代
146
-
147
- -- 日期加减
148
- DATE_ADD(dt, n) -- 加n天
149
- DATE_SUB(dt, n) -- 减n天
150
- dt + INTERVAL n DAY -- 加n天(标准SQL)
151
- dt - INTERVAL n DAY -- 减n天
152
- dt + INTERVAL '1-2' YEAR TO MONTH -- 加1年2个月
153
- ADDDATE(dt, n) -- 同 DATE_ADD
154
- SUBDATE(dt, n) -- 同 DATE_SUB
155
- ADD_MONTHS(dt, n) -- 加n个月
156
- MONTHS_BETWEEN(dt1, dt2) -- 月份差
157
-
158
- -- 日期差
159
- DATEDIFF(end_dt, start_dt) -- 两参数形式:返回天数差(end在前)
160
- DATEDIFF(unit, start_dt, end_dt) -- 三参数形式:指定单位(day/hour/month等),与 Snowflake 兼容
161
- TIMESTAMPDIFF(unit, dt1, dt2) -- 指定单位的差值
162
-
163
- -- 截断
164
- DATE_TRUNC('year', dt) -- 截断到年
165
- DATE_TRUNC('month', dt) -- 截断到月
166
- DATE_TRUNC('day', dt) -- 截断到天
167
- DATE_TRUNC('hour', dt) -- 截断到小时
168
- DATE_TRUNC('week', dt) -- 截断到周(周一)
169
- TRUNC(dt, 'MM') -- Oracle 风格截断
170
-
171
- -- 格式化
172
- DATE_FORMAT(dt, 'yyyy-MM-dd') -- 格式化为字符串
173
- DATE_FORMAT(dt, 'yyyy-MM-dd HH:mm:ss')
174
- TO_CHAR(dt, 'YYYY-MM-DD') -- 同 DATE_FORMAT
175
-
176
- -- 转换
177
- TO_DATE('2024-01-01') -- 字符串转日期
178
- TO_DATE('2024-01-01', 'yyyy-MM-dd')
179
- TO_TIMESTAMP('2024-01-01 12:00:00')
180
- TO_TIMESTAMP('2024-01-01', 'yyyy-MM-dd')
181
- CAST('2024-01-01' AS DATE)
182
- CAST('2024-01-01 12:00:00' AS TIMESTAMP)
183
- FROM_UNIXTIME(unix_ts) -- Unix时间戳转时间戳
184
- FROM_UNIXTIME(unix_ts, fmt) -- 转格式化字符串
185
- UNIX_TIMESTAMP() -- 当前Unix时间戳
186
- UNIX_TIMESTAMP(dt) -- 日期转Unix时间戳
187
- UNIX_TIMESTAMP(s, fmt) -- 字符串转Unix时间戳
188
-
189
- -- 其他
190
- LAST_DAY(dt) -- 月末日期
191
- NEXT_DAY(dt, 'Monday') -- 下一个指定星期几
192
- MAKE_DATE(year, month, day) -- 构造日期(注意:是 MAKE_DATE 不是 MAKEDATE)
193
- ADD_MONTHS(dt, n) -- 加n个月
194
- MONTHS_BETWEEN(dt1, dt2) -- 月份差
195
- TIMESTAMPDIFF(unit, dt1, dt2) -- 指定单位的差值(如 TIMESTAMPDIFF(MONTH, ...))
196
- FROM_UTC_TIMESTAMP(ts, tz) -- UTC 转指定时区
197
- TO_UTC_TIMESTAMP(ts, tz) -- 指定时区转 UTC
198
- -- ⚠️ CONVERT_TZ(dt, from_tz, to_tz) 不支持,用 FROM_UTC_TIMESTAMP/TO_UTC_TIMESTAMP 替代
199
- -- ⚠️ MAKEDATE(year, dayofyear) 不支持,用 DATE_ADD(MAKE_DATE(year, 1, 1), dayofyear - 1) 替代
200
- -- ⚠️ MAKETIME / PERIOD_ADD / PERIOD_DIFF 不支持
201
- ```
202
-
203
- **与 Snowflake 差异:**
204
- - Snowflake `DATEADD(day, n, dt)` → ClickZetta `DATEADD(day, n, dt)` ✅ 也支持;或用 `DATE_ADD(dt, n)` / `dt + INTERVAL n DAY`
205
- - Snowflake `DATEDIFF(day, start, end)` → ClickZetta `DATEDIFF(day, start, end)` ✅ 三参数形式也支持;或用 `DATEDIFF(end, start)` 两参数形式(返回天数)
206
- - Snowflake `DATE_TRUNC('day', dt)` → ClickZetta 相同
207
- - Snowflake `TO_DATE(s)` → ClickZetta 相同
208
- - Snowflake `CONVERT_TIMEZONE(from, to, ts)` → ClickZetta `FROM_UTC_TIMESTAMP(TO_UTC_TIMESTAMP(ts, from), to)`(先转为 UTC,再转为目标时区)
209
- - Snowflake `CONVERT_TIMEZONE(tz, dt)` → ClickZetta 不支持 `CONVERT_TZ`;dt 为 UTC 时间时用 `FROM_UTC_TIMESTAMP(dt, tz)` 替代
210
- - Snowflake `SYSDATE()` / `GETDATE()` → ClickZetta `CURRENT_TIMESTAMP()` / `NOW()`
211
- - Snowflake `TIMESTAMPADD(unit, n, dt)` → ClickZetta `dt + INTERVAL n unit`
212
-
213
- **与 Spark SQL 差异:**
214
- - 大部分函数相同,ClickZetta 兼容 Spark 日期函数
215
-
216
- ---
217
-
218
- ## 条件函数
219
-
220
- ```sql
221
- -- IF
222
- IF(condition, true_val, false_val)
223
-
224
- -- CASE WHEN
225
- CASE WHEN cond1 THEN val1
226
- WHEN cond2 THEN val2
227
- ELSE default_val
228
- END
229
-
230
- -- 简单 CASE
231
- CASE status
232
- WHEN 'A' THEN 'Active'
233
- WHEN 'I' THEN 'Inactive'
234
- ELSE 'Unknown'
235
- END
236
-
237
- -- NULL 处理
238
- COALESCE(a, b, c) -- 第一个非NULL值
239
- NVL(a, b) -- a为NULL时返回b(同 IFNULL)
240
- IFNULL(a, b) -- 同 NVL
241
- NULLIF(a, b) -- a=b时返回NULL,否则返回a
242
- NVL2(a, b, c) -- a非NULL返回b,否则返回c
243
- ISNULL(a) -- 是否为NULL(返回布尔)
244
- ISNOTNULL(a) -- 是否非NULL
245
-
246
- -- DECODE(Oracle/Hive 风格)
247
- DECODE(expr, val1, res1, val2, res2, ..., default)
248
-
249
- -- 类型检查
250
- TYPEOF(expr) -- 返回类型名称字符串
251
- ```
252
-
253
- **与 Snowflake 差异:**
254
- - Snowflake `IFF(cond, a, b)` → ClickZetta `IF(cond, a, b)`
255
- - Snowflake `ZEROIFNULL(x)` → ClickZetta `COALESCE(x, 0)` 或 `NVL(x, 0)`
256
- - Snowflake `NULLIFZERO(x)` → ClickZetta `NULLIF(x, 0)`
257
- - Snowflake `BOOLAND(a, b)` / `BOOLOR(a, b)` → ClickZetta `a AND b` / `a OR b`
258
-
259
- ---
260
-
261
- ## 聚合函数
262
-
263
- ```sql
264
- -- 基本聚合
265
- COUNT(*) / COUNT(col) / COUNT(DISTINCT col)
266
- SUM(col) / AVG(col) / MAX(col) / MIN(col)
267
- STDDEV(col) / STDDEV_POP(col) / STDDEV_SAMP(col)
268
- VARIANCE(col) / VAR_POP(col) / VAR_SAMP(col)
269
-
270
- -- 布尔聚合
271
- BOOL_OR(cond) -- 任意一个为真
272
- BOOL_AND(cond) -- 全部为真
273
- EVERY(cond) -- 同 BOOL_AND
274
-
275
- -- 字符串聚合
276
- GROUP_CONCAT(col ORDER BY col SEPARATOR ',') -- 替代 Snowflake LISTAGG
277
- GROUP_CONCAT(DISTINCT col SEPARATOR ',')
278
-
279
- -- 数组聚合
280
- ARRAY_AGG(col) -- 收集为数组(含NULL)
281
- COLLECT_LIST(col) -- 同 ARRAY_AGG
282
- COLLECT_SET(col) -- 去重收集
283
-
284
- -- 近似聚合
285
- APPROX_COUNT_DISTINCT(col) -- 近似去重计数(HyperLogLog)
286
- APPROX_PERCENTILE(col, p) -- 近似百分位数
287
-
288
- -- 统计聚合
289
- CORR(x, y) -- 相关系数
290
- COVAR_POP(x, y) / COVAR_SAMP(x, y) -- 协方差
291
- -- ⚠️ REGR_SLOPE / REGR_INTERCEPT 不支持
292
- -- 替代:斜率 = CORR(y, x) * STDDEV(y) / STDDEV(x),截距 = AVG(y) - 斜率 * AVG(x)
293
-
294
- -- 有序集合聚合
295
- PERCENTILE(col, p) -- 精确百分位数
296
- PERCENTILE_APPROX(col, p) -- 近似百分位数
297
- MEDIAN(col) -- 中位数
298
- ```
299
-
300
- **与 Snowflake 差异:**
301
- - Snowflake `LISTAGG(col, ',') WITHIN GROUP (ORDER BY col)` → ClickZetta `GROUP_CONCAT(col ORDER BY col SEPARATOR ',')`
302
- - Snowflake `ARRAY_AGG(col) WITHIN GROUP (ORDER BY col)` → ClickZetta `ARRAY_AGG(col)`(不支持 WITHIN GROUP 子句,无法在聚合内指定元素顺序)
303
- - Snowflake `OBJECT_AGG(key, value)` → ClickZetta `MAP_AGG(key, value)`
304
- - Snowflake `BITAND_AGG / BITOR_AGG / BITXOR_AGG` → ClickZetta `BIT_AND / BIT_OR / BIT_XOR`
305
-
306
- ---
307
-
308
- ## 类型转换函数
309
-
310
- ```sql
311
- -- 显式转换
312
- CAST(expr AS target_type)
313
- expr::target_type -- 简写语法
314
-
315
- -- 安全转换(失败返回NULL而非报错)
316
- TRY_CAST(expr AS target_type)
317
-
318
- -- 字符串转换
319
- TO_NUMBER(s) / TO_DECIMAL(s)
320
- TO_DOUBLE(s)
321
- TO_BOOLEAN(s) -- 'true'/'false'/'1'/'0'
322
-
323
- -- 示例
324
- CAST('123' AS INT)
325
- CAST(123 AS STRING)
326
- CAST('2024-01-01' AS DATE)
327
- CAST('[1,2,3]' AS VECTOR(3)) -- 字符串转向量
328
- TRY_CAST('abc' AS INT) -- 返回 NULL
329
- ```
330
-
331
- **与 Snowflake 差异:**
332
- - Snowflake `TRY_TO_NUMBER / TRY_TO_DATE` → ClickZetta `TRY_CAST`
333
- - Snowflake `TO_VARIANT(x)` → ClickZetta `PARSE_JSON(TO_JSON(x))`
334
-
335
- ---
336
-
337
- ## 系统/上下文函数
338
-
339
- ```sql
340
- CURRENT_USER() -- 当前用户名
341
- CURRENT_WORKSPACE() -- 当前工作空间
342
- CURRENT_SCHEMA() -- 当前 Schema
343
- CURRENT_VCLUSTER() -- 当前计算集群
344
- CURRENT_INSTANCE_ID() -- 当前实例ID
345
- VERSION() -- 版本信息
346
- ```
347
-
348
- **与 Snowflake 差异:**
349
- - Snowflake `CURRENT_DATABASE()` → ClickZetta `CURRENT_WORKSPACE()`
350
- - Snowflake `CURRENT_WAREHOUSE()` → ClickZetta `CURRENT_VCLUSTER()`
351
- - Snowflake `CURRENT_ROLE()` → ClickZetta 无直接对应
352
-
353
- ---
354
-
355
- ## 向量函数
356
-
357
- ```sql
358
- -- 距离计算
359
- L2_DISTANCE(v1, v2) -- 欧几里得距离(越小越相似)
360
- COSINE_DISTANCE(v1, v2) -- 余弦距离(越小越相似)
361
- DOT_PRODUCT(v1, v2) -- 点积(越大越相似,需归一化)
362
- HAMMING_DISTANCE(v1, v2) -- 汉明距离(二值向量)
363
- JACCARD_DISTANCE(v1, v2) -- 雅卡德距离
364
-
365
- -- 向量操作
366
- BINARY_QUANTIZE(v) -- float向量二值化
367
- VECTOR(v1, v2, ...) -- 构建向量
368
-
369
- -- 示例:构建向量
370
- SELECT VECTOR(0.1, 0.2, 0.3, 0.4);
371
- SELECT CAST('[0.1, 0.2, 0.3]' AS VECTOR(3));
372
- ```
@@ -1,260 +0,0 @@
1
- # Databricks → ClickZetta 迁移指南
2
-
3
- > 覆盖从 Databricks(Delta Lake)迁移到 ClickZetta Lakehouse 时的 SQL 兼容性问题,所有结论均经过真实 Lakehouse 验证。
4
-
5
- ---
6
-
7
- ## 对象概念映射
8
-
9
- | Databricks | ClickZetta | 说明 |
10
- |---|---|---|
11
- | Catalog(内部数据) | WORKSPACE | 顶层命名空间,Catalog.Schema.Table ≈ Workspace.Schema.Table |
12
- | Catalog(外部数据源) | EXTERNAL CATALOG | 联邦查询外部系统时的三层命名空间顶层(catalog.schema.table) |
13
- | Database / Schema | SCHEMA | 相同 |
14
- | Cluster / SQL Warehouse | VCLUSTER | 计算集群 |
15
- | Delta Table(普通表) | TABLE | ClickZetta 默认 Parquet 存储,支持 Iceberg 格式 |
16
- | Delta Table(增量计算) | DYNAMIC TABLE | 自动增量刷新,替代 DLT Pipeline |
17
- | External Location | STORAGE CONNECTION + EXTERNAL VOLUME | STORAGE CONNECTION 负责认证,EXTERNAL VOLUME 负责挂载路径 |
18
- | Unity Catalog(元数据治理) | 无完整对应 | ClickZetta 通过 RBAC + SCHEMA 权限管理实现部分治理能力 |
19
- | Unity Catalog(外部数据联邦查询) | EXTERNAL CATALOG | 支持 Hive、Iceberg REST、Databricks Unity Catalog 联邦查询 |
20
- | Structured Streaming | PIPE + TABLE STREAM | PIPE 负责持续摄入,TABLE STREAM 负责 CDC 变更捕获 |
21
- | APPLY CHANGES INTO(DLT CDC) | TABLE STREAM + MERGE INTO | 先建 Stream 捕获变更,再用 MERGE 消费 |
22
- | Auto Loader | PIPE(EVENT_NOTIFICATION 模式) | 文件上传即触发加载,仅支持 OSS/S3 |
23
-
24
- ---
25
-
26
- ## DDL 差异
27
-
28
- ### CREATE TABLE
29
-
30
- ```sql
31
- -- Databricks Delta Lake
32
- CREATE TABLE orders (
33
- id BIGINT GENERATED ALWAYS AS IDENTITY,
34
- customer_id INT,
35
- amount DECIMAL(18,2),
36
- status STRING DEFAULT 'pending',
37
- created_at TIMESTAMP DEFAULT current_timestamp(),
38
- meta STRUCT<city: STRING, zip: STRING>,
39
- tags ARRAY<STRING>
40
- )
41
- USING DELTA
42
- PARTITIONED BY (DATE(created_at))
43
- TBLPROPERTIES ('delta.enableChangeDataFeed' = 'true');
44
-
45
- -- ClickZetta 等价写法
46
- CREATE TABLE IF NOT EXISTS orders (
47
- id BIGINT IDENTITY(1), -- GENERATED ALWAYS AS IDENTITY → IDENTITY
48
- customer_id INT,
49
- amount DECIMAL(18,2),
50
- status STRING DEFAULT 'pending',
51
- created_at TIMESTAMP DEFAULT current_timestamp(),
52
- meta STRUCT<city:STRING, zip:STRING>,
53
- tags ARRAY<STRING>
54
- )
55
- -- 不需要 USING DELTA(默认 Parquet)
56
- PARTITIONED BY (days(created_at)); -- DATE() → days() 转换函数
57
- -- TBLPROPERTIES → PROPERTIES
58
- -- CDC 通过 TABLE STREAM 实现,不需要 enableChangeDataFeed
59
- ```
60
-
61
- ### 不支持的 DDL
62
-
63
- ```sql
64
- -- ❌ USING DELTA / USING PARQUET(ClickZetta 默认 Parquet,不需要指定)
65
- CREATE TABLE t (...) USING DELTA;
66
- CREATE TABLE t (...) USING PARQUET;
67
-
68
- -- ❌ TBLPROPERTIES(用 PROPERTIES)
69
- CREATE TABLE t (...) TBLPROPERTIES ('key' = 'value');
70
- -- ✅ ClickZetta
71
- CREATE TABLE t (...) PROPERTIES ('data_lifecycle' = '30');
72
-
73
- -- ❌ GENERATED ALWAYS AS IDENTITY(用 IDENTITY)
74
- id BIGINT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1)
75
- -- ✅ ClickZetta
76
- id BIGINT IDENTITY(1)
77
-
78
- -- ❌ OPTIMIZE ... ZORDER BY(ClickZetta 有 OPTIMIZE 但无 ZORDER)
79
- OPTIMIZE orders ZORDER BY (customer_id, created_at);
80
- -- ✅ ClickZetta(小文件合并,无 ZORDER)
81
- OPTIMIZE orders;
82
-
83
- -- ❌ VACUUM(ClickZetta 自动管理存储)
84
- VACUUM orders RETAIN 168 HOURS;
85
- ```
86
-
87
- ---
88
-
89
- ## ⚠️ 写入时类型转换(重要差异)
90
-
91
- Databricks 允许字符串隐式转换,ClickZetta **不允许**:
92
-
93
- ```sql
94
- -- ❌ Databricks 可以,ClickZetta 报错
95
- INSERT INTO t VALUES ('2024-01-15', 'true', '123');
96
-
97
- -- ✅ ClickZetta 必须显式转换
98
- INSERT INTO t VALUES (DATE '2024-01-15', TRUE, CAST('123' AS INT));
99
- ```
100
-
101
- 详见 [migration-snowflake.md](migration-snowflake.md) 中的类型转换表(规则相同)。
102
-
103
- ---
104
-
105
- ## DML 差异
106
-
107
- ### MERGE INTO(WHEN NOT MATCHED BY SOURCE)
108
-
109
- ```sql
110
- -- Databricks:支持 WHEN NOT MATCHED BY SOURCE
111
- MERGE INTO target t USING source s ON t.id = s.id
112
- WHEN MATCHED THEN UPDATE SET t.val = s.val
113
- WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
114
- WHEN NOT MATCHED BY SOURCE THEN DELETE; -- ❌ ClickZetta 不支持
115
-
116
- -- ClickZetta 替代方案:两步操作
117
- -- 步骤1:MERGE 处理匹配和新增
118
- MERGE INTO target t USING source s ON t.id = s.id
119
- WHEN MATCHED THEN UPDATE SET t.val = s.val
120
- WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val);
121
- -- 步骤2:DELETE 不在 source 中的行(注意:source.id 含 NULL 时 NOT IN 不会删除任何行,需先过滤 NULL)
122
- DELETE FROM target WHERE id NOT IN (SELECT id FROM source);
123
- ```
124
-
125
- ### APPLY CHANGES INTO(CDC)
126
-
127
- ```sql
128
- -- Databricks:APPLY CHANGES INTO(DLT 专有)
129
- APPLY CHANGES INTO target
130
- FROM source
131
- KEYS (id)
132
- SEQUENCE BY ts
133
- APPLY AS DELETE WHEN operation = 'DELETE';
134
-
135
- -- ClickZetta:用 TABLE STREAM + MERGE 实现
136
- CREATE TABLE STREAM source_stream ON TABLE source
137
- WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
138
-
139
- MERGE INTO target t
140
- USING source_stream s ON t.id = s.id
141
- WHEN MATCHED AND s.__change_type = 'UPDATE_AFTER' THEN UPDATE SET t.val = s.val
142
- WHEN MATCHED AND s.__change_type = 'DELETE' THEN DELETE
143
- WHEN NOT MATCHED AND s.__change_type = 'INSERT' THEN INSERT (id, val) VALUES (s.id, s.val);
144
- ```
145
-
146
- ### 事务
147
-
148
- ```sql
149
- -- ❌ ClickZetta 不支持事务语法
150
- BEGIN;
151
- COMMIT;
152
- ROLLBACK;
153
- ```
154
-
155
- ---
156
-
157
- ## DQL 差异
158
-
159
- ### QUALIFY(窗口函数过滤)
160
-
161
- ```sql
162
- -- 两者都支持 QUALIFY
163
- SELECT * FROM orders
164
- QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY created_at DESC) = 1;
165
- ```
166
-
167
- ### RECURSIVE CTE
168
-
169
- ```sql
170
- -- Databricks:支持 WITH RECURSIVE
171
- WITH RECURSIVE nums AS (
172
- SELECT 1 AS n
173
- UNION ALL
174
- SELECT n + 1 FROM nums WHERE n < 5
175
- )
176
- SELECT * FROM nums;
177
-
178
- -- ❌ ClickZetta:不支持 WITH RECURSIVE(验证失败)
179
- -- 替代方案:用 Python/ZettaPark 生成序列,或预建辅助表
180
- ```
181
-
182
- ### STRUCT 命名字段
183
-
184
- ```sql
185
- -- Databricks:支持命名字段
186
- SELECT STRUCT(1 AS id, 'Alice' AS name) AS person;
187
-
188
- -- ClickZetta:用 named_struct 实现命名字段
189
- SELECT named_struct('id', 1, 'name', 'Alice') AS person; -- ✅ 推荐
190
- SELECT STRUCT(1, 'Alice') AS person; -- 位置参数写法,访问时用 person.col1, person.col2
191
- ```
192
-
193
- ---
194
-
195
- ## 分区差异
196
-
197
- ### 分区函数
198
-
199
- ```sql
200
- -- Databricks:直接用列名
201
- CREATE TABLE t (...) PARTITIONED BY (year, month);
202
-
203
- -- ClickZetta:Iceberg 隐藏分区,用转换函数
204
- CREATE TABLE t (...) PARTITIONED BY (years(created_at)); -- 按年
205
- CREATE TABLE t (...) PARTITIONED BY (months(created_at)); -- 按月
206
- CREATE TABLE t (...) PARTITIONED BY (days(created_at)); -- 按天
207
- CREATE TABLE t (...) PARTITIONED BY (bucket(16, user_id)); -- 按 bucket
208
- ```
209
-
210
- ### 分区裁剪
211
-
212
- ```sql
213
- -- ✅ ClickZetta 的 YEAR() 函数在 WHERE 中能触发分区裁剪(引擎自动转换)
214
- SELECT * FROM t WHERE YEAR(dt) = 2024; -- 实际会转换为范围过滤
215
-
216
- -- ✅ 更推荐的写法(明确范围)
217
- SELECT * FROM t WHERE dt >= DATE '2024-01-01' AND dt < DATE '2025-01-01';
218
- ```
219
-
220
- ---
221
-
222
- ## Delta Lake 特有功能对照
223
-
224
- | Delta Lake 功能 | ClickZetta 对应 | 说明 |
225
- |---|---|---|
226
- | `OPTIMIZE ... ZORDER BY` | `OPTIMIZE table`(无 ZORDER) | 只做小文件合并 |
227
- | `VACUUM` | 自动管理 | 不需要手动 VACUUM |
228
- | `DESCRIBE HISTORY` | `DESC HISTORY table` | 相同功能 |
229
- | `RESTORE TABLE ... VERSION AS OF` | `RESTORE TABLE ... TIMESTAMP AS OF` | 按时间戳恢复 |
230
- | `Time Travel VERSION AS OF n` | `TIMESTAMP AS OF '...'` | ClickZetta 按时间戳,不按版本号 |
231
- | `enableChangeDataFeed` | TABLE STREAM | 不同实现方式 |
232
- | `MERGE ... WHEN NOT MATCHED BY SOURCE` | 不支持,需两步操作 | |
233
- | `APPLY CHANGES INTO` | TABLE STREAM + MERGE | |
234
- | `GENERATED ALWAYS AS IDENTITY` | `IDENTITY(seed)` | |
235
- | `TBLPROPERTIES` | `PROPERTIES` | |
236
- | `USING DELTA` | 不需要(默认 Parquet) | |
237
-
238
- ---
239
-
240
- ## 已验证的兼容性(Databricks 有,ClickZetta 也有)
241
-
242
- - `SEMI JOIN` / `ANTI JOIN` ✅
243
- - `LATERAL VIEW EXPLODE` / `POSEXPLODE` ✅
244
- - `QUALIFY` ✅
245
- - `MERGE INTO`(基本语法)✅
246
- - `GROUPING SETS` / `ROLLUP` / `CUBE` ✅
247
- - `WITH CTE`(非递归)✅
248
- - `STRUCT` / `ARRAY` / `MAP` 类型 ✅
249
- - `TRANSFORM` / `FILTER` / `AGGREGATE` 高阶函数 ✅
250
- - `ARRAY_AGG` / `COLLECT_LIST` / `COLLECT_SET` ✅
251
- - `REGEXP_EXTRACT` / `REGEXP_REPLACE` ✅
252
- - `DATE_TRUNC` / `DATE_FORMAT` ✅
253
- - `TRY_CAST` ✅
254
- - `IDENTITY` 列 ✅
255
- - `GENERATED ALWAYS AS (expr)` 生成列 ✅
256
- - `DEFAULT` 默认值 ✅
257
- - `OPTIMIZE`(小文件合并)✅
258
- - `DESC HISTORY` ✅
259
- - `RESTORE TABLE ... TIMESTAMP AS OF` ✅
260
- - `UNDROP TABLE` ✅