@clickzetta/cz-cli-darwin-arm64 0.3.92 → 0.3.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
  3. package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
  4. package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
  5. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
  6. package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
  7. package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
  8. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
  9. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
  10. package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
  11. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
  12. package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
  13. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
  14. package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
  15. package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
  16. package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
  17. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
  18. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
  19. package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
  20. package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
  21. package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
  22. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
  23. package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
  24. package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
  25. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
  26. package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
  27. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
  28. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
  29. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
  30. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
  31. package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
  32. package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
  33. package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
  34. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
  35. package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
  36. package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
  37. package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
  38. package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
  39. package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
  40. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
  41. package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
  42. package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
  43. package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
  44. package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
  45. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
  46. package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
  47. package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
  48. package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
  49. package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
  50. package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
  51. package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
  52. package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
  53. package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
  54. package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
  55. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
  56. package/package.json +1 -1
  57. package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
  58. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  59. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
  60. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
  61. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
  62. package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
  63. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  64. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  65. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  66. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  67. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  68. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  69. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
@@ -1,260 +0,0 @@
1
- # Databricks → ClickZetta 迁移指南
2
-
3
- > 覆盖从 Databricks(Delta Lake)迁移到 ClickZetta Lakehouse 时的 SQL 兼容性问题,所有结论均经过真实 Lakehouse 验证。
4
-
5
- ---
6
-
7
- ## 对象概念映射
8
-
9
- | Databricks | ClickZetta | 说明 |
10
- |---|---|---|
11
- | Catalog(内部数据) | WORKSPACE | 顶层命名空间,Catalog.Schema.Table ≈ Workspace.Schema.Table |
12
- | Catalog(外部数据源) | EXTERNAL CATALOG | 联邦查询外部系统时的三层命名空间顶层(catalog.schema.table) |
13
- | Database / Schema | SCHEMA | 相同 |
14
- | Cluster / SQL Warehouse | VCLUSTER | 计算集群 |
15
- | Delta Table(普通表) | TABLE | ClickZetta 默认 Parquet 存储,支持 Iceberg 格式 |
16
- | Delta Table(增量计算) | DYNAMIC TABLE | 自动增量刷新,替代 DLT Pipeline |
17
- | External Location | STORAGE CONNECTION + EXTERNAL VOLUME | STORAGE CONNECTION 负责认证,EXTERNAL VOLUME 负责挂载路径 |
18
- | Unity Catalog(元数据治理) | 无完整对应 | ClickZetta 通过 RBAC + SCHEMA 权限管理实现部分治理能力 |
19
- | Unity Catalog(外部数据联邦查询) | EXTERNAL CATALOG | 支持 Hive、Iceberg REST、Databricks Unity Catalog 联邦查询 |
20
- | Structured Streaming | PIPE + TABLE STREAM | PIPE 负责持续摄入,TABLE STREAM 负责 CDC 变更捕获 |
21
- | APPLY CHANGES INTO(DLT CDC) | TABLE STREAM + MERGE INTO | 先建 Stream 捕获变更,再用 MERGE 消费 |
22
- | Auto Loader | PIPE(EVENT_NOTIFICATION 模式) | 文件上传即触发加载,仅支持 OSS/S3 |
23
-
24
- ---
25
-
26
- ## DDL 差异
27
-
28
- ### CREATE TABLE
29
-
30
- ```sql
31
- -- Databricks Delta Lake
32
- CREATE TABLE orders (
33
- id BIGINT GENERATED ALWAYS AS IDENTITY,
34
- customer_id INT,
35
- amount DECIMAL(18,2),
36
- status STRING DEFAULT 'pending',
37
- created_at TIMESTAMP DEFAULT current_timestamp(),
38
- meta STRUCT<city: STRING, zip: STRING>,
39
- tags ARRAY<STRING>
40
- )
41
- USING DELTA
42
- PARTITIONED BY (DATE(created_at))
43
- TBLPROPERTIES ('delta.enableChangeDataFeed' = 'true');
44
-
45
- -- ClickZetta 等价写法
46
- CREATE TABLE IF NOT EXISTS orders (
47
- id BIGINT IDENTITY(1), -- GENERATED ALWAYS AS IDENTITY → IDENTITY
48
- customer_id INT,
49
- amount DECIMAL(18,2),
50
- status STRING DEFAULT 'pending',
51
- created_at TIMESTAMP DEFAULT current_timestamp(),
52
- meta STRUCT<city:STRING, zip:STRING>,
53
- tags ARRAY<STRING>
54
- )
55
- -- 不需要 USING DELTA(默认 Parquet)
56
- PARTITIONED BY (days(created_at)); -- DATE() → days() 转换函数
57
- -- TBLPROPERTIES → PROPERTIES
58
- -- CDC 通过 TABLE STREAM 实现,不需要 enableChangeDataFeed
59
- ```
60
-
61
- ### 不支持的 DDL
62
-
63
- ```sql
64
- -- ❌ USING DELTA / USING PARQUET(ClickZetta 默认 Parquet,不需要指定)
65
- CREATE TABLE t (...) USING DELTA;
66
- CREATE TABLE t (...) USING PARQUET;
67
-
68
- -- ❌ TBLPROPERTIES(用 PROPERTIES)
69
- CREATE TABLE t (...) TBLPROPERTIES ('key' = 'value');
70
- -- ✅ ClickZetta
71
- CREATE TABLE t (...) PROPERTIES ('data_lifecycle' = '30');
72
-
73
- -- ❌ GENERATED ALWAYS AS IDENTITY(用 IDENTITY)
74
- id BIGINT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1)
75
- -- ✅ ClickZetta
76
- id BIGINT IDENTITY(1)
77
-
78
- -- ❌ OPTIMIZE ... ZORDER BY(ClickZetta 有 OPTIMIZE 但无 ZORDER)
79
- OPTIMIZE orders ZORDER BY (customer_id, created_at);
80
- -- ✅ ClickZetta(小文件合并,无 ZORDER)
81
- OPTIMIZE orders;
82
-
83
- -- ❌ VACUUM(ClickZetta 自动管理存储)
84
- VACUUM orders RETAIN 168 HOURS;
85
- ```
86
-
87
- ---
88
-
89
- ## ⚠️ 写入时类型转换(重要差异)
90
-
91
- Databricks 允许字符串隐式转换,ClickZetta **不允许**:
92
-
93
- ```sql
94
- -- ❌ Databricks 可以,ClickZetta 报错
95
- INSERT INTO t VALUES ('2024-01-15', 'true', '123');
96
-
97
- -- ✅ ClickZetta 必须显式转换
98
- INSERT INTO t VALUES (DATE '2024-01-15', TRUE, CAST('123' AS INT));
99
- ```
100
-
101
- 详见 [migration-snowflake.md](migration-snowflake.md) 中的类型转换表(规则相同)。
102
-
103
- ---
104
-
105
- ## DML 差异
106
-
107
- ### MERGE INTO(WHEN NOT MATCHED BY SOURCE)
108
-
109
- ```sql
110
- -- Databricks:支持 WHEN NOT MATCHED BY SOURCE
111
- MERGE INTO target t USING source s ON t.id = s.id
112
- WHEN MATCHED THEN UPDATE SET t.val = s.val
113
- WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
114
- WHEN NOT MATCHED BY SOURCE THEN DELETE; -- ❌ ClickZetta 不支持
115
-
116
- -- ClickZetta 替代方案:两步操作
117
- -- 步骤1:MERGE 处理匹配和新增
118
- MERGE INTO target t USING source s ON t.id = s.id
119
- WHEN MATCHED THEN UPDATE SET t.val = s.val
120
- WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val);
121
- -- 步骤2:DELETE 不在 source 中的行(注意:若 source.id 含 NULL,NOT IN 的结果恒不为真,将不会删除任何行;此时应先过滤 NULL 或改用 NOT EXISTS)
122
- DELETE FROM target WHERE id NOT IN (SELECT id FROM source);
123
- ```
124
-
125
- ### APPLY CHANGES INTO(CDC)
126
-
127
- ```sql
128
- -- Databricks:APPLY CHANGES INTO(DLT 专有)
129
- APPLY CHANGES INTO target
130
- FROM source
131
- KEYS (id)
132
- SEQUENCE BY ts
133
- APPLY AS DELETE WHEN operation = 'DELETE';
134
-
135
- -- ClickZetta:用 TABLE STREAM + MERGE 实现
136
- CREATE TABLE STREAM source_stream ON TABLE source
137
- WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
138
-
139
- MERGE INTO target t
140
- USING source_stream s ON t.id = s.id
141
- WHEN MATCHED AND s.__change_type = 'UPDATE_AFTER' THEN UPDATE SET t.val = s.val
142
- WHEN MATCHED AND s.__change_type = 'DELETE' THEN DELETE
143
- WHEN NOT MATCHED AND s.__change_type = 'INSERT' THEN INSERT (id, val) VALUES (s.id, s.val);
144
- ```
145
-
146
- ### 事务
147
-
148
- ```sql
149
- -- ❌ ClickZetta 不支持事务语法
150
- BEGIN;
151
- COMMIT;
152
- ROLLBACK;
153
- ```
154
-
155
- ---
156
-
157
- ## DQL 差异
158
-
159
- ### QUALIFY(窗口函数过滤)
160
-
161
- ```sql
162
- -- 两者都支持 QUALIFY
163
- SELECT * FROM orders
164
- QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY created_at DESC) = 1;
165
- ```
166
-
167
- ### RECURSIVE CTE
168
-
169
- ```sql
170
- -- Databricks:支持 WITH RECURSIVE
171
- WITH RECURSIVE nums AS (
172
- SELECT 1 AS n
173
- UNION ALL
174
- SELECT n + 1 FROM nums WHERE n < 5
175
- )
176
- SELECT * FROM nums;
177
-
178
- -- ❌ ClickZetta:不支持 WITH RECURSIVE(验证失败)
179
- -- 替代方案:用 Python/ZettaPark 生成序列,或预建辅助表
180
- ```
181
-
182
- ### STRUCT 命名字段
183
-
184
- ```sql
185
- -- Databricks:支持命名字段
186
- SELECT STRUCT(1 AS id, 'Alice' AS name) AS person;
187
-
188
- -- ClickZetta:用 named_struct 实现命名字段
189
- SELECT named_struct('id', 1, 'name', 'Alice') AS person; -- ✅ 推荐
190
- SELECT STRUCT(1, 'Alice') AS person; -- 位置参数写法,访问时用 person.col1, person.col2
191
- ```
192
-
193
- ---
194
-
195
- ## 分区差异
196
-
197
- ### 分区函数
198
-
199
- ```sql
200
- -- Databricks:直接用列名
201
- CREATE TABLE t (...) PARTITIONED BY (year, month);
202
-
203
- -- ClickZetta:Iceberg 隐藏分区,用转换函数
204
- CREATE TABLE t (...) PARTITIONED BY (years(created_at)); -- 按年
205
- CREATE TABLE t (...) PARTITIONED BY (months(created_at)); -- 按月
206
- CREATE TABLE t (...) PARTITIONED BY (days(created_at)); -- 按天
207
- CREATE TABLE t (...) PARTITIONED BY (bucket(16, user_id)); -- 按 bucket
208
- ```
209
-
210
- ### 分区裁剪
211
-
212
- ```sql
213
- -- ✅ ClickZetta 的 YEAR() 函数在 WHERE 中能触发分区裁剪(引擎自动转换)
214
- SELECT * FROM t WHERE YEAR(dt) = 2024; -- 实际会转换为范围过滤
215
-
216
- -- ✅ 更推荐的写法(明确范围)
217
- SELECT * FROM t WHERE dt >= DATE '2024-01-01' AND dt < DATE '2025-01-01';
218
- ```
219
-
220
- ---
221
-
222
- ## Delta Lake 特有功能对照
223
-
224
- | Delta Lake 功能 | ClickZetta 对应 | 说明 |
225
- |---|---|---|
226
- | `OPTIMIZE ... ZORDER BY` | `OPTIMIZE table`(无 ZORDER) | 只做小文件合并 |
227
- | `VACUUM` | 自动管理 | 不需要手动 VACUUM |
228
- | `DESCRIBE HISTORY` | `DESC HISTORY table` | 相同功能 |
229
- | `RESTORE TABLE ... VERSION AS OF` | `RESTORE TABLE ... TIMESTAMP AS OF` | 按时间戳恢复 |
230
- | `Time Travel VERSION AS OF n` | `TIMESTAMP AS OF '...'` | ClickZetta 按时间戳,不按版本号 |
231
- | `enableChangeDataFeed` | TABLE STREAM | 不同实现方式 |
232
- | `MERGE ... WHEN NOT MATCHED BY SOURCE` | 不支持,需两步操作 | |
233
- | `APPLY CHANGES INTO` | TABLE STREAM + MERGE | |
234
- | `GENERATED ALWAYS AS IDENTITY` | `IDENTITY(seed)` | |
235
- | `TBLPROPERTIES` | `PROPERTIES` | |
236
- | `USING DELTA` | 不需要(默认 Parquet) | |
237
-
238
- ---
239
-
240
- ## 已验证的兼容性(Databricks 有,ClickZetta 也有)
241
-
242
- - `SEMI JOIN` / `ANTI JOIN` ✅
243
- - `LATERAL VIEW EXPLODE` / `POSEXPLODE` ✅
244
- - `QUALIFY` ✅
245
- - `MERGE INTO`(基本语法)✅
246
- - `GROUPING SETS` / `ROLLUP` / `CUBE` ✅
247
- - `WITH CTE`(非递归)✅
248
- - `STRUCT` / `ARRAY` / `MAP` 类型 ✅
249
- - `TRANSFORM` / `FILTER` / `AGGREGATE` 高阶函数 ✅
250
- - `ARRAY_AGG` / `COLLECT_LIST` / `COLLECT_SET` ✅
251
- - `REGEXP_EXTRACT` / `REGEXP_REPLACE` ✅
252
- - `DATE_TRUNC` / `DATE_FORMAT` ✅
253
- - `TRY_CAST` ✅
254
- - `IDENTITY` 列 ✅
255
- - `GENERATED ALWAYS AS (expr)` 生成列 ✅
256
- - `DEFAULT` 默认值 ✅
257
- - `OPTIMIZE`(小文件合并)✅
258
- - `DESC HISTORY` ✅
259
- - `RESTORE TABLE ... TIMESTAMP AS OF` ✅
260
- - `UNDROP TABLE` ✅
@@ -1,346 +0,0 @@
1
- # ClickZetta Lakehouse vs Snowflake SQL 差异
2
-
3
- > 来源:产品文档 + 迁移实践
4
-
5
- ## 对象概念映射
6
-
7
- | ClickZetta Lakehouse | Snowflake | 说明 |
8
- |---|---|---|
9
- | WORKSPACE | DATABASE | 工作空间 ≈ 数据库 |
10
- | SCHEMA | SCHEMA | 相同 |
11
- | VCLUSTER | WAREHOUSE | 计算集群 |
12
- | STORAGE CONNECTION | STORAGE INTEGRATION | 对象存储认证 |
13
- | VOLUME | STAGE | 文件存储区域 |
14
- | TABLE | TABLE | 相同 |
15
- | PIPE | SNOWPIPE | 持续导入管道 |
16
- | TABLE STREAM | STREAM | 变更数据捕获 |
17
- | DYNAMIC TABLE | DYNAMIC TABLE | 增量计算表(语法不同) |
18
- | Studio 任务 | TASK | 调度任务 |
19
-
20
- ---
21
-
22
- ## DDL 差异
23
-
24
- ### CREATE OR REPLACE vs IF NOT EXISTS
25
-
26
- ```sql
27
- -- Snowflake:支持 CREATE OR REPLACE
28
- CREATE OR REPLACE TABLE orders (id INT, amount DECIMAL);
29
-
30
- -- ClickZetta:不支持 CREATE OR REPLACE,用 IF NOT EXISTS
31
- CREATE TABLE IF NOT EXISTS orders (id INT, amount DECIMAL);
32
- -- 修改已有表用 ALTER TABLE
33
- ```
34
-
35
- ### 注释语法
36
-
37
- ```sql
38
- -- Snowflake:支持 // 和 ///
39
- // 这是注释
40
- /// 这也是注释
41
-
42
- -- ClickZetta:只支持 -- 和 /* */
43
- -- 这是注释
44
- /* 这也是注释 */
45
- ```
46
-
47
- ### 数据类型差异
48
-
49
- | ClickZetta | Snowflake | 说明 |
50
- |---|---|---|
51
- | `STRING` | `VARCHAR` / `TEXT` | ClickZetta 推荐用 STRING |
52
- | `TIMESTAMP` | `TIMESTAMP_LTZ` | 本地时区时间戳 |
53
- | `TIMESTAMP_NTZ` | `TIMESTAMP_NTZ` | 无时区时间戳 |
54
- | `JSON` | `VARIANT` | 半结构化数据 |
55
- | `ARRAY<T>` | `ARRAY` | ClickZetta 需指定元素类型 |
56
- | `MAP<K,V>` | `OBJECT` | 键值对 |
57
- | `STRUCT<f:T,...>` | `OBJECT` | 结构体 |
58
- | `VECTOR(FLOAT, N)` | 无原生支持 | 向量类型(ClickZetta 特有) |
59
- | `TINYINT` | `NUMBER(3,0)` | 1字节整数 |
60
- | `SMALLINT` | `NUMBER(5,0)` | 2字节整数 |
61
- | 无 `NUMBER` | `NUMBER(p,s)` | ClickZetta 用 `DECIMAL(p,s)` |
62
-
63
- ### ⚠️ 写入时隐式类型转换(重要差异)
64
-
65
- Snowflake 允许写入时字符串隐式转换为日期/布尔等类型,ClickZetta **不允许**:
66
-
67
- | 操作 | Snowflake | ClickZetta |
68
- |---|---|---|
69
- | INSERT 字符串→DATE | ✅ 允许 | ❌ 报错,需 `CAST` 或 `DATE '...'` |
70
- | INSERT 字符串→TIMESTAMP | ✅ 允许 | ❌ 报错,需 `CAST` 或 `TIMESTAMP '...'` |
71
- | INSERT 字符串→BOOLEAN | ✅ 允许 | ❌ 报错,需 `TRUE`/`FALSE` 或 `CAST` |
72
- | INSERT 字符串→INT | ✅ 允许 | ❌ 报错,需 `CAST('123' AS INT)` |
73
- | INSERT 字符串→JSON | ✅ 允许 | ❌ 报错,需 `PARSE_JSON(...)` 或 `CAST` |
74
- | UPDATE 字符串→DATE | ✅ 允许 | ❌ 报错,需 `CAST` |
75
- | WHERE 字符串=DATE | ✅ 允许 | ✅ 允许(查询中可隐式比较) |
76
-
77
- ### 建表语法差异
78
-
79
- ```sql
80
- -- Snowflake:CLUSTER BY
81
- CREATE TABLE orders (id INT, dt DATE)
82
- CLUSTER BY (dt);
83
-
84
- -- ClickZetta:CLUSTERED BY + PARTITIONED BY
85
- CREATE TABLE orders (
86
- id INT,
87
- dt DATE
88
- )
89
- PARTITIONED BY (dt)
90
- CLUSTERED BY (id) INTO 8 BUCKETS;
91
-
92
- -- ClickZetta 特有:内联索引(示例为 Bloom Filter 索引)
93
- CREATE TABLE orders (
94
- id INT,
95
- amount DECIMAL,
96
- INDEX amount_bf (amount) USING BLOOM_FILTER
97
- );
98
- ```
99
-
100
- ---
101
-
102
- ## DML 差异
103
-
104
- ### INSERT
105
-
106
- ```sql
107
- -- 两者基本相同,ClickZetta 额外支持:
108
- INSERT OVERWRITE TABLE orders SELECT * FROM staging; -- 覆盖写入(Hive 风格)
109
- INSERT INTO orders PARTITION (dt='2024-01-01') VALUES (1, 100); -- 静态分区
110
- ```
111
-
112
- ### UPDATE
113
-
114
- ```sql
115
- -- Snowflake
116
- UPDATE orders SET amount = amount * 1.1 WHERE status = 'VIP';
117
-
118
- -- ClickZetta:相同语法,额外支持 ORDER BY + LIMIT
119
- UPDATE orders SET amount = amount * 1.1
120
- WHERE status = 'VIP'
121
- ORDER BY created_at DESC
122
- LIMIT 1000;
123
- ```
124
-
125
- ### MERGE INTO
126
-
127
- ```sql
128
- -- ClickZetta 限制:WHEN NOT MATCHED 只能有一个
129
- -- Snowflake 支持多个 WHEN NOT MATCHED
130
-
131
- -- ClickZetta MERGE 示例(⚠️ UPDATE 必须在 DELETE 之前)
132
- MERGE INTO target t
133
- USING source s ON t.id = s.id
134
- WHEN MATCHED AND s.action <> 'DELETE' THEN UPDATE SET t.amount = s.amount
135
- WHEN MATCHED AND s.action = 'DELETE' THEN DELETE
136
- WHEN NOT MATCHED THEN INSERT (id, amount) VALUES (s.id, s.amount);
137
- ```
138
-
139
- ---
140
-
141
- ## 查询语法差异
142
-
143
- ### SELECT 扩展
144
-
145
- ```sql
146
- -- ClickZetta 写法:SELECT * EXCEPT(col)(Snowflake 的对应写法为 SELECT * EXCLUDE (col))
147
- SELECT * EXCEPT(sensitive_col) FROM users;
148
-
149
- -- 两者都支持:GROUP BY ALL(自动推断分组列)
150
- SELECT year, month, SUM(amount)
151
- FROM orders
152
- GROUP BY ALL;
153
-
154
- -- 两者都支持:GROUPING SETS / ROLLUP / CUBE
155
- SELECT region, product, SUM(sales)
156
- FROM orders
157
- GROUP BY GROUPING SETS ((region), (product), ());
158
- ```
159
-
160
- ### JSON 查询
161
-
162
- ```sql
163
- -- Snowflake:VARIANT 类型,用 : 访问
164
- SELECT data:address:city FROM users;
165
- SELECT data[0]:name FROM users;
166
-
167
- -- ClickZetta:JSON 类型,用 [] 访问
168
- SELECT data['address']['city'] FROM users;
169
- SELECT data['phoneNumbers'][0]['number'] FROM users;
170
-
171
- -- 两者都支持 PARSE_JSON
172
- SELECT parse_json('{"name":"Alice"}')['name'];
173
- ```
174
-
175
- ### LATERAL VIEW(展开数组)
176
-
177
- ```sql
178
- -- ClickZetta(Hive 风格)
179
- SELECT e.id, s.skill
180
- FROM employees e
181
- LATERAL VIEW EXPLODE(e.skills) s AS skill;
182
-
183
- -- Snowflake(用 FLATTEN)
184
- SELECT e.id, f.value::STRING AS skill
185
- FROM employees e,
186
- LATERAL FLATTEN(input => e.skills) f;
187
- ```
188
-
189
- ### QUALIFY(窗口函数过滤)
190
-
191
- ```sql
192
- -- 两者都支持 QUALIFY
193
- SELECT * FROM orders
194
- QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY created_at DESC) = 1;
195
- ```
196
-
197
- ### PIVOT / UNPIVOT
198
-
199
- ```sql
200
- -- Snowflake 原生支持 PIVOT
201
- SELECT * FROM sales
202
- PIVOT (SUM(amount) FOR month IN ('Jan', 'Feb', 'Mar'));
203
-
204
- -- ClickZetta:用 CASE WHEN 实现
205
- SELECT
206
- product,
207
- SUM(CASE WHEN month = 'Jan' THEN amount END) AS Jan,
208
- SUM(CASE WHEN month = 'Feb' THEN amount END) AS Feb
209
- FROM sales GROUP BY product;
210
- ```
211
-
212
- ---
213
-
214
- ## 流(Stream)差异
215
-
216
- ```sql
217
- -- Snowflake Stream 元数据字段
218
- METADATA$ACTION -- 'INSERT' / 'DELETE'
219
- METADATA$ISUPDATE -- TRUE/FALSE
220
- METADATA$ROW_ID -- 行唯一标识
221
-
222
- -- ClickZetta Table Stream 元数据字段
223
- __change_type -- 'INSERT' / 'UPDATE_BEFORE' / 'UPDATE_AFTER' / 'DELETE'
224
- __commit_version -- 提交版本号
225
- __commit_timestamp -- 提交时间戳
226
- ```
227
-
228
- ---
229
-
230
- ## 动态表(Dynamic Table)差异
231
-
232
- ```sql
233
- -- Snowflake Dynamic Table
234
- CREATE DYNAMIC TABLE product_sales
235
- TARGET_LAG = '1 minutes'
236
- WAREHOUSE = my_warehouse
237
- AS SELECT ...;
238
-
239
- -- ClickZetta Dynamic Table(不支持 TARGET_LAG)
240
- CREATE DYNAMIC TABLE product_sales
241
- REFRESH INTERVAL 1 MINUTE VCLUSTER default_ap
242
- AS SELECT ...;
243
- ```
244
-
245
- ---
246
-
247
- ## 对象存储(Stage vs Volume)
248
-
249
- ```sql
250
- -- Snowflake:Stage
251
- CREATE STAGE my_stage
252
- URL = 's3://bucket/path'
253
- STORAGE_INTEGRATION = my_integration;
254
-
255
- COPY INTO orders FROM @my_stage/data.csv;
256
-
257
- -- ClickZetta:Volume
258
- CREATE EXTERNAL VOLUME my_volume
259
- LOCATION = 'oss://bucket/path'
260
- USING CONNECTION my_oss_conn;
261
-
262
- COPY INTO orders FROM VOLUME my_volume USING CSV;
263
- ```
264
-
265
- ---
266
-
267
- ## 函数差异
268
-
269
- ### 日期函数
270
-
271
- ```sql
272
- -- Snowflake
273
- DATEADD(day, 7, order_date)
274
- DATEDIFF(day, start_date, end_date)
275
- DATE_TRUNC('month', order_date)
276
- TO_DATE('2024-01-01')
277
- CURRENT_TIMESTAMP()
278
-
279
- -- ClickZetta(兼容 Hive/Spark 风格,同时也支持 Snowflake 风格)
280
- DATEADD(day, 7, order_date) -- ✅ 也支持与 Snowflake 相同的语法
281
- DATE_ADD(order_date, 7) -- 或 Hive 风格
282
- DATEDIFF(end_date, start_date) -- 注意:无单位参数,且日期参数顺序与 Snowflake 相反(结束日期在前)!
283
- DATE_TRUNC('month', order_date) -- 相同
284
- TO_DATE('2024-01-01') -- 相同
285
- CURRENT_TIMESTAMP() -- 相同,也支持 NOW()
286
- ```
287
-
288
- ### 字符串函数
289
-
290
- ```sql
291
- -- Snowflake
292
- CHARINDEX('sub', str) -- 查找子串位置
293
- EDITDISTANCE(s1, s2) -- 编辑距离
294
- SOUNDEX(str) -- 语音相似度
295
- INITCAP(str) -- 首字母大写
296
-
297
- -- ClickZetta
298
- INSTR(str, 'sub') -- 查找子串位置(Hive 风格)
299
- LOCATE('sub', str) -- 也支持
300
- LEVENSHTEIN(s1, s2) -- 编辑距离
301
- INITCAP(str) -- 相同
302
- ```
303
-
304
- ### 条件函数
305
-
306
- ```sql
307
- -- Snowflake
308
- IFF(condition, true_val, false_val)
309
- ZEROIFNULL(expr)
310
- NULLIFZERO(expr)
311
- DECODE(expr, val1, res1, val2, res2, default)
312
-
313
- -- ClickZetta
314
- IF(condition, true_val, false_val) -- 或 CASE WHEN
315
- COALESCE(expr, 0) -- 替代 ZEROIFNULL
316
- NULLIF(expr, 0) -- 替代 NULLIFZERO
317
- DECODE(expr, val1, res1, ...) -- 支持(兼容)
318
- ```
319
-
320
- ### 聚合函数
321
-
322
- ```sql
323
- -- Snowflake
324
- LISTAGG(col, ',') WITHIN GROUP (ORDER BY col)
325
- ARRAY_AGG(col)
326
- OBJECT_AGG(key, value)
327
- APPROX_COUNT_DISTINCT(col)
328
-
329
- -- ClickZetta
330
- GROUP_CONCAT(col ORDER BY col SEPARATOR ',') -- 替代 LISTAGG
331
- ARRAY_AGG(col) -- 相同
332
- MAP_AGG(key, value) -- 替代 OBJECT_AGG
333
- APPROX_COUNT_DISTINCT(col) -- 相同
334
- ```
335
-
336
- ---
337
-
338
- ## 权限体系差异
339
-
340
- | 概念 | ClickZetta | Snowflake |
341
- |---|---|---|
342
- | 顶层容器 | WORKSPACE | DATABASE |
343
- | 权限对象 | VCLUSTER / SCHEMA / TABLE / VIEW | WAREHOUSE / DATABASE / SCHEMA / TABLE |
344
- | 角色授予 | `GRANT ROLE r TO USER u` | `GRANT ROLE r TO USER u` |
345
- | 查看权限 | `SHOW GRANTS TO USER u` | `SHOW GRANTS TO USER u` |
346
- | 系统角色 | instance_admin / workspace_admin / workspace_dev / workspace_analyst | ACCOUNTADMIN / SYSADMIN / USERADMIN |