@clickzetta/cz-cli-darwin-x64 0.3.92 → 0.3.94
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
- package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
- package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
- package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
- package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
- package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
- package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
- package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
- package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
- package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
- package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
- package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
- package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
- package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
- package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
- package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
- package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
- package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
- package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
- package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
- package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
- package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
- package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
- package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
- package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
- package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
- package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
- package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
- package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
- package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
- package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
- package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
- package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
- package/package.json +1 -1
- package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
- package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
- /package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
|
@@ -1,260 +0,0 @@
|
|
|
1
|
-
# Databricks → ClickZetta 迁移指南
|
|
2
|
-
|
|
3
|
-
> 覆盖从 Databricks(Delta Lake)迁移到 ClickZetta Lakehouse 时的 SQL 兼容性问题,所有结论均经过真实 Lakehouse 验证。
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## 对象概念映射
|
|
8
|
-
|
|
9
|
-
| Databricks | ClickZetta | 说明 |
|
|
10
|
-
|---|---|---|
|
|
11
|
-
| Catalog(内部数据) | WORKSPACE | 顶层命名空间,Catalog.Schema.Table ≈ Workspace.Schema.Table |
|
|
12
|
-
| Catalog(外部数据源) | EXTERNAL CATALOG | 联邦查询外部系统时的三层命名空间顶层(catalog.schema.table) |
|
|
13
|
-
| Database / Schema | SCHEMA | 相同 |
|
|
14
|
-
| Cluster / SQL Warehouse | VCLUSTER | 计算集群 |
|
|
15
|
-
| Delta Table(普通表) | TABLE | ClickZetta 默认 Parquet 存储,支持 Iceberg 格式 |
|
|
16
|
-
| Delta Table(增量计算) | DYNAMIC TABLE | 自动增量刷新,替代 DLT Pipeline |
|
|
17
|
-
| External Location | STORAGE CONNECTION + EXTERNAL VOLUME | STORAGE CONNECTION 负责认证,EXTERNAL VOLUME 负责挂载路径 |
|
|
18
|
-
| Unity Catalog(元数据治理) | 无完整对应 | ClickZetta 通过 RBAC + SCHEMA 权限管理实现部分治理能力 |
|
|
19
|
-
| Unity Catalog(外部数据联邦查询) | EXTERNAL CATALOG | 支持 Hive、Iceberg REST、Databricks Unity Catalog 联邦查询 |
|
|
20
|
-
| Structured Streaming | PIPE + TABLE STREAM | PIPE 负责持续摄入,TABLE STREAM 负责 CDC 变更捕获 |
|
|
21
|
-
| APPLY CHANGES INTO(DLT CDC) | TABLE STREAM + MERGE INTO | 先建 Stream 捕获变更,再用 MERGE 消费 |
|
|
22
|
-
| Auto Loader | PIPE(EVENT_NOTIFICATION 模式) | 文件上传即触发加载,仅支持 OSS/S3 |
|
|
23
|
-
|
|
24
|
-
---
|
|
25
|
-
|
|
26
|
-
## DDL 差异
|
|
27
|
-
|
|
28
|
-
### CREATE TABLE
|
|
29
|
-
|
|
30
|
-
```sql
|
|
31
|
-
-- Databricks Delta Lake
|
|
32
|
-
CREATE TABLE orders (
|
|
33
|
-
id BIGINT GENERATED ALWAYS AS IDENTITY,
|
|
34
|
-
customer_id INT,
|
|
35
|
-
amount DECIMAL(18,2),
|
|
36
|
-
status STRING DEFAULT 'pending',
|
|
37
|
-
created_at TIMESTAMP DEFAULT current_timestamp(),
|
|
38
|
-
meta STRUCT<city: STRING, zip: STRING>,
|
|
39
|
-
tags ARRAY<STRING>
|
|
40
|
-
)
|
|
41
|
-
USING DELTA
|
|
42
|
-
PARTITIONED BY (DATE(created_at))
|
|
43
|
-
TBLPROPERTIES ('delta.enableChangeDataFeed' = 'true');
|
|
44
|
-
|
|
45
|
-
-- ClickZetta 等价写法
|
|
46
|
-
CREATE TABLE IF NOT EXISTS orders (
|
|
47
|
-
id BIGINT IDENTITY(1), -- GENERATED ALWAYS AS IDENTITY → IDENTITY
|
|
48
|
-
customer_id INT,
|
|
49
|
-
amount DECIMAL(18,2),
|
|
50
|
-
status STRING DEFAULT 'pending',
|
|
51
|
-
created_at TIMESTAMP DEFAULT current_timestamp(),
|
|
52
|
-
meta STRUCT<city:STRING, zip:STRING>,
|
|
53
|
-
tags ARRAY<STRING>
|
|
54
|
-
)
|
|
55
|
-
-- 不需要 USING DELTA(默认 Parquet)
|
|
56
|
-
PARTITIONED BY (days(created_at)); -- DATE() → days() 转换函数
|
|
57
|
-
-- TBLPROPERTIES → PROPERTIES
|
|
58
|
-
-- CDC 通过 TABLE STREAM 实现,不需要 enableChangeDataFeed
|
|
59
|
-
```
|
|
60
|
-
|
|
61
|
-
### 不支持的 DDL
|
|
62
|
-
|
|
63
|
-
```sql
|
|
64
|
-
-- ❌ USING DELTA / USING PARQUET(ClickZetta 默认 Parquet,不需要指定)
|
|
65
|
-
CREATE TABLE t (...) USING DELTA;
|
|
66
|
-
CREATE TABLE t (...) USING PARQUET;
|
|
67
|
-
|
|
68
|
-
-- ❌ TBLPROPERTIES(用 PROPERTIES)
|
|
69
|
-
CREATE TABLE t (...) TBLPROPERTIES ('key' = 'value');
|
|
70
|
-
-- ✅ ClickZetta
|
|
71
|
-
CREATE TABLE t (...) PROPERTIES ('data_lifecycle' = '30');
|
|
72
|
-
|
|
73
|
-
-- ❌ GENERATED ALWAYS AS IDENTITY(用 IDENTITY)
|
|
74
|
-
id BIGINT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1)
|
|
75
|
-
-- ✅ ClickZetta
|
|
76
|
-
id BIGINT IDENTITY(1)
|
|
77
|
-
|
|
78
|
-
-- ❌ OPTIMIZE ... ZORDER BY(ClickZetta 有 OPTIMIZE 但无 ZORDER)
|
|
79
|
-
OPTIMIZE orders ZORDER BY (customer_id, created_at);
|
|
80
|
-
-- ✅ ClickZetta(小文件合并,无 ZORDER)
|
|
81
|
-
OPTIMIZE orders;
|
|
82
|
-
|
|
83
|
-
-- ❌ VACUUM(ClickZetta 自动管理存储)
|
|
84
|
-
VACUUM orders RETAIN 168 HOURS;
|
|
85
|
-
```
|
|
86
|
-
|
|
87
|
-
---
|
|
88
|
-
|
|
89
|
-
## ⚠️ 写入时类型转换(重要差异)
|
|
90
|
-
|
|
91
|
-
Databricks 允许写入时将字符串隐式转换为日期、布尔、数值等目标列类型,ClickZetta **不允许**:
|
|
92
|
-
|
|
93
|
-
```sql
|
|
94
|
-
-- ❌ Databricks 可以,ClickZetta 报错
|
|
95
|
-
INSERT INTO t VALUES ('2024-01-15', 'true', '123');
|
|
96
|
-
|
|
97
|
-
-- ✅ ClickZetta 必须显式转换
|
|
98
|
-
INSERT INTO t VALUES (DATE '2024-01-15', TRUE, CAST('123' AS INT));
|
|
99
|
-
```
|
|
100
|
-
|
|
101
|
-
详见 [migration-snowflake.md](migration-snowflake.md) 中的类型转换表(规则相同)。
|
|
102
|
-
|
|
103
|
-
---
|
|
104
|
-
|
|
105
|
-
## DML 差异
|
|
106
|
-
|
|
107
|
-
### MERGE INTO(WHEN NOT MATCHED BY SOURCE)
|
|
108
|
-
|
|
109
|
-
```sql
|
|
110
|
-
-- Databricks:支持 WHEN NOT MATCHED BY SOURCE
|
|
111
|
-
MERGE INTO target t USING source s ON t.id = s.id
|
|
112
|
-
WHEN MATCHED THEN UPDATE SET t.val = s.val
|
|
113
|
-
WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
|
|
114
|
-
WHEN NOT MATCHED BY SOURCE THEN DELETE; -- ❌ ClickZetta 不支持
|
|
115
|
-
|
|
116
|
-
-- ClickZetta 替代方案:两步操作
|
|
117
|
-
-- 步骤1:MERGE 处理匹配和新增
|
|
118
|
-
MERGE INTO target t USING source s ON t.id = s.id
|
|
119
|
-
WHEN MATCHED THEN UPDATE SET t.val = s.val
|
|
120
|
-
WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val);
|
|
121
|
-
-- 步骤2:DELETE 不在 source 中的行
|
|
122
|
-
DELETE FROM target WHERE id NOT IN (SELECT id FROM source);
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
### APPLY CHANGES INTO(CDC)
|
|
126
|
-
|
|
127
|
-
```sql
|
|
128
|
-
-- Databricks:APPLY CHANGES INTO(DLT 专有)
|
|
129
|
-
APPLY CHANGES INTO target
|
|
130
|
-
FROM source
|
|
131
|
-
KEYS (id)
|
|
132
|
-
SEQUENCE BY ts
|
|
133
|
-
APPLY AS DELETE WHEN operation = 'DELETE';
|
|
134
|
-
|
|
135
|
-
-- ClickZetta:用 TABLE STREAM + MERGE 实现
|
|
136
|
-
CREATE TABLE STREAM source_stream ON TABLE source
|
|
137
|
-
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
|
|
138
|
-
|
|
139
|
-
MERGE INTO target t
|
|
140
|
-
USING source_stream s ON t.id = s.id
|
|
141
|
-
WHEN MATCHED AND s.__change_type = 'UPDATE_AFTER' THEN UPDATE SET t.val = s.val
|
|
142
|
-
WHEN MATCHED AND s.__change_type = 'DELETE' THEN DELETE
|
|
143
|
-
WHEN NOT MATCHED AND s.__change_type = 'INSERT' THEN INSERT (id, val) VALUES (s.id, s.val);
|
|
144
|
-
```
|
|
145
|
-
|
|
146
|
-
### 事务
|
|
147
|
-
|
|
148
|
-
```sql
|
|
149
|
-
-- ❌ ClickZetta 不支持事务语法
|
|
150
|
-
BEGIN;
|
|
151
|
-
COMMIT;
|
|
152
|
-
ROLLBACK;
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
---
|
|
156
|
-
|
|
157
|
-
## DQL 差异
|
|
158
|
-
|
|
159
|
-
### QUALIFY(窗口函数过滤)
|
|
160
|
-
|
|
161
|
-
```sql
|
|
162
|
-
-- 两者都支持 QUALIFY
|
|
163
|
-
SELECT * FROM orders
|
|
164
|
-
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY created_at DESC) = 1;
|
|
165
|
-
```
|
|
166
|
-
|
|
167
|
-
### RECURSIVE CTE
|
|
168
|
-
|
|
169
|
-
```sql
|
|
170
|
-
-- Databricks:支持 WITH RECURSIVE
|
|
171
|
-
WITH RECURSIVE nums AS (
|
|
172
|
-
SELECT 1 AS n
|
|
173
|
-
UNION ALL
|
|
174
|
-
SELECT n + 1 FROM nums WHERE n < 5
|
|
175
|
-
)
|
|
176
|
-
SELECT * FROM nums;
|
|
177
|
-
|
|
178
|
-
-- ❌ ClickZetta:不支持 WITH RECURSIVE(验证失败)
|
|
179
|
-
-- 替代方案:用 Python/ZettaPark 生成序列,或预建辅助表
|
|
180
|
-
```
|
|
181
|
-
|
|
182
|
-
### STRUCT 命名字段
|
|
183
|
-
|
|
184
|
-
```sql
|
|
185
|
-
-- Databricks:支持命名字段
|
|
186
|
-
SELECT STRUCT(1 AS id, 'Alice' AS name) AS person;
|
|
187
|
-
|
|
188
|
-
-- ClickZetta:用 named_struct 实现命名字段
|
|
189
|
-
SELECT named_struct('id', 1, 'name', 'Alice') AS person; -- ✅ 推荐
|
|
190
|
-
SELECT STRUCT(1, 'Alice') AS person; -- 位置参数写法,访问时用 person.col1, person.col2
|
|
191
|
-
```
|
|
192
|
-
|
|
193
|
-
---
|
|
194
|
-
|
|
195
|
-
## 分区差异
|
|
196
|
-
|
|
197
|
-
### 分区函数
|
|
198
|
-
|
|
199
|
-
```sql
|
|
200
|
-
-- Databricks:直接用列名
|
|
201
|
-
CREATE TABLE t (...) PARTITIONED BY (year, month);
|
|
202
|
-
|
|
203
|
-
-- ClickZetta:Iceberg 隐藏分区,用转换函数
|
|
204
|
-
CREATE TABLE t (...) PARTITIONED BY (years(created_at)); -- 按年
|
|
205
|
-
CREATE TABLE t (...) PARTITIONED BY (months(created_at)); -- 按月
|
|
206
|
-
CREATE TABLE t (...) PARTITIONED BY (days(created_at)); -- 按天
|
|
207
|
-
CREATE TABLE t (...) PARTITIONED BY (bucket(16, user_id)); -- 按 bucket
|
|
208
|
-
```
|
|
209
|
-
|
|
210
|
-
### 分区裁剪
|
|
211
|
-
|
|
212
|
-
```sql
|
|
213
|
-
-- ✅ ClickZetta 的 YEAR() 函数在 WHERE 中能触发分区裁剪(引擎自动转换)
|
|
214
|
-
SELECT * FROM t WHERE YEAR(dt) = 2024; -- 实际会转换为范围过滤
|
|
215
|
-
|
|
216
|
-
-- ✅ 更推荐的写法(明确范围)
|
|
217
|
-
SELECT * FROM t WHERE dt >= DATE '2024-01-01' AND dt < DATE '2025-01-01';
|
|
218
|
-
```
|
|
219
|
-
|
|
220
|
-
---
|
|
221
|
-
|
|
222
|
-
## Delta Lake 特有功能对照
|
|
223
|
-
|
|
224
|
-
| Delta Lake 功能 | ClickZetta 对应 | 说明 |
|
|
225
|
-
|---|---|---|
|
|
226
|
-
| `OPTIMIZE ... ZORDER BY` | `OPTIMIZE table`(无 ZORDER) | 只做小文件合并 |
|
|
227
|
-
| `VACUUM` | 自动管理 | 不需要手动 VACUUM |
|
|
228
|
-
| `DESCRIBE HISTORY` | `DESC HISTORY table` | 相同功能 |
|
|
229
|
-
| `RESTORE TABLE ... VERSION AS OF` | `RESTORE TABLE ... TIMESTAMP AS OF` | 按时间戳恢复 |
|
|
230
|
-
| `Time Travel VERSION AS OF n` | `TIMESTAMP AS OF '...'` | ClickZetta 按时间戳,不按版本号 |
|
|
231
|
-
| `enableChangeDataFeed` | TABLE STREAM | 不同实现方式 |
|
|
232
|
-
| `MERGE ... WHEN NOT MATCHED BY SOURCE` | 不支持,需两步操作 | |
|
|
233
|
-
| `APPLY CHANGES INTO` | TABLE STREAM + MERGE | |
|
|
234
|
-
| `GENERATED ALWAYS AS IDENTITY` | `IDENTITY(seed)` | |
|
|
235
|
-
| `TBLPROPERTIES` | `PROPERTIES` | |
|
|
236
|
-
| `USING DELTA` | 不需要(默认 Parquet) | |
|
|
237
|
-
|
|
238
|
-
---
|
|
239
|
-
|
|
240
|
-
## 已验证的兼容性(Databricks 有,ClickZetta 也有)
|
|
241
|
-
|
|
242
|
-
- `SEMI JOIN` / `ANTI JOIN` ✅
|
|
243
|
-
- `LATERAL VIEW EXPLODE` / `POSEXPLODE` ✅
|
|
244
|
-
- `QUALIFY` ✅
|
|
245
|
-
- `MERGE INTO`(基本语法)✅
|
|
246
|
-
- `GROUPING SETS` / `ROLLUP` / `CUBE` ✅
|
|
247
|
-
- `WITH CTE`(非递归)✅
|
|
248
|
-
- `STRUCT` / `ARRAY` / `MAP` 类型 ✅
|
|
249
|
-
- `TRANSFORM` / `FILTER` / `AGGREGATE` 高阶函数 ✅
|
|
250
|
-
- `ARRAY_AGG` / `COLLECT_LIST` / `COLLECT_SET` ✅
|
|
251
|
-
- `REGEXP_EXTRACT` / `REGEXP_REPLACE` ✅
|
|
252
|
-
- `DATE_TRUNC` / `DATE_FORMAT` ✅
|
|
253
|
-
- `TRY_CAST` ✅
|
|
254
|
-
- `IDENTITY` 列 ✅
|
|
255
|
-
- `GENERATED ALWAYS AS (expr)` 生成列 ✅
|
|
256
|
-
- `DEFAULT` 默认值 ✅
|
|
257
|
-
- `OPTIMIZE`(小文件合并)✅
|
|
258
|
-
- `DESC HISTORY` ✅
|
|
259
|
-
- `RESTORE TABLE ... TIMESTAMP AS OF` ✅
|
|
260
|
-
- `UNDROP TABLE` ✅
|
|
@@ -1,346 +0,0 @@
|
|
|
1
|
-
# ClickZetta Lakehouse vs Snowflake SQL 差异
|
|
2
|
-
|
|
3
|
-
> 来源:产品文档 + 迁移实践
|
|
4
|
-
|
|
5
|
-
## 对象概念映射
|
|
6
|
-
|
|
7
|
-
| ClickZetta Lakehouse | Snowflake | 说明 |
|
|
8
|
-
|---|---|---|
|
|
9
|
-
| WORKSPACE | DATABASE | 工作空间 ≈ 数据库 |
|
|
10
|
-
| SCHEMA | SCHEMA | 相同 |
|
|
11
|
-
| VCLUSTER | WAREHOUSE | 计算集群 |
|
|
12
|
-
| STORAGE CONNECTION | STORAGE INTEGRATION | 对象存储认证 |
|
|
13
|
-
| VOLUME | STAGE | 文件存储区域 |
|
|
14
|
-
| TABLE | TABLE | 相同 |
|
|
15
|
-
| PIPE | SNOWPIPE | 持续导入管道 |
|
|
16
|
-
| TABLE STREAM | STREAM | 变更数据捕获 |
|
|
17
|
-
| DYNAMIC TABLE | DYNAMIC TABLE | 增量计算表(语法不同) |
|
|
18
|
-
| Studio 任务 | TASK | 调度任务 |
|
|
19
|
-
|
|
20
|
-
---
|
|
21
|
-
|
|
22
|
-
## DDL 差异
|
|
23
|
-
|
|
24
|
-
### CREATE OR REPLACE vs IF NOT EXISTS
|
|
25
|
-
|
|
26
|
-
```sql
|
|
27
|
-
-- Snowflake:支持 CREATE OR REPLACE
|
|
28
|
-
CREATE OR REPLACE TABLE orders (id INT, amount DECIMAL);
|
|
29
|
-
|
|
30
|
-
-- ClickZetta:不支持 CREATE OR REPLACE,用 IF NOT EXISTS
|
|
31
|
-
CREATE TABLE IF NOT EXISTS orders (id INT, amount DECIMAL);
|
|
32
|
-
-- 注意:IF NOT EXISTS 在表已存在时不会重建或覆盖该表;修改已有表用 ALTER TABLE
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
### 注释语法
|
|
36
|
-
|
|
37
|
-
```sql
|
|
38
|
-
-- Snowflake:支持 // 和 ///
|
|
39
|
-
// 这是注释
|
|
40
|
-
/// 这也是注释
|
|
41
|
-
|
|
42
|
-
-- ClickZetta:只支持 -- 和 /* */
|
|
43
|
-
-- 这是注释
|
|
44
|
-
/* 这也是注释 */
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
### 数据类型差异
|
|
48
|
-
|
|
49
|
-
| ClickZetta | Snowflake | 说明 |
|
|
50
|
-
|---|---|---|
|
|
51
|
-
| `STRING` | `VARCHAR` / `TEXT` | ClickZetta 推荐用 STRING |
|
|
52
|
-
| `TIMESTAMP` | `TIMESTAMP_LTZ` | 本地时区时间戳 |
|
|
53
|
-
| `TIMESTAMP_NTZ` | `TIMESTAMP_NTZ` | 无时区时间戳 |
|
|
54
|
-
| `JSON` | `VARIANT` | 半结构化数据 |
|
|
55
|
-
| `ARRAY<T>` | `ARRAY` | ClickZetta 需指定元素类型 |
|
|
56
|
-
| `MAP<K,V>` | `OBJECT` | 键值对 |
|
|
57
|
-
| `STRUCT<f:T,...>` | `OBJECT` | 结构体 |
|
|
58
|
-
| `VECTOR(FLOAT, N)` | 无原生支持 | 向量类型(ClickZetta 特有) |
|
|
59
|
-
| `TINYINT` | `NUMBER(3,0)` | 1字节整数 |
|
|
60
|
-
| `SMALLINT` | `NUMBER(5,0)` | 2字节整数 |
|
|
61
|
-
| 无 `NUMBER` | `NUMBER(p,s)` | ClickZetta 用 `DECIMAL(p,s)` |
|
|
62
|
-
|
|
63
|
-
### ⚠️ 写入时隐式类型转换(重要差异)
|
|
64
|
-
|
|
65
|
-
Snowflake 允许写入时字符串隐式转换为日期/布尔等类型,ClickZetta **不允许**:
|
|
66
|
-
|
|
67
|
-
| 操作 | Snowflake | ClickZetta |
|
|
68
|
-
|---|---|---|
|
|
69
|
-
| INSERT 字符串→DATE | ✅ 允许 | ❌ 报错,需 `CAST` 或 `DATE '...'` |
|
|
70
|
-
| INSERT 字符串→TIMESTAMP | ✅ 允许 | ❌ 报错,需 `CAST` 或 `TIMESTAMP '...'` |
|
|
71
|
-
| INSERT 字符串→BOOLEAN | ✅ 允许 | ❌ 报错,需 `TRUE`/`FALSE` 或 `CAST` |
|
|
72
|
-
| INSERT 字符串→INT | ✅ 允许 | ❌ 报错,需 `CAST('123' AS INT)` |
|
|
73
|
-
| INSERT 字符串→JSON | ✅ 允许 | ❌ 报错,需 `PARSE_JSON(...)` 或 `CAST` |
|
|
74
|
-
| UPDATE 字符串→DATE | ✅ 允许 | ❌ 报错,需 `CAST` |
|
|
75
|
-
| WHERE 字符串=DATE | ✅ 允许 | ✅ 允许(查询中可隐式比较) |
|
|
76
|
-
|
|
77
|
-
### 建表语法差异
|
|
78
|
-
|
|
79
|
-
```sql
|
|
80
|
-
-- Snowflake:CLUSTER BY
|
|
81
|
-
CREATE TABLE orders (id INT, dt DATE)
|
|
82
|
-
CLUSTER BY (dt);
|
|
83
|
-
|
|
84
|
-
-- ClickZetta:CLUSTERED BY + PARTITIONED BY
|
|
85
|
-
CREATE TABLE orders (
|
|
86
|
-
id INT,
|
|
87
|
-
dt DATE
|
|
88
|
-
)
|
|
89
|
-
PARTITIONED BY (dt)
|
|
90
|
-
CLUSTERED BY (id) INTO 8 BUCKETS;
|
|
91
|
-
|
|
92
|
-
-- ClickZetta 特有:建表时内联定义索引(此例为 Bloom Filter 索引)
|
|
93
|
-
CREATE TABLE orders (
|
|
94
|
-
id INT,
|
|
95
|
-
amount DECIMAL,
|
|
96
|
-
INDEX amount_bf (amount) USING BLOOM_FILTER
|
|
97
|
-
);
|
|
98
|
-
```
|
|
99
|
-
|
|
100
|
-
---
|
|
101
|
-
|
|
102
|
-
## DML 差异
|
|
103
|
-
|
|
104
|
-
### INSERT
|
|
105
|
-
|
|
106
|
-
```sql
|
|
107
|
-
-- 两者基本相同,ClickZetta 额外支持:
|
|
108
|
-
INSERT OVERWRITE TABLE orders SELECT * FROM staging; -- 覆盖写入(Hive 风格)
|
|
109
|
-
INSERT INTO orders PARTITION (dt='2024-01-01') VALUES (1, 100); -- 静态分区
|
|
110
|
-
```
|
|
111
|
-
|
|
112
|
-
### UPDATE
|
|
113
|
-
|
|
114
|
-
```sql
|
|
115
|
-
-- Snowflake
|
|
116
|
-
UPDATE orders SET amount = amount * 1.1 WHERE status = 'VIP';
|
|
117
|
-
|
|
118
|
-
-- ClickZetta:相同语法,额外支持 ORDER BY + LIMIT
|
|
119
|
-
UPDATE orders SET amount = amount * 1.1
|
|
120
|
-
WHERE status = 'VIP'
|
|
121
|
-
ORDER BY created_at DESC
|
|
122
|
-
LIMIT 1000;
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
### MERGE INTO
|
|
126
|
-
|
|
127
|
-
```sql
|
|
128
|
-
-- ClickZetta 限制:WHEN NOT MATCHED 只能有一个
|
|
129
|
-
-- Snowflake 支持多个 WHEN NOT MATCHED
|
|
130
|
-
|
|
131
|
-
-- ClickZetta MERGE 示例(⚠️ UPDATE 必须在 DELETE 之前)
|
|
132
|
-
MERGE INTO target t
|
|
133
|
-
USING source s ON t.id = s.id
|
|
134
|
-
WHEN MATCHED THEN UPDATE SET t.amount = s.amount
|
|
135
|
-
WHEN MATCHED AND s.action = 'DELETE' THEN DELETE
|
|
136
|
-
WHEN NOT MATCHED THEN INSERT (id, amount) VALUES (s.id, s.amount);
|
|
137
|
-
```
|
|
138
|
-
|
|
139
|
-
---
|
|
140
|
-
|
|
141
|
-
## 查询语法差异
|
|
142
|
-
|
|
143
|
-
### SELECT 扩展
|
|
144
|
-
|
|
145
|
-
```sql
|
|
146
|
-
-- ClickZetta 写法:SELECT * EXCEPT(col)(Snowflake 对应写法为 SELECT * EXCLUDE (col))
|
|
147
|
-
SELECT * EXCEPT(sensitive_col) FROM users;
|
|
148
|
-
|
|
149
|
-
-- 两者都支持:GROUP BY ALL(自动推断分组列)
|
|
150
|
-
SELECT year, month, SUM(amount)
|
|
151
|
-
FROM orders
|
|
152
|
-
GROUP BY ALL;
|
|
153
|
-
|
|
154
|
-
-- 两者都支持:GROUPING SETS / ROLLUP / CUBE
|
|
155
|
-
SELECT region, product, SUM(sales)
|
|
156
|
-
FROM orders
|
|
157
|
-
GROUP BY GROUPING SETS ((region), (product), ());
|
|
158
|
-
```
|
|
159
|
-
|
|
160
|
-
### JSON 查询
|
|
161
|
-
|
|
162
|
-
```sql
|
|
163
|
-
-- Snowflake:VARIANT 类型,用 : 访问
|
|
164
|
-
SELECT data:address:city FROM users;
|
|
165
|
-
SELECT data[0]:name FROM users;
|
|
166
|
-
|
|
167
|
-
-- ClickZetta:JSON 类型,用 [] 访问
|
|
168
|
-
SELECT data['address']['city'] FROM users;
|
|
169
|
-
SELECT data['phoneNumbers'][0]['number'] FROM users;
|
|
170
|
-
|
|
171
|
-
-- 两者都支持 PARSE_JSON
|
|
172
|
-
SELECT parse_json('{"name":"Alice"}')['name'];
|
|
173
|
-
```
|
|
174
|
-
|
|
175
|
-
### LATERAL VIEW(展开数组)
|
|
176
|
-
|
|
177
|
-
```sql
|
|
178
|
-
-- ClickZetta(Hive 风格)
|
|
179
|
-
SELECT e.id, s.skill
|
|
180
|
-
FROM employees e
|
|
181
|
-
LATERAL VIEW EXPLODE(e.skills) s AS skill;
|
|
182
|
-
|
|
183
|
-
-- Snowflake(用 FLATTEN)
|
|
184
|
-
SELECT e.id, f.value::STRING AS skill
|
|
185
|
-
FROM employees e,
|
|
186
|
-
LATERAL FLATTEN(input => e.skills) f;
|
|
187
|
-
```
|
|
188
|
-
|
|
189
|
-
### QUALIFY(窗口函数过滤)
|
|
190
|
-
|
|
191
|
-
```sql
|
|
192
|
-
-- 两者都支持 QUALIFY
|
|
193
|
-
SELECT * FROM orders
|
|
194
|
-
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY created_at DESC) = 1;
|
|
195
|
-
```
|
|
196
|
-
|
|
197
|
-
### PIVOT / UNPIVOT
|
|
198
|
-
|
|
199
|
-
```sql
|
|
200
|
-
-- Snowflake 原生支持 PIVOT
|
|
201
|
-
SELECT * FROM sales
|
|
202
|
-
PIVOT (SUM(amount) FOR month IN ('Jan', 'Feb', 'Mar'));
|
|
203
|
-
|
|
204
|
-
-- ClickZetta:用 CASE WHEN 实现
|
|
205
|
-
SELECT
|
|
206
|
-
product,
|
|
207
|
-
SUM(CASE WHEN month = 'Jan' THEN amount END) AS Jan,
|
|
208
|
-
SUM(CASE WHEN month = 'Feb' THEN amount END) AS Feb
|
|
209
|
-
FROM sales GROUP BY product;
|
|
210
|
-
```
|
|
211
|
-
|
|
212
|
-
---
|
|
213
|
-
|
|
214
|
-
## 流(Stream)差异
|
|
215
|
-
|
|
216
|
-
```sql
|
|
217
|
-
-- Snowflake Stream 元数据字段
|
|
218
|
-
METADATA$ACTION -- 'INSERT' / 'DELETE'
|
|
219
|
-
METADATA$ISUPDATE -- TRUE/FALSE
|
|
220
|
-
METADATA$ROW_ID -- 行唯一标识
|
|
221
|
-
|
|
222
|
-
-- ClickZetta Table Stream 元数据字段
|
|
223
|
-
__change_type -- 'INSERT' / 'UPDATE_BEFORE' / 'UPDATE_AFTER' / 'DELETE'
|
|
224
|
-
__commit_version -- 提交版本号
|
|
225
|
-
__commit_timestamp -- 提交时间戳
|
|
226
|
-
```
|
|
227
|
-
|
|
228
|
-
---
|
|
229
|
-
|
|
230
|
-
## 动态表(Dynamic Table)差异
|
|
231
|
-
|
|
232
|
-
```sql
|
|
233
|
-
-- Snowflake Dynamic Table
|
|
234
|
-
CREATE DYNAMIC TABLE product_sales
|
|
235
|
-
TARGET_LAG = '1 minutes'
|
|
236
|
-
WAREHOUSE = my_warehouse
|
|
237
|
-
AS SELECT ...;
|
|
238
|
-
|
|
239
|
-
-- ClickZetta Dynamic Table(不支持 TARGET_LAG)
|
|
240
|
-
CREATE DYNAMIC TABLE product_sales
|
|
241
|
-
REFRESH INTERVAL 1 MINUTE VCLUSTER default_ap
|
|
242
|
-
AS SELECT ...;
|
|
243
|
-
```
|
|
244
|
-
|
|
245
|
-
---
|
|
246
|
-
|
|
247
|
-
## 对象存储(Stage vs Volume)
|
|
248
|
-
|
|
249
|
-
```sql
|
|
250
|
-
-- Snowflake:Stage
|
|
251
|
-
CREATE STAGE my_stage
|
|
252
|
-
URL = 's3://bucket/path'
|
|
253
|
-
STORAGE_INTEGRATION = my_integration;
|
|
254
|
-
|
|
255
|
-
COPY INTO orders FROM @my_stage/data.csv;
|
|
256
|
-
|
|
257
|
-
-- ClickZetta:Volume
|
|
258
|
-
CREATE EXTERNAL VOLUME my_volume
|
|
259
|
-
LOCATION = 'oss://bucket/path'
|
|
260
|
-
USING CONNECTION my_oss_conn;
|
|
261
|
-
|
|
262
|
-
COPY INTO orders FROM VOLUME my_volume USING CSV;
|
|
263
|
-
```
|
|
264
|
-
|
|
265
|
-
---
|
|
266
|
-
|
|
267
|
-
## 函数差异
|
|
268
|
-
|
|
269
|
-
### 日期函数
|
|
270
|
-
|
|
271
|
-
```sql
|
|
272
|
-
-- Snowflake
|
|
273
|
-
DATEADD(day, 7, order_date)
|
|
274
|
-
DATEDIFF(day, start_date, end_date)
|
|
275
|
-
DATE_TRUNC('month', order_date)
|
|
276
|
-
TO_DATE('2024-01-01')
|
|
277
|
-
CURRENT_TIMESTAMP()
|
|
278
|
-
|
|
279
|
-
-- ClickZetta(兼容 Hive/Spark 风格,同时也支持 Snowflake 风格)
|
|
280
|
-
DATEADD(day, 7, order_date) -- ✅ 也支持与 Snowflake 相同的语法
|
|
281
|
-
DATE_ADD(order_date, 7) -- 或 Hive 风格
|
|
282
|
-
DATEDIFF(end_date, start_date) -- 注意:两参数形式,结束日期在前,与 Snowflake 的 DATEDIFF(day, start_date, end_date) 顺序相反!
|
|
283
|
-
DATE_TRUNC('month', order_date) -- 相同
|
|
284
|
-
TO_DATE('2024-01-01') -- 相同
|
|
285
|
-
CURRENT_TIMESTAMP() -- 相同,也支持 NOW()
|
|
286
|
-
```
|
|
287
|
-
|
|
288
|
-
### 字符串函数
|
|
289
|
-
|
|
290
|
-
```sql
|
|
291
|
-
-- Snowflake
|
|
292
|
-
CHARINDEX('sub', str) -- 查找子串位置
|
|
293
|
-
EDITDISTANCE(s1, s2) -- 编辑距离
|
|
294
|
-
SOUNDEX(str) -- 语音编码(返回 Soundex 码,发音相近的字符串编码相同)
|
|
295
|
-
INITCAP(str) -- 首字母大写
|
|
296
|
-
|
|
297
|
-
-- ClickZetta
|
|
298
|
-
INSTR(str, 'sub') -- 查找子串位置(Hive 风格)
|
|
299
|
-
LOCATE('sub', str) -- 也支持
|
|
300
|
-
LEVENSHTEIN(s1, s2) -- 编辑距离
|
|
301
|
-
INITCAP(str) -- 相同
|
|
302
|
-
```
|
|
303
|
-
|
|
304
|
-
### 条件函数
|
|
305
|
-
|
|
306
|
-
```sql
|
|
307
|
-
-- Snowflake
|
|
308
|
-
IFF(condition, true_val, false_val)
|
|
309
|
-
ZEROIFNULL(expr)
|
|
310
|
-
NULLIFZERO(expr)
|
|
311
|
-
DECODE(expr, val1, res1, val2, res2, default)
|
|
312
|
-
|
|
313
|
-
-- ClickZetta
|
|
314
|
-
IF(condition, true_val, false_val) -- 或 CASE WHEN
|
|
315
|
-
COALESCE(expr, 0) -- 替代 ZEROIFNULL
|
|
316
|
-
NULLIF(expr, 0) -- 替代 NULLIFZERO
|
|
317
|
-
DECODE(expr, val1, res1, ...) -- 支持(兼容)
|
|
318
|
-
```
|
|
319
|
-
|
|
320
|
-
### 聚合函数
|
|
321
|
-
|
|
322
|
-
```sql
|
|
323
|
-
-- Snowflake
|
|
324
|
-
LISTAGG(col, ',') WITHIN GROUP (ORDER BY col)
|
|
325
|
-
ARRAY_AGG(col)
|
|
326
|
-
OBJECT_AGG(key, value)
|
|
327
|
-
APPROX_COUNT_DISTINCT(col)
|
|
328
|
-
|
|
329
|
-
-- ClickZetta
|
|
330
|
-
GROUP_CONCAT(col ORDER BY col SEPARATOR ',') -- 替代 LISTAGG
|
|
331
|
-
ARRAY_AGG(col) -- 相同
|
|
332
|
-
MAP_AGG(key, value) -- 替代 OBJECT_AGG
|
|
333
|
-
APPROX_COUNT_DISTINCT(col) -- 相同
|
|
334
|
-
```
|
|
335
|
-
|
|
336
|
-
---
|
|
337
|
-
|
|
338
|
-
## 权限体系差异
|
|
339
|
-
|
|
340
|
-
| 概念 | ClickZetta | Snowflake |
|
|
341
|
-
|---|---|---|
|
|
342
|
-
| 顶层容器 | WORKSPACE | DATABASE |
|
|
343
|
-
| 权限对象 | VCLUSTER / SCHEMA / TABLE / VIEW | WAREHOUSE / DATABASE / SCHEMA / TABLE |
|
|
344
|
-
| 角色授予 | `GRANT ROLE r TO USER u` | `GRANT ROLE r TO USER u` |
|
|
345
|
-
| 查看权限 | `SHOW GRANTS TO USER u` | `SHOW GRANTS TO USER u` |
|
|
346
|
-
| 系统角色 | instance_admin / workspace_admin / workspace_dev / workspace_analyst | ACCOUNTADMIN / SYSADMIN / USERADMIN |
|