@clickzetta/cz-cli-darwin-arm64 0.3.40 → 0.3.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-app-python-sdk/SKILL.md +153 -0
- package/bin/skills/clickzetta-app-python-sdk/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +196 -0
- package/bin/skills/clickzetta-app-python-sdk/references/connector.md +143 -0
- package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +122 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +128 -287
- package/bin/skills/clickzetta-bi-connect/SKILL.md +176 -0
- package/bin/skills/clickzetta-bi-connect/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +170 -0
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +633 -0
- package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-data-ingest-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-data-science/SKILL.md +125 -0
- package/bin/skills/clickzetta-data-science/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +146 -0
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +110 -0
- package/bin/skills/clickzetta-data-science/references/setup.md +160 -0
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +195 -0
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +122 -0
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +156 -0
- package/bin/skills/clickzetta-data-sharing/SKILL.md +160 -0
- package/bin/skills/clickzetta-data-sharing/eval_cases.jsonl +3 -0
- package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +134 -0
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +103 -11
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +58 -2
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +4 -4
- package/bin/skills/clickzetta-external-catalog/SKILL.md +123 -0
- package/bin/skills/clickzetta-external-catalog/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +130 -0
- package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +34 -0
- package/bin/skills/clickzetta-java-sdk/SKILL.md +186 -0
- package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +163 -0
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +212 -0
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +31 -0
- package/bin/skills/clickzetta-metadata/SKILL.md +28 -30
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +39 -0
- package/bin/skills/clickzetta-pipeline-review/SKILL.md +377 -0
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +323 -0
- package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-semantic-view/SKILL.md +207 -0
- package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +167 -0
- package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +92 -0
- package/bin/skills/clickzetta-spark-flink-connector/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +147 -0
- package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +132 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +115 -9
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +249 -0
- package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +3 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +350 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +279 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +504 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +372 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +260 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +382 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +346 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +229 -0
- package/bin/skills/clickzetta-studio-task-manager/SKILL.md +652 -0
- package/bin/skills/clickzetta-table-lineage/SKILL.md +90 -0
- package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -0
- package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +14 -0
- package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +38 -0
- package/bin/skills/clickzetta-table-lineage/references/table_lineage_standalone.html +562 -0
- package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +25 -0
- package/bin/skills/clickzetta-zettapark/SKILL.md +248 -0
- package/bin/skills/clickzetta-zettapark/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +283 -0
- package/package.json +1 -1
- package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
- package/bin/skills/clickzetta-ai-vector-search/eval_cases.jsonl +0 -4
- package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
# DDL 完整语法参考
|
|
2
|
+
|
|
3
|
+
> 基于 ClickZetta Lakehouse 产品文档整理,含与 Snowflake / Spark SQL 的差异标注
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## SCHEMA 操作
|
|
8
|
+
|
|
9
|
+
```sql
|
|
10
|
+
-- 创建
|
|
11
|
+
CREATE SCHEMA IF NOT EXISTS my_schema COMMENT '说明';
|
|
12
|
+
|
|
13
|
+
-- 修改
|
|
14
|
+
ALTER SCHEMA my_schema RENAME TO new_schema;
|
|
15
|
+
ALTER SCHEMA my_schema SET COMMENT '新注释';
|
|
16
|
+
|
|
17
|
+
-- 删除(级联删除所有对象)
|
|
18
|
+
DROP SCHEMA IF EXISTS my_schema;
|
|
19
|
+
|
|
20
|
+
-- 查看
|
|
21
|
+
SHOW SCHEMAS;
|
|
22
|
+
SHOW SCHEMAS EXTENDED; -- 含 type 列(MANAGED/EXTERNAL)
|
|
23
|
+
SHOW SCHEMAS LIKE 'sales%';
|
|
24
|
+
SHOW SCHEMAS WHERE schema_name = 'public';
|
|
25
|
+
|
|
26
|
+
-- 切换
|
|
27
|
+
USE SCHEMA my_schema;
|
|
28
|
+
USE my_schema; -- SCHEMA 关键字可省略
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
**与 Snowflake 差异:**
|
|
32
|
+
- Snowflake 用 `USE DATABASE` + `USE SCHEMA`;ClickZetta 无 DATABASE 层,直接 `USE SCHEMA`
|
|
33
|
+
- Snowflake 支持 `CREATE OR REPLACE SCHEMA`;ClickZetta 不支持,用 `IF NOT EXISTS`
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## TABLE 操作
|
|
38
|
+
|
|
39
|
+
### CREATE TABLE
|
|
40
|
+
|
|
41
|
+
```sql
|
|
42
|
+
-- 基本建表
|
|
43
|
+
CREATE TABLE IF NOT EXISTS orders (
|
|
44
|
+
id BIGINT,
|
|
45
|
+
customer_id INT,
|
|
46
|
+
amount DECIMAL(18, 2) NOT NULL,
|
|
47
|
+
status STRING DEFAULT 'pending',
|
|
48
|
+
created_at TIMESTAMP,
|
|
49
|
+
tags ARRAY<STRING>,
|
|
50
|
+
meta JSON
|
|
51
|
+
)
|
|
52
|
+
COMMENT '订单表';
|
|
53
|
+
|
|
54
|
+
-- 主键表(ENABLE VALIDATE RELY:SQL写入也去重)
|
|
55
|
+
CREATE TABLE pk_orders (
|
|
56
|
+
id BIGINT PRIMARY KEY,
|
|
57
|
+
amount DECIMAL(18, 2)
|
|
58
|
+
);
|
|
59
|
+
|
|
60
|
+
-- 主键表(DISABLE NOVALIDATE RELY:仅实时写入去重,SQL写入不去重)
|
|
61
|
+
CREATE TABLE cdc_orders (
|
|
62
|
+
id BIGINT PRIMARY KEY DISABLE NOVALIDATE RELY,
|
|
63
|
+
amount DECIMAL(18, 2)
|
|
64
|
+
);
|
|
65
|
+
|
|
66
|
+
-- 自增列(仅 BIGINT,不保证连续)
|
|
67
|
+
CREATE TABLE auto_id_table (
|
|
68
|
+
id BIGINT IDENTITY(1), -- 从1开始
|
|
69
|
+
col STRING
|
|
70
|
+
);
|
|
71
|
+
|
|
72
|
+
-- 生成列(确定性表达式,不可手动插入)
|
|
73
|
+
CREATE TABLE orders_with_year (
|
|
74
|
+
id BIGINT,
|
|
75
|
+
created_at TIMESTAMP,
|
|
76
|
+
year INT GENERATED ALWAYS AS (YEAR(created_at))
|
|
77
|
+
);
|
|
78
|
+
|
|
79
|
+
-- 默认值(支持非确定性函数)
|
|
80
|
+
CREATE TABLE t_default (
|
|
81
|
+
id INT,
|
|
82
|
+
created_at TIMESTAMP DEFAULT current_timestamp(),
|
|
83
|
+
status STRING DEFAULT 'active',
|
|
84
|
+
score DOUBLE DEFAULT random()
|
|
85
|
+
);
|
|
86
|
+
|
|
87
|
+
-- 分区表(Iceberg 隐藏分区)
|
|
88
|
+
CREATE TABLE orders_partitioned (
|
|
89
|
+
id BIGINT,
|
|
90
|
+
amount DECIMAL(18, 2),
|
|
91
|
+
created_at TIMESTAMP
|
|
92
|
+
)
|
|
93
|
+
PARTITIONED BY (days(created_at)); -- 按天分区
|
|
94
|
+
|
|
95
|
+
-- 分区转换函数
|
|
96
|
+
-- years(col) months(col) days(col) hours(col)
|
|
97
|
+
-- bucket(N, col) truncate(col, W)
|
|
98
|
+
|
|
99
|
+
-- 分桶表
|
|
100
|
+
CREATE TABLE orders_bucketed (
|
|
101
|
+
id BIGINT,
|
|
102
|
+
customer_id INT,
|
|
103
|
+
amount DECIMAL(18, 2)
|
|
104
|
+
)
|
|
105
|
+
CLUSTERED BY (customer_id)
|
|
106
|
+
SORTED BY (id ASC)
|
|
107
|
+
INTO 16 BUCKETS;
|
|
108
|
+
|
|
109
|
+
-- 数据生命周期(data_lifecycle,单位:天;注意与 Time Travel 的 data_retention_days 不是同一属性)
|
|
110
|
+
CREATE TABLE orders (id BIGINT)
|
|
111
|
+
PROPERTIES ('data_lifecycle' = '30'); -- 保留30天
|
|
112
|
+
|
|
113
|
+
-- CTAS(从查询建表)
|
|
114
|
+
CREATE TABLE orders_copy AS
|
|
115
|
+
SELECT * FROM orders WHERE status = 'completed';
|
|
116
|
+
|
|
117
|
+
-- 外部表(映射对象存储)
|
|
118
|
+
CREATE EXTERNAL TABLE ext_orders (
|
|
119
|
+
id BIGINT,
|
|
120
|
+
amount DECIMAL(18, 2)
|
|
121
|
+
)
|
|
122
|
+
LOCATION 'oss://bucket/orders/'
|
|
123
|
+
STORED AS PARQUET;
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
**与 Snowflake 差异:**
|
|
127
|
+
- Snowflake `CREATE OR REPLACE TABLE` → ClickZetta 不支持;需要替换时先 `DROP TABLE IF EXISTS` 再 `CREATE TABLE`(`CREATE TABLE IF NOT EXISTS` 只在表不存在时创建,不会替换已有表)
|
|
128
|
+
- Snowflake `CLUSTER BY (col)` → ClickZetta `CLUSTERED BY (col) INTO N BUCKETS`
|
|
129
|
+
- Snowflake `AUTOINCREMENT` → ClickZetta `IDENTITY[(seed)]`
|
|
130
|
+
- Snowflake `TRANSIENT TABLE` → ClickZetta 无对应(用 `data_lifecycle` 控制保留期)
|
|
131
|
+
- Snowflake `TEMPORARY TABLE` → ClickZetta 无临时表概念
|
|
132
|
+
- Snowflake `COPY GRANTS` → ClickZetta 不支持
|
|
133
|
+
|
|
134
|
+
**与 Spark SQL 差异:**
|
|
135
|
+
- Spark `USING PARQUET` → ClickZetta 不需要(默认 Parquet)
|
|
136
|
+
- Spark `TBLPROPERTIES` → ClickZetta `PROPERTIES`
|
|
137
|
+
- Spark `LOCATION` 外部表语法基本相同
|
|
138
|
+
|
|
139
|
+
### ALTER TABLE
|
|
140
|
+
|
|
141
|
+
```sql
|
|
142
|
+
-- 重命名
|
|
143
|
+
ALTER TABLE orders RENAME TO orders_v2;
|
|
144
|
+
|
|
145
|
+
-- 注释
|
|
146
|
+
ALTER TABLE orders SET COMMENT '新注释';
|
|
147
|
+
|
|
148
|
+
-- 数据保留周期
|
|
149
|
+
ALTER TABLE orders SET PROPERTIES ('data_retention_days' = '7');
|
|
150
|
+
|
|
151
|
+
-- 添加列
|
|
152
|
+
ALTER TABLE orders ADD COLUMN region STRING AFTER status;
|
|
153
|
+
ALTER TABLE orders ADD COLUMN region STRING FIRST;
|
|
154
|
+
|
|
155
|
+
-- 添加复杂类型嵌套字段
|
|
156
|
+
ALTER TABLE t ADD COLUMN address.zip STRING; -- STRUCT 嵌套
|
|
157
|
+
ALTER TABLE t ADD COLUMN items.ELEMENT.price DOUBLE; -- ARRAY<STRUCT> 嵌套
|
|
158
|
+
|
|
159
|
+
-- 修改列类型(有限制)
|
|
160
|
+
ALTER TABLE orders ALTER COLUMN amount TYPE DOUBLE;
|
|
161
|
+
|
|
162
|
+
-- 重命名列
|
|
163
|
+
ALTER TABLE orders RENAME COLUMN old_col TO new_col;
|
|
164
|
+
|
|
165
|
+
-- 删除列
|
|
166
|
+
ALTER TABLE orders DROP COLUMN unnecessary_col;
|
|
167
|
+
|
|
168
|
+
-- 修改列注释
|
|
169
|
+
ALTER TABLE orders ALTER COLUMN amount COMMENT '订单金额';
|
|
170
|
+
|
|
171
|
+
-- 添加索引(含 ARRAY/JSON 列的表必须单独添加)
|
|
172
|
+
-- ⚠️ 索引语法:BLOOMFILTER(不是 USING BLOOM_FILTER)
|
|
173
|
+
CREATE BLOOMFILTER INDEX IF NOT EXISTS id_bf ON TABLE orders(id);
|
|
174
|
+
CREATE BLOOMFILTER INDEX IF NOT EXISTS name_bf ON TABLE orders(name)
|
|
175
|
+
PROPERTIES ('analyzer' = 'ngram', 'n' = '3'); -- ngram 分词
|
|
176
|
+
|
|
177
|
+
-- 倒排索引
|
|
178
|
+
CREATE INVERTED INDEX IF NOT EXISTS content_inv ON TABLE articles(content);
|
|
179
|
+
|
|
180
|
+
-- 向量索引(建表时内联)
|
|
181
|
+
-- 本文的 CREATE TABLE 示例未包含向量索引,具体写法请参阅向量检索相关文档
|
|
182
|
+
|
|
183
|
+
-- 删除索引(⚠️ 不需要 ON table_name)
|
|
184
|
+
DROP INDEX IF EXISTS id_bf;
|
|
185
|
+
DROP INDEX id_bf;
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
**与 Snowflake 差异:**
|
|
189
|
+
- Snowflake `ALTER TABLE ... ADD COLUMN` 只能加到末尾;ClickZetta 支持 `FIRST/AFTER/BEFORE`
|
|
190
|
+
- Snowflake 与 ClickZetta 均支持 `ALTER TABLE ... DROP COLUMN`,无需重建表
|
|
191
|
+
- Snowflake 无 BLOOMFILTER/INVERTED/VECTOR 索引
|
|
192
|
+
|
|
193
|
+
### DROP / TRUNCATE TABLE
|
|
194
|
+
|
|
195
|
+
```sql
|
|
196
|
+
-- 删除表(可 UNDROP 恢复)
|
|
197
|
+
DROP TABLE IF EXISTS orders;
|
|
198
|
+
DROP TABLE my_schema.orders;
|
|
199
|
+
|
|
200
|
+
-- 清空表(保留结构)
|
|
201
|
+
TRUNCATE TABLE orders;
|
|
202
|
+
TRUNCATE TABLE IF EXISTS orders; -- ✅ 支持 IF EXISTS
|
|
203
|
+
|
|
204
|
+
-- 清空指定分区
|
|
205
|
+
TRUNCATE TABLE orders PARTITION (dt = '2024-01-01');
|
|
206
|
+
TRUNCATE TABLE orders PARTITION (dt > '2024-01-01');
|
|
207
|
+
TRUNCATE TABLE orders PARTITION (dt >= '2024-01-01' AND dt < '2024-02-01');
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
**与 Snowflake 差异:**
|
|
211
|
+
- Snowflake `TRUNCATE TABLE` 不支持分区条件;ClickZetta 支持
|
|
212
|
+
- Snowflake 的 `DROP TABLE` 没有 `PURGE` 选项(`PURGE` 属于 Hive/Spark 语法),删除后可在 Time Travel 保留期内 `UNDROP`;ClickZetta 删除后同样在保留期内可 UNDROP
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## VIEW 操作
|
|
217
|
+
|
|
218
|
+
```sql
|
|
219
|
+
-- 创建视图
|
|
220
|
+
CREATE VIEW IF NOT EXISTS order_summary AS
|
|
221
|
+
SELECT customer_id, COUNT(*) AS cnt, SUM(amount) AS total
|
|
222
|
+
FROM orders GROUP BY customer_id;
|
|
223
|
+
|
|
224
|
+
-- 替换视图(ClickZetta 支持 OR REPLACE,与 Snowflake 相同)
|
|
225
|
+
CREATE OR REPLACE VIEW order_summary AS
|
|
226
|
+
SELECT customer_id, SUM(amount) AS total FROM orders GROUP BY customer_id;
|
|
227
|
+
|
|
228
|
+
-- 带列别名和注释
|
|
229
|
+
CREATE VIEW order_summary (cust_id COMMENT '客户ID', total COMMENT '总金额')
|
|
230
|
+
COMMENT '订单汇总视图'
|
|
231
|
+
AS SELECT customer_id, SUM(amount) FROM orders GROUP BY 1;
|
|
232
|
+
|
|
233
|
+
-- 删除
|
|
234
|
+
DROP VIEW IF EXISTS order_summary;
|
|
235
|
+
|
|
236
|
+
-- 查看
|
|
237
|
+
SHOW TABLES WHERE is_view = true;
|
|
238
|
+
SHOW TABLES IN my_schema WHERE is_view = true;
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
**注意:** ClickZetta 的 `CREATE OR REPLACE VIEW` 与 Snowflake 相同,但 `CREATE OR REPLACE TABLE` 不支持。
|
|
242
|
+
|
|
243
|
+
---
|
|
244
|
+
|
|
245
|
+
## INDEX 操作
|
|
246
|
+
|
|
247
|
+
```sql
|
|
248
|
+
-- 查看索引
|
|
249
|
+
SHOW INDEX FROM table_name;
|
|
250
|
+
SHOW INDEX FROM my_schema.table_name;
|
|
251
|
+
|
|
252
|
+
-- 查看索引详情
|
|
253
|
+
DESC INDEX index_name;
|
|
254
|
+
DESC INDEX EXTENDED index_name;
|
|
255
|
+
|
|
256
|
+
-- 构建存量数据索引(仅向量索引和倒排索引,不支持 Bloom Filter)
|
|
257
|
+
BUILD INDEX index_name ON table_name;
|
|
258
|
+
BUILD INDEX index_name ON table_name WHERE partition_col = '2024-01-01';
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
---
|
|
262
|
+
|
|
263
|
+
## 查看对象信息
|
|
264
|
+
|
|
265
|
+
```sql
|
|
266
|
+
-- 表结构
|
|
267
|
+
DESC table_name;
|
|
268
|
+
DESC EXTENDED table_name; -- 含大小、记录数等扩展信息
|
|
269
|
+
DESCRIBE TABLE table_name; -- 同 DESC
|
|
270
|
+
|
|
271
|
+
-- 列信息
|
|
272
|
+
SHOW COLUMNS IN table_name;
|
|
273
|
+
SHOW COLUMNS FROM table_name IN schema_name;
|
|
274
|
+
|
|
275
|
+
-- 建表语句
|
|
276
|
+
SHOW CREATE TABLE table_name;
|
|
277
|
+
|
|
278
|
+
-- 表列表
|
|
279
|
+
SHOW TABLES;
|
|
280
|
+
SHOW TABLES IN my_schema;
|
|
281
|
+
SHOW TABLES LIKE 'order%';
|
|
282
|
+
SHOW TABLES WHERE is_view = false AND is_materialized_view = false;
|
|
283
|
+
SHOW TABLES WHERE is_dynamic = true;
|
|
284
|
+
SHOW TABLES WHERE is_external = true;
|
|
285
|
+
|
|
286
|
+
-- 分区信息
|
|
287
|
+
SHOW PARTITIONS table_name;
|
|
288
|
+
SHOW PARTITIONS EXTENDED table_name; -- 含文件数、大小、修改时间
|
|
289
|
+
SHOW PARTITIONS table_name PARTITION (dt = '2024-01-01');
|
|
290
|
+
SHOW PARTITIONS table_name WHERE total_rows > 1000;
|
|
291
|
+
|
|
292
|
+
-- 历史版本
|
|
293
|
+
DESC HISTORY table_name;
|
|
294
|
+
SHOW TABLES HISTORY; -- 含已删除的表
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
299
|
+
## SYNONYM(同义词)操作
|
|
300
|
+
|
|
301
|
+
```sql
|
|
302
|
+
-- 为表创建同义词(跨 Schema 访问)
|
|
303
|
+
CREATE SYNONYM my_orders FOR TABLE other_schema.orders;
|
|
304
|
+
|
|
305
|
+
-- 为 Volume 创建同义词
|
|
306
|
+
CREATE SYNONYM my_vol FOR VOLUME other_schema.data_volume;
|
|
307
|
+
|
|
308
|
+
-- 为函数创建同义词
|
|
309
|
+
CREATE SYNONYM my_func FOR FUNCTION other_schema.udf_name;
|
|
310
|
+
|
|
311
|
+
-- 查看同义词
|
|
312
|
+
SHOW SYNONYMS;
|
|
313
|
+
SHOW SYNONYMS IN my_schema;
|
|
314
|
+
SHOW SYNONYMS LIKE 'my_%';
|
|
315
|
+
|
|
316
|
+
-- 删除同义词(需指定对象类型)
|
|
317
|
+
DROP SYNONYM my_orders FOR TABLE;
|
|
318
|
+
DROP SYNONYM my_vol FOR VOLUME;
|
|
319
|
+
DROP SYNONYM my_func FOR FUNCTION;
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
> 同义词支持的对象类型:TABLE(含普通表、Table Stream、物化视图、动态表)、VOLUME、FUNCTION。
|
|
323
|
+
> 使用场景:跨 Schema 访问、数据一致性维护、应用层解耦。
|
|
324
|
+
|
|
325
|
+
---
|
|
326
|
+
|
|
327
|
+
## Time Travel & 数据恢复
|
|
328
|
+
|
|
329
|
+
```sql
|
|
330
|
+
-- 查询历史版本
|
|
331
|
+
SELECT * FROM orders TIMESTAMP AS OF '2024-01-01 00:00:00';
|
|
332
|
+
SELECT * FROM orders TIMESTAMP AS OF CURRENT_TIMESTAMP() - INTERVAL 12 HOURS;
|
|
333
|
+
SELECT * FROM orders TIMESTAMP AS OF CAST('2024-01-01' AS TIMESTAMP);
|
|
334
|
+
|
|
335
|
+
-- 恢复表到历史版本(表未删除)
|
|
336
|
+
RESTORE TABLE orders TO TIMESTAMP AS OF CAST('2024-01-01 00:00:00' AS TIMESTAMP);
|
|
337
|
+
|
|
338
|
+
-- 恢复已删除的表
|
|
339
|
+
UNDROP TABLE orders;
|
|
340
|
+
UNDROP TABLE my_schema.orders;
|
|
341
|
+
|
|
342
|
+
-- 设置保留周期(0-90天,默认1天)
|
|
343
|
+
ALTER TABLE orders SET PROPERTIES ('data_retention_days' = '7');
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
**与 Snowflake 差异:**
|
|
347
|
+
- Snowflake `AT (TIMESTAMP => ...)` → ClickZetta `TIMESTAMP AS OF ...`
|
|
348
|
+
- Snowflake `BEFORE (STATEMENT => ...)` → ClickZetta 不支持按 statement_id 回溯
|
|
349
|
+
- Snowflake `UNDROP TABLE` → ClickZetta 相同
|
|
350
|
+
- Snowflake 默认保留 1 天(Enterprise 及以上版本最长可设为 90 天);ClickZetta 默认 1 天,最长 90 天
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
# DML 完整语法参考
|
|
2
|
+
|
|
3
|
+
> 基于 ClickZetta Lakehouse 产品文档整理,含与 Snowflake / Spark SQL 的差异标注
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## ⚠️ 隐式类型转换规则(INSERT / UPDATE 通用)
|
|
8
|
+
|
|
9
|
+
**ClickZetta 对写入操作(INSERT/UPDATE)禁止字符串 → DATE/TIMESTAMP/BOOLEAN/JSON/整数列、以及整数 → BOOLEAN 列的隐式类型转换,必须显式 CAST;数值类型之间的转换仍然允许(见下表)。**
|
|
10
|
+
但 SELECT/WHERE/表达式中允许隐式转换。
|
|
11
|
+
|
|
12
|
+
### 完整规则表(已验证)
|
|
13
|
+
|
|
14
|
+
| 目标列类型 | 写入值 | INSERT/UPDATE | WHERE/SELECT |
|
|
15
|
+
|---|---|---|---|
|
|
16
|
+
| `DATE` | `'2024-01-15'`(字符串) | ❌ 报错 | ✅ 允许 |
|
|
17
|
+
| `TIMESTAMP` | `'2024-01-15 12:00:00'`(字符串) | ❌ 报错 | ✅ 允许 |
|
|
18
|
+
| `BOOLEAN` | `'true'` / `'false'`(字符串) | ❌ 报错 | ✅ 允许 |
|
|
19
|
+
| `BOOLEAN` | `1` / `0`(整数) | ❌ 报错 | ✅ 允许 |
|
|
20
|
+
| `JSON` | `'{"k":1}'`(字符串) | ❌ 报错 | ✅ 允许 |
|
|
21
|
+
| `INT` / `BIGINT` | `'123'`(字符串) | ❌ 报错 | ✅ 允许 |
|
|
22
|
+
| `BIGINT` | `100`(INT) | ✅ 允许 | ✅ 允许 |
|
|
23
|
+
| `DOUBLE` | `1.5`(FLOAT) | ✅ 允许 | ✅ 允许 |
|
|
24
|
+
| `BIGINT` | `1.5`(FLOAT) | ✅ 允许(截断) | ✅ 允许 |
|
|
25
|
+
|
|
26
|
+
### 各类型正确写法
|
|
27
|
+
|
|
28
|
+
```sql
|
|
29
|
+
-- DATE(以下写法等价)
|
|
30
|
+
INSERT INTO t VALUES (CAST('2024-01-15' AS DATE));
|
|
31
|
+
INSERT INTO t VALUES (DATE '2024-01-15');
|
|
32
|
+
INSERT INTO t VALUES (TO_DATE('2024-01-15'));
|
|
33
|
+
INSERT INTO t VALUES (DATE('2024-01-15')); -- 函数形式,也支持
|
|
34
|
+
|
|
35
|
+
-- TIMESTAMP(以下写法等价)
|
|
36
|
+
INSERT INTO t VALUES (CAST('2024-01-15 12:00:00' AS TIMESTAMP));
|
|
37
|
+
INSERT INTO t VALUES (TIMESTAMP '2024-01-15 12:00:00');
|
|
38
|
+
INSERT INTO t VALUES (TO_TIMESTAMP('2024-01-15 12:00:00'));
|
|
39
|
+
INSERT INTO t VALUES (TIMESTAMP('2024-01-15 12:00:00')); -- 函数形式,也支持
|
|
40
|
+
INSERT INTO t VALUES (CURRENT_TIMESTAMP());
|
|
41
|
+
INSERT INTO t VALUES (CURRENT_DATE() - INTERVAL 7 DAY);
|
|
42
|
+
|
|
43
|
+
-- BOOLEAN(只接受 TRUE/FALSE 字面量或 CAST)
|
|
44
|
+
INSERT INTO t VALUES (TRUE);
|
|
45
|
+
INSERT INTO t VALUES (FALSE);
|
|
46
|
+
INSERT INTO t VALUES (CAST(1 AS BOOLEAN));
|
|
47
|
+
INSERT INTO t VALUES (CAST('true' AS BOOLEAN));
|
|
48
|
+
|
|
49
|
+
-- JSON(必须用 PARSE_JSON 或 CAST)
|
|
50
|
+
INSERT INTO t VALUES (PARSE_JSON('{"key":"value"}'));
|
|
51
|
+
INSERT INTO t VALUES (CAST('{"key":"value"}' AS JSON));
|
|
52
|
+
|
|
53
|
+
-- INT/BIGINT(字符串必须 CAST)
|
|
54
|
+
INSERT INTO t VALUES (CAST('123' AS INT));
|
|
55
|
+
INSERT INTO t VALUES (CAST('456' AS BIGINT));
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### UPDATE 同样适用
|
|
59
|
+
|
|
60
|
+
```sql
|
|
61
|
+
-- ❌ UPDATE 也不允许字符串/整数隐式转换
|
|
62
|
+
UPDATE orders SET dt = '2024-06-01' WHERE id = 1; -- 报错
|
|
63
|
+
UPDATE orders SET flag = 0 WHERE id = 1; -- 报错
|
|
64
|
+
|
|
65
|
+
-- ✅ 必须显式转换
|
|
66
|
+
UPDATE orders SET dt = CAST('2024-06-01' AS DATE) WHERE id = 1;
|
|
67
|
+
UPDATE orders SET flag = CAST(0 AS BOOLEAN) WHERE id = 1;
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### WHERE 中字符串可以隐式比较
|
|
71
|
+
|
|
72
|
+
```sql
|
|
73
|
+
-- ✅ WHERE 中允许字符串与日期/数字比较
|
|
74
|
+
SELECT * FROM orders WHERE dt = '2024-01-15';
|
|
75
|
+
SELECT * FROM orders WHERE dt >= '2024-01-01' AND dt < '2025-01-01';
|
|
76
|
+
SELECT * FROM orders WHERE id = '123';
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
**与 Snowflake / Spark 差异:**
|
|
80
|
+
- Snowflake:INSERT/UPDATE 时字符串可隐式转为日期/布尔/数字类型;Spark 取决于 `spark.sql.storeAssignmentPolicy`(`LEGACY` 下允许隐式转换,3.x 默认的 `ANSI` 下同样会拒绝把字符串写入非字符串列)
|
|
81
|
+
- ClickZetta:写入时**必须显式转换**,查询时可隐式比较
|
|
82
|
+
|
|
83
|
+
> **同样适用于 RESTORE TABLE**:`RESTORE TABLE t TO TIMESTAMP AS OF '2024-01-15'` 会报错,必须用 `CAST('2024-01-15 10:00:00' AS TIMESTAMP)` 或完整毫秒时间戳字符串。
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## INSERT
|
|
88
|
+
|
|
89
|
+
```sql
|
|
90
|
+
-- 追加(单行)
|
|
91
|
+
INSERT INTO orders VALUES (1, 101, 100.0, 'pending');
|
|
92
|
+
INSERT INTO orders (id, customer_id, amount) VALUES (1, 101, 100.0);
|
|
93
|
+
|
|
94
|
+
-- 追加(多行)
|
|
95
|
+
INSERT INTO orders VALUES
|
|
96
|
+
(1, 101, 100.0, 'pending'),
|
|
97
|
+
(2, 102, 200.0, 'completed');
|
|
98
|
+
|
|
99
|
+
-- 从查询追加
|
|
100
|
+
INSERT INTO orders SELECT * FROM staging_orders WHERE status = 'new';
|
|
101
|
+
|
|
102
|
+
-- 覆盖整表
|
|
103
|
+
INSERT OVERWRITE TABLE orders SELECT * FROM new_orders;
|
|
104
|
+
|
|
105
|
+
-- 覆盖指定分区(静态分区)
|
|
106
|
+
INSERT OVERWRITE TABLE orders PARTITION (dt = '2024-01-01')
|
|
107
|
+
SELECT id, amount FROM staging WHERE dt = '2024-01-01';
|
|
108
|
+
|
|
109
|
+
-- 动态分区(自动根据数据值分区)
|
|
110
|
+
INSERT INTO orders PARTITION (dt)
|
|
111
|
+
SELECT id, amount, dt FROM staging;
|
|
112
|
+
|
|
113
|
+
-- 不推荐大量数据用 VALUES,适合测试
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
**与 Snowflake 差异:**
|
|
117
|
+
- Snowflake 的覆盖写入写法是 `INSERT OVERWRITE INTO t ...`(只能整表覆盖,不能按分区覆盖);ClickZetta 写作 `INSERT OVERWRITE TABLE t ...`,并支持 `PARTITION` 子句
|
|
118
|
+
- Snowflake 无 `PARTITION` 子句(Snowflake 用 CLUSTER BY 自动管理)
|
|
119
|
+
- ClickZetta 支持 Hive 风格动态分区
|
|
120
|
+
|
|
121
|
+
**与 Spark SQL 差异:**
|
|
122
|
+
- 语法基本相同,ClickZetta 完全兼容 Spark INSERT 语法
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## UPDATE
|
|
127
|
+
|
|
128
|
+
```sql
|
|
129
|
+
-- 基本更新
|
|
130
|
+
UPDATE orders SET status = 'cancelled' WHERE id = 123;
|
|
131
|
+
|
|
132
|
+
-- 多列更新
|
|
133
|
+
UPDATE orders
|
|
134
|
+
SET status = 'completed', updated_at = current_timestamp()
|
|
135
|
+
WHERE id = 123;
|
|
136
|
+
|
|
137
|
+
-- 子查询更新
|
|
138
|
+
UPDATE orders
|
|
139
|
+
SET amount = amount * 1.1
|
|
140
|
+
WHERE customer_id IN (
|
|
141
|
+
SELECT id FROM customers WHERE tier = 'VIP'
|
|
142
|
+
);
|
|
143
|
+
|
|
144
|
+
-- 带 ORDER BY + LIMIT(分批更新)
|
|
145
|
+
UPDATE orders
|
|
146
|
+
SET status = 'archived'
|
|
147
|
+
WHERE created_at < '2020-01-01'
|
|
148
|
+
ORDER BY created_at ASC
|
|
149
|
+
LIMIT 10000;
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
**与 Snowflake 差异:**
|
|
153
|
+
- Snowflake `UPDATE ... FROM` 语法(JOIN 更新)→ ClickZetta 用子查询替代
|
|
154
|
+
- ClickZetta 额外支持 `ORDER BY + LIMIT`(Snowflake 不支持)
|
|
155
|
+
|
|
156
|
+
**与 Spark SQL 差异:**
|
|
157
|
+
- Spark SQL 不支持 `UPDATE`(Delta Lake 支持);ClickZetta 原生支持
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## DELETE
|
|
162
|
+
|
|
163
|
+
```sql
|
|
164
|
+
-- 基本删除
|
|
165
|
+
DELETE FROM orders WHERE id = 123;
|
|
166
|
+
|
|
167
|
+
-- 条件删除
|
|
168
|
+
DELETE FROM orders WHERE created_at < '2020-01-01';
|
|
169
|
+
|
|
170
|
+
-- 子查询删除
|
|
171
|
+
DELETE FROM orders
|
|
172
|
+
WHERE id IN (
|
|
173
|
+
SELECT order_id FROM order_details WHERE status = 'cancelled'
|
|
174
|
+
);
|
|
175
|
+
|
|
176
|
+
-- 删除所有行(等价于 TRUNCATE,但会记录版本)
|
|
177
|
+
DELETE FROM orders WHERE 1 = 1;
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
**与 Snowflake 差异:**
|
|
181
|
+
- 语法基本相同
|
|
182
|
+
|
|
183
|
+
**与 Spark SQL 差异:**
|
|
184
|
+
- Spark SQL 不支持 `DELETE`(Delta Lake 支持);ClickZetta 原生支持
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## MERGE INTO(UPSERT)
|
|
189
|
+
|
|
190
|
+
```sql
|
|
191
|
+
-- 标准 MERGE(⚠️ 多个 WHEN MATCHED 时,UPDATE 必须在 DELETE 之前)
|
|
192
|
+
MERGE INTO target t
|
|
193
|
+
USING source s ON t.id = s.id
|
|
194
|
+
WHEN MATCHED AND s.is_deleted = 0 THEN UPDATE SET -- UPDATE 在前
|
|
195
|
+
t.amount = s.amount,
|
|
196
|
+
t.status = s.status,
|
|
197
|
+
t.updated_at = current_timestamp()
|
|
198
|
+
WHEN MATCHED AND s.is_deleted = 1 THEN DELETE -- DELETE 在后
|
|
199
|
+
WHEN NOT MATCHED THEN INSERT (id, amount, status, created_at)
|
|
200
|
+
VALUES (s.id, s.amount, s.status, current_timestamp());
|
|
201
|
+
|
|
202
|
+
-- 多个 WHEN MATCHED(UPDATE 必须在 DELETE 前)
|
|
203
|
+
MERGE INTO target t
|
|
204
|
+
USING source s ON t.id = s.id
|
|
205
|
+
WHEN MATCHED AND s.action = 'update' THEN UPDATE SET t.amount = s.amount
|
|
206
|
+
WHEN MATCHED AND s.action = 'delete' THEN DELETE
|
|
207
|
+
WHEN NOT MATCHED THEN INSERT VALUES (s.id, s.amount);
|
|
208
|
+
|
|
209
|
+
-- 从子查询 MERGE
|
|
210
|
+
MERGE INTO orders t
|
|
211
|
+
USING (
|
|
212
|
+
SELECT id, SUM(amount) AS total FROM line_items GROUP BY id
|
|
213
|
+
) s ON t.id = s.id
|
|
214
|
+
WHEN MATCHED THEN UPDATE SET t.total = s.total
|
|
215
|
+
WHEN NOT MATCHED THEN INSERT (id, total) VALUES (s.id, s.total);
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
**⚠️ ClickZetta MERGE 限制:**
|
|
219
|
+
1. `WHEN NOT MATCHED` 只能有**一个**(Snowflake 支持多个)
|
|
220
|
+
2. 多个 `WHEN MATCHED` 时,`UPDATE` 必须在 `DELETE` 之前
|
|
221
|
+
3. 目标表的同一行不能被多个源行匹配(否则报错);必要时先对源数据按关联键去重
|
|
222
|
+
|
|
223
|
+
**与 Snowflake 差异:**
|
|
224
|
+
- Snowflake 支持多个 `WHEN NOT MATCHED`;ClickZetta 只支持一个
|
|
225
|
+
- Snowflake `MERGE ... WHEN NOT MATCHED BY SOURCE THEN DELETE`;ClickZetta 不支持
|
|
226
|
+
- 语法结构基本相同
|
|
227
|
+
|
|
228
|
+
**与 Spark SQL 差异:**
|
|
229
|
+
- Spark SQL(Delta Lake)支持 `WHEN NOT MATCHED BY SOURCE`;ClickZetta 不支持
|
|
230
|
+
- 语法结构基本相同
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## COPY INTO(批量导入/导出)
|
|
235
|
+
|
|
236
|
+
```sql
|
|
237
|
+
-- 从 Volume 导入
|
|
238
|
+
COPY INTO orders
|
|
239
|
+
FROM VOLUME my_oss_volume
|
|
240
|
+
USING CSV
|
|
241
|
+
OPTIONS('header' = 'true', 'sep' = ',')
|
|
242
|
+
SUBDIRECTORY 'data/2024/';
|
|
243
|
+
|
|
244
|
+
-- 从 Volume 导入(Parquet)
|
|
245
|
+
COPY INTO orders
|
|
246
|
+
FROM VOLUME my_oss_volume
|
|
247
|
+
USING PARQUET
|
|
248
|
+
FILES('part-00001.parquet', 'part-00002.parquet');
|
|
249
|
+
|
|
250
|
+
-- 正则匹配文件
|
|
251
|
+
COPY INTO orders
|
|
252
|
+
FROM VOLUME my_oss_volume
|
|
253
|
+
USING PARQUET
|
|
254
|
+
REGEXP '.*2024-0[1-6][.]parquet';
|
|
255
|
+
|
|
256
|
+
-- 覆盖导入
|
|
257
|
+
COPY OVERWRITE INTO orders
|
|
258
|
+
FROM VOLUME my_oss_volume
|
|
259
|
+
USING CSV OPTIONS('header' = 'true');
|
|
260
|
+
|
|
261
|
+
-- 导出到 Volume
|
|
262
|
+
COPY INTO VOLUME my_oss_volume
|
|
263
|
+
SUBDIRECTORY 'export/orders/'
|
|
264
|
+
FROM orders
|
|
265
|
+
USING PARQUET;
|
|
266
|
+
|
|
267
|
+
-- 导出查询结果
|
|
268
|
+
COPY INTO VOLUME my_oss_volume
|
|
269
|
+
SUBDIRECTORY 'export/2024/'
|
|
270
|
+
FROM (SELECT * FROM orders WHERE YEAR(created_at) = 2024)
|
|
271
|
+
USING CSV OPTIONS('header' = 'true');
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
**与 Snowflake 差异:**
|
|
275
|
+
- Snowflake `COPY INTO t FROM @stage/path/file.csv` → ClickZetta `COPY INTO t FROM VOLUME v USING CSV`
|
|
276
|
+
- Snowflake Stage 用 `@` 前缀;ClickZetta Volume 用对象名
|
|
277
|
+
- Snowflake `COPY INTO @stage FROM t` → ClickZetta `COPY INTO VOLUME v FROM t`
|
|
278
|
+
- Snowflake 支持 `PATTERN = '.*\.csv'`;ClickZetta 用 `REGEXP`
|
|
279
|
+
- Snowflake `FILE_FORMAT = (TYPE = CSV)` → ClickZetta `USING CSV OPTIONS(...)`
|