@clickzetta/cz-cli-darwin-x64 0.3.91 → 0.3.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
- package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
- package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
- package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
- package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
- package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
- package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
- package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
- package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
- package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
- package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
- package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
- package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
- package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
- package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
- package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
- package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
- package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
- package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
- package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
- package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
- package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
- package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
- package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
- package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
- package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
- package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
- package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
- package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
- package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
- package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
- package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
- package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
- package/package.json +1 -1
- package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
- package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
- /package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
|
@@ -1,229 +0,0 @@
|
|
|
1
|
-
# ClickZetta Lakehouse vs Spark SQL 差异
|
|
2
|
-
|
|
3
|
-
> 来源:产品文档 + Spark Connector 文档
|
|
4
|
-
|
|
5
|
-
## 数据类型映射
|
|
6
|
-
|
|
7
|
-
| ClickZetta | Spark SQL | 说明 |
|
|
8
|
-
|---|---|---|
|
|
9
|
-
| `BOOLEAN` | `BooleanType` | 相同 |
|
|
10
|
-
| `TINYINT` | `ByteType` | 1字节 |
|
|
11
|
-
| `SMALLINT` | `ShortType` | 2字节 |
|
|
12
|
-
| `INT` | `IntegerType` | 4字节 |
|
|
13
|
-
| `BIGINT` | `LongType` | 8字节 |
|
|
14
|
-
| `FLOAT` | `FloatType` | 4字节浮点 |
|
|
15
|
-
| `DOUBLE` | `DoubleType` | 8字节浮点 |
|
|
16
|
-
| `DECIMAL(p,s)` | `DecimalType(p,s)` | 精确数值 |
|
|
17
|
-
| `STRING` / `VARCHAR` | `StringType` | 字符串 |
|
|
18
|
-
| `BINARY` | `BinaryType` | 二进制 |
|
|
19
|
-
| `DATE` | `DateType` | 日期 |
|
|
20
|
-
| `TIMESTAMP` | `TimestampType` | 带时区时间戳 |
|
|
21
|
-
| `TIMESTAMP_NTZ` | `TimestampNTZType` | 无时区时间戳 |
|
|
22
|
-
| `ARRAY<T>` | `ArrayType` | 数组 |
|
|
23
|
-
| `MAP<K,V>` | `MapType` | 键值对 |
|
|
24
|
-
| `STRUCT<f:T>` | `StructType` | 结构体 |
|
|
25
|
-
|
|
26
|
-
---
|
|
27
|
-
|
|
28
|
-
## 建表语法差异
|
|
29
|
-
|
|
30
|
-
### 分区
|
|
31
|
-
|
|
32
|
-
```sql
|
|
33
|
-
-- Spark SQL:PARTITIONED BY
|
|
34
|
-
CREATE TABLE orders (id INT, amount DECIMAL, dt STRING)
|
|
35
|
-
USING PARQUET
|
|
36
|
-
PARTITIONED BY (dt);
|
|
37
|
-
|
|
38
|
-
-- ClickZetta:相同语法,但不需要 USING 子句
|
|
39
|
-
CREATE TABLE orders (id INT, amount DECIMAL, dt STRING)
|
|
40
|
-
PARTITIONED BY (dt);
|
|
41
|
-
```
|
|
42
|
-
|
|
43
|
-
### Bucket(分桶)
|
|
44
|
-
|
|
45
|
-
```sql
|
|
46
|
-
-- Spark SQL
|
|
47
|
-
CREATE TABLE orders (id INT, amount DECIMAL)
|
|
48
|
-
CLUSTERED BY (id) INTO 8 BUCKETS;
|
|
49
|
-
|
|
50
|
-
-- ClickZetta:相同语法
|
|
51
|
-
CREATE TABLE orders (id INT, amount DECIMAL)
|
|
52
|
-
CLUSTERED BY (id) INTO 8 BUCKETS;
|
|
53
|
-
```
|
|
54
|
-
|
|
55
|
-
### 表属性
|
|
56
|
-
|
|
57
|
-
```sql
|
|
58
|
-
-- Spark SQL:TBLPROPERTIES
|
|
59
|
-
CREATE TABLE orders (id INT)
|
|
60
|
-
TBLPROPERTIES ('delta.enableChangeDataFeed' = 'true');
|
|
61
|
-
|
|
62
|
-
-- ClickZetta:PROPERTIES
|
|
63
|
-
CREATE TABLE orders (id INT)
|
|
64
|
-
PROPERTIES ('data_lifecycle' = '30'); -- 数据保留天数
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
---
|
|
68
|
-
|
|
69
|
-
## 查询语法差异
|
|
70
|
-
|
|
71
|
-
### LATERAL VIEW(展开数组)
|
|
72
|
-
|
|
73
|
-
```sql
|
|
74
|
-
-- 两者语法相同(ClickZetta 兼容 Hive/Spark 风格)
|
|
75
|
-
SELECT id, skill
|
|
76
|
-
FROM employees
|
|
77
|
-
LATERAL VIEW EXPLODE(skills) t AS skill;
|
|
78
|
-
|
|
79
|
-
-- POSEXPLODE(带位置索引)
|
|
80
|
-
SELECT id, pos, skill
|
|
81
|
-
FROM employees
|
|
82
|
-
LATERAL VIEW POSEXPLODE(skills) t AS pos, skill;
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
### 窗口函数
|
|
86
|
-
|
|
87
|
-
```sql
|
|
88
|
-
-- 两者基本相同
|
|
89
|
-
SELECT id, amount,
|
|
90
|
-
ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY created_at DESC) AS rn,
|
|
91
|
-
SUM(amount) OVER (PARTITION BY customer_id
|
|
92
|
-
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total
|
|
93
|
-
FROM orders;
|
|
94
|
-
```
|
|
95
|
-
|
|
96
|
-
### CTE(公用表表达式)
|
|
97
|
-
|
|
98
|
-
```sql
|
|
99
|
-
-- 两者语法相同
|
|
100
|
-
WITH
|
|
101
|
-
monthly_sales AS (
|
|
102
|
-
SELECT DATE_TRUNC('month', order_date) AS month, SUM(amount) AS total
|
|
103
|
-
FROM orders GROUP BY 1
|
|
104
|
-
),
|
|
105
|
-
ranked AS (
|
|
106
|
-
SELECT *, RANK() OVER (ORDER BY total DESC) AS rnk FROM monthly_sales
|
|
107
|
-
)
|
|
108
|
-
SELECT * FROM ranked WHERE rnk <= 3;
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
### STRUCT / ARRAY 操作
|
|
112
|
-
|
|
113
|
-
```sql
|
|
114
|
-
-- Spark SQL
|
|
115
|
-
SELECT address.city FROM users; -- STRUCT 字段访问
|
|
116
|
-
SELECT skills[0] FROM employees; -- ARRAY 索引
|
|
117
|
-
SELECT EXPLODE(skills) FROM employees; -- 展开数组
|
|
118
|
-
SELECT TRANSFORM(skills, x -> UPPER(x)) FROM emp; -- 数组变换
|
|
119
|
-
|
|
120
|
-
-- ClickZetta(相同语法)
|
|
121
|
-
SELECT address.city FROM users;
|
|
122
|
-
SELECT skills[0] FROM employees;
|
|
123
|
-
SELECT EXPLODE(skills) FROM employees;
|
|
124
|
-
SELECT TRANSFORM(skills, x -> UPPER(x)) FROM emp;
|
|
125
|
-
```
|
|
126
|
-
|
|
127
|
-
---
|
|
128
|
-
|
|
129
|
-
## 函数差异
|
|
130
|
-
|
|
131
|
-
### 日期函数
|
|
132
|
-
|
|
133
|
-
```sql
|
|
134
|
-
-- 两者基本兼容
|
|
135
|
-
DATE_ADD(date, days)
|
|
136
|
-
DATE_SUB(date, days)
|
|
137
|
-
DATEDIFF(end_date, start_date) -- 注意:结束日期在前、起始日期在后(与 Spark 一致);Snowflake 为 DATEDIFF(part, start, end),起止顺序相反
|
|
138
|
-
DATE_TRUNC('month', date)
|
|
139
|
-
DATE_FORMAT(date, 'yyyy-MM-dd')
|
|
140
|
-
FROM_UNIXTIME(unix_ts)
|
|
141
|
-
UNIX_TIMESTAMP(date_str)
|
|
142
|
-
```
|
|
143
|
-
|
|
144
|
-
### 字符串函数
|
|
145
|
-
|
|
146
|
-
```sql
|
|
147
|
-
-- 两者基本兼容
|
|
148
|
-
CONCAT(s1, s2, ...)
|
|
149
|
-
CONCAT_WS(',', s1, s2, ...)
|
|
150
|
-
SPLIT(str, ',')
|
|
151
|
-
REGEXP_EXTRACT(str, pattern, group)
|
|
152
|
-
REGEXP_REPLACE(str, pattern, replacement)
|
|
153
|
-
INSTR(str, substr)
|
|
154
|
-
SUBSTR(str, pos, len)
|
|
155
|
-
TRIM(str) / LTRIM(str) / RTRIM(str)
|
|
156
|
-
```
|
|
157
|
-
|
|
158
|
-
### 聚合函数
|
|
159
|
-
|
|
160
|
-
```sql
|
|
161
|
-
-- 两者基本兼容
|
|
162
|
-
COUNT(*) / COUNT(DISTINCT col)
|
|
163
|
-
SUM / AVG / MAX / MIN
|
|
164
|
-
COLLECT_LIST(col) -- Spark:返回数组(含重复)
|
|
165
|
-
COLLECT_SET(col) -- Spark:返回去重数组
|
|
166
|
-
ARRAY_AGG(col) -- ClickZetta:等价于 COLLECT_LIST
|
|
167
|
-
```
|
|
168
|
-
|
|
169
|
-
---
|
|
170
|
-
|
|
171
|
-
## ClickZetta 特有功能(Spark 无对应)
|
|
172
|
-
|
|
173
|
-
```sql
|
|
174
|
-
-- 1. VCLUSTER(计算集群管理)
|
|
175
|
-
CREATE VCLUSTER my_vc VCLUSTER_TYPE = ANALYTICS VCLUSTER_SIZE = 4;
|
|
176
|
-
USE VCLUSTER my_vc;
|
|
177
|
-
|
|
178
|
-
-- 2. DYNAMIC TABLE(增量计算)
|
|
179
|
-
CREATE DYNAMIC TABLE sales_summary
|
|
180
|
-
REFRESH INTERVAL 5 MINUTE VCLUSTER default_ap
|
|
181
|
-
AS SELECT customer_id, SUM(amount) FROM orders GROUP BY 1;
|
|
182
|
-
|
|
183
|
-
-- 3. TABLE STREAM(CDC 变更捕获)
|
|
184
|
-
CREATE TABLE STREAM orders_stream ON TABLE orders
|
|
185
|
-
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
|
|
186
|
-
|
|
187
|
-
-- 4. PIPE(持续导入)
|
|
188
|
-
CREATE PIPE my_pipe
|
|
189
|
-
AS COPY INTO orders FROM VOLUME my_volume USING CSV;
|
|
190
|
-
|
|
191
|
-
-- 5. VECTOR 类型(向量检索)
|
|
192
|
-
CREATE TABLE embeddings (id INT, vec VECTOR(FLOAT, 1024));
|
|
193
|
-
SELECT id, cosine_distance(vec, vector(0.1, 0.2, ...)) AS dist
|
|
194
|
-
FROM embeddings ORDER BY dist LIMIT 10;
|
|
195
|
-
|
|
196
|
-
-- 6. Time Travel
|
|
197
|
-
SELECT * FROM orders TIMESTAMP AS OF '2024-01-01 00:00:00';
|
|
198
|
-
RESTORE TABLE orders TO TIMESTAMP AS OF '2024-01-01 00:00:00';
|
|
199
|
-
UNDROP TABLE orders;
|
|
200
|
-
|
|
201
|
-
-- 7. SHARE(跨实例数据共享)
|
|
202
|
-
CREATE SHARE my_share;
|
|
203
|
-
GRANT SELECT, READ METADATA ON TABLE public.orders TO SHARE my_share;
|
|
204
|
-
```
|
|
205
|
-
|
|
206
|
-
---
|
|
207
|
-
|
|
208
|
-
## Spark SQL 特有功能(ClickZetta 无对应或语法不同)
|
|
209
|
-
|
|
210
|
-
```sql
|
|
211
|
-
-- 1. Delta Lake 特有语法(ClickZetta 无对应或仅部分支持)
|
|
212
|
-
OPTIMIZE table_name ZORDER BY (col); -- ClickZetta 有 OPTIMIZE 但无 ZORDER
|
|
213
|
-
VACUUM table_name RETAIN 168 HOURS; -- ClickZetta 自动管理,无需手动 VACUUM
|
|
214
|
-
|
|
215
|
-
-- 2. SHOW TABLES EXTENDED(ClickZetta 无对应)
|
|
216
|
-
SHOW TABLES EXTENDED IN schema LIKE 'orders*';
|
|
217
|
-
|
|
218
|
-
-- 3. DESCRIBE HISTORY(Delta)→ ClickZetta 用 DESC HISTORY
|
|
219
|
-
-- Spark/Delta:
|
|
220
|
-
DESCRIBE HISTORY orders;
|
|
221
|
-
-- ClickZetta:
|
|
222
|
-
DESC HISTORY orders;
|
|
223
|
-
|
|
224
|
-
-- 4. 生成列(两者语法相同,ClickZetta 同样支持)
|
|
225
|
-
-- Spark:
|
|
226
|
-
CREATE TABLE orders (id INT, year INT GENERATED ALWAYS AS (YEAR(order_date)));
|
|
227
|
-
-- ClickZetta(相同语法,也支持):
|
|
228
|
-
CREATE TABLE orders (id INT, year INT GENERATED ALWAYS AS (YEAR(order_date)));
|
|
229
|
-
```
|
|
File without changes
|