@clickzetta/cz-cli-darwin-arm64 0.3.40 → 0.3.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-app-python-sdk/SKILL.md +153 -0
- package/bin/skills/clickzetta-app-python-sdk/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +196 -0
- package/bin/skills/clickzetta-app-python-sdk/references/connector.md +143 -0
- package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +122 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +128 -287
- package/bin/skills/clickzetta-bi-connect/SKILL.md +176 -0
- package/bin/skills/clickzetta-bi-connect/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +170 -0
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +633 -0
- package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-data-ingest-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-data-science/SKILL.md +125 -0
- package/bin/skills/clickzetta-data-science/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +146 -0
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +110 -0
- package/bin/skills/clickzetta-data-science/references/setup.md +160 -0
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +195 -0
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +122 -0
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +156 -0
- package/bin/skills/clickzetta-data-sharing/SKILL.md +160 -0
- package/bin/skills/clickzetta-data-sharing/eval_cases.jsonl +3 -0
- package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +134 -0
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +103 -11
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +58 -2
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +4 -4
- package/bin/skills/clickzetta-external-catalog/SKILL.md +123 -0
- package/bin/skills/clickzetta-external-catalog/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +130 -0
- package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +34 -0
- package/bin/skills/clickzetta-java-sdk/SKILL.md +186 -0
- package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +163 -0
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +212 -0
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +31 -0
- package/bin/skills/clickzetta-metadata/SKILL.md +28 -30
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +39 -0
- package/bin/skills/clickzetta-pipeline-review/SKILL.md +377 -0
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +323 -0
- package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-semantic-view/SKILL.md +207 -0
- package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +167 -0
- package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +92 -0
- package/bin/skills/clickzetta-spark-flink-connector/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +147 -0
- package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +132 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +115 -9
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +249 -0
- package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +3 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +350 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +279 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +504 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +372 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +260 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +382 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +346 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +229 -0
- package/bin/skills/clickzetta-studio-task-manager/SKILL.md +652 -0
- package/bin/skills/clickzetta-table-lineage/SKILL.md +90 -0
- package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -0
- package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +14 -0
- package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +38 -0
- package/bin/skills/clickzetta-table-lineage/references/table_lineage_standalone.html +562 -0
- package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +25 -0
- package/bin/skills/clickzetta-zettapark/SKILL.md +248 -0
- package/bin/skills/clickzetta-zettapark/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +283 -0
- package/package.json +1 -1
- package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
- package/bin/skills/clickzetta-ai-vector-search/eval_cases.jsonl +0 -4
- package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
|
@@ -36,6 +36,20 @@ description: >
|
|
|
36
36
|
|
|
37
37
|
---
|
|
38
38
|
|
|
39
|
+
## 向导:明确操作意图
|
|
40
|
+
|
|
41
|
+
收到请求后,先判断用户意图,选择对应工作流:
|
|
42
|
+
|
|
43
|
+
> 你想做什么?
|
|
44
|
+
>
|
|
45
|
+
> **A. 设计并创建新的数据管道**(从数据源到各层 DT 的完整 SQL)→ 进入 Pipeline Wizard
|
|
46
|
+
> **B. 管理已有管道对象**(修改 DT 刷新间隔、暂停/恢复、查看刷新历史)→ 直接执行对应操作
|
|
47
|
+
> **C. 排查管道问题**(DT 刷新失败、Pipe 停止摄入、Stream 积压)→ 进入故障排查流程
|
|
48
|
+
|
|
49
|
+
**如果用户已经明确说了要做什么(如"帮我创建一个 Kafka 到 DWD 的管道"、"暂停这个动态表"),直接执行,不再询问。**
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
39
53
|
## Pipeline Wizard(管道设计向导)
|
|
40
54
|
|
|
41
55
|
当用户想设计或构建一个完整的数据管道时,这是最高优先级的模式。触发词包括:
|
|
@@ -70,14 +84,33 @@ CREATE SCHEMA IF NOT EXISTS ecommerce_gold;
|
|
|
70
84
|
|
|
71
85
|
**如果用户已经提供了足够信息(数据来源、字段、层次需求、项目前缀),直接生成完整 SQL,不要再问。**
|
|
72
86
|
|
|
73
|
-
|
|
87
|
+
如果信息不完整,优先使用交互式问答工具(如 `question`)收集以下信息并弹出选项菜单;若无此类工具,则用文字一次性列出所有问题:
|
|
74
88
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
89
|
+
```
|
|
90
|
+
question({
|
|
91
|
+
questions: [
|
|
92
|
+
{
|
|
93
|
+
question: "数据来源?",
|
|
94
|
+
options: [
|
|
95
|
+
{ label: "Kafka", description: "提供 broker 地址和 topic 名称" },
|
|
96
|
+
{ label: "对象存储(OSS/S3/COS)", description: "提供 Volume 路径和文件格式" },
|
|
97
|
+
{ label: "已有 Lakehouse 表(仅 INSERT)", description: "Dynamic Table 直接读源表" },
|
|
98
|
+
{ label: "已有 Lakehouse 表(含 UPDATE/DELETE)", description: "需要 Table Stream + Dynamic Table" }
|
|
99
|
+
]
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
question: "刷新频率?",
|
|
103
|
+
options: [
|
|
104
|
+
{ label: "实时(秒级)", description: "REFRESH INTERVAL 10~60 SECOND" },
|
|
105
|
+
{ label: "近实时(分钟级)", description: "REFRESH INTERVAL 1~10 MINUTE" },
|
|
106
|
+
{ label: "低频(小时/天)", description: "REFRESH INTERVAL 1 HOUR 或 1 DAY" }
|
|
107
|
+
]
|
|
108
|
+
}
|
|
109
|
+
]
|
|
110
|
+
})
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
还需确认:项目/业务前缀(Schema 命名用)、层次需求(几层、每层做什么)、目标表字段结构。这些可在用户回答后追问,或从上下文推断。
|
|
81
114
|
|
|
82
115
|
### 生成完整 SQL
|
|
83
116
|
|
|
@@ -89,10 +122,37 @@ CREATE SCHEMA IF NOT EXISTS ecommerce_gold;
|
|
|
89
122
|
3. 数据入口(Pipe 或 Table Stream,根据来源选择)
|
|
90
123
|
4. 中间层动态表(清洗/过滤,REFRESH interval N MINUTE VCLUSTER name)
|
|
91
124
|
5. 服务层动态表(聚合/维度,REFRESH interval N MINUTE VCLUSTER name)
|
|
92
|
-
6.
|
|
93
|
-
7.
|
|
125
|
+
6. 各动态表创建后立即执行 REFRESH DYNAMIC TABLE(重置刷新基准)
|
|
126
|
+
7. 验证命令(SHOW + REFRESH HISTORY)
|
|
127
|
+
8. 运维操作(SUSPEND/RESUME)
|
|
94
128
|
```
|
|
95
129
|
|
|
130
|
+
**SQL 生成后,将各段代码保存为 Studio 任务(代码资产化):**
|
|
131
|
+
|
|
132
|
+
数据管道开发场景下,所有 SQL 都应保存为 Studio 任务,作为可管理的代码资产:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
# 建表 DDL → 保存为 DRAFT 任务(不配 Cron)
|
|
136
|
+
cz-cli task save-content <ddl_task_name> --content "<ddl_sql>"
|
|
137
|
+
|
|
138
|
+
# ETL/转换 SQL → 保存为调度任务(配 Cron + 依赖)
|
|
139
|
+
cz-cli task save-content <etl_task_name> --content "<etl_sql>"
|
|
140
|
+
cz-cli task save-cron <etl_task_name> --cron '0 30 2 * * ? *'
|
|
141
|
+
cz-cli task deploy <etl_task_name>
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
> Dynamic Table DDL 也应保存为 DRAFT 任务(`03_ddl_dws_ads`),方便后续查阅和多环境迁移。
|
|
145
|
+
|
|
146
|
+
**⚠️ DDL 任务 vs 数据流转任务的调度规则(硬性约束,不得违反):**
|
|
147
|
+
|
|
148
|
+
| 任务类型 | 判断标准 | 调度配置 | Studio 状态 |
|
|
149
|
+
|---|---|---|---|
|
|
150
|
+
| DDL 任务 | 包含 `CREATE / DROP / ALTER TABLE/SCHEMA` | **禁止配置 Cron,禁止配置依赖** | DRAFT |
|
|
151
|
+
| 数据流转任务 | 数据同步、ETL 转换、数据质量检查 | 配置 Cron + 上下游依赖 | PUBLISHED |
|
|
152
|
+
| Dynamic Table | DWS/ADS 聚合层 | **不建 Studio 调度任务**(不配 Cron、不配依赖),系统自动刷新;DDL 仅可保存为 DRAFT 任务留档 | —(DDL 留档任务为 DRAFT) |
|
|
153
|
+
|
|
154
|
+
> AI 生成 SQL 管道时,如果涉及 Studio 任务编排,必须遵守以上规则。不得为 DDL 语句生成 Cron 调度配置。
|
|
155
|
+
|
|
96
156
|
**来源 → 入口对象的选择规则:**
|
|
97
157
|
- Kafka → `CREATE PIPE ... AS COPY INTO ... FROM (SELECT ... FROM read_kafka('broker', 'topic', '', 'group', '', '', '', '', 'raw', 'raw', 0, MAP(...)))`
|
|
98
158
|
- 对象存储(OSS/S3/COS)→ `CREATE PIPE ... VIRTUAL_CLUSTER = 'name' INGEST_MODE = 'LIST_PURGE' AS COPY INTO ... FROM VOLUME <volume_name> USING <format> PURGE=true`
|
|
@@ -368,6 +428,52 @@ CREATE TABLE dwd.orders_manual (
|
|
|
368
428
|
|
|
369
429
|
---
|
|
370
430
|
|
|
431
|
+
## 交付验收 Checklist
|
|
432
|
+
|
|
433
|
+
管道创建完成后,**必须逐项验证**,不得跳过:
|
|
434
|
+
|
|
435
|
+
```sql
|
|
436
|
+
-- 1. 行数比对:各层行数与预期一致
|
|
437
|
+
SELECT COUNT(*) FROM ods.<table>; -- ODS 行数 ≈ 源端
|
|
438
|
+
SELECT COUNT(*) FROM dwd.<table>; -- DWD 行数 ≤ ODS(清洗后)
|
|
439
|
+
SELECT COUNT(*) FROM dws.<table>; -- DWS 行数符合聚合逻辑
|
|
440
|
+
|
|
441
|
+
-- 2. Dynamic Table 刷新状态
|
|
442
|
+
SHOW DYNAMIC TABLE REFRESH HISTORY <schema>.<table> LIMIT 5;
|
|
443
|
+
-- 确认最近一次 status = SUCCESS,refresh_mode = INCREMENTAL 或 FULL
|
|
444
|
+
|
|
445
|
+
-- 3. 关键字段非空率
|
|
446
|
+
SELECT
|
|
447
|
+
COUNT(*) AS total,
|
|
448
|
+
COUNT(key_field) AS non_null,
|
|
449
|
+
ROUND(COUNT(key_field) * 100.0 / COUNT(*), 2) AS non_null_pct
|
|
450
|
+
FROM <schema>.<table>;
|
|
451
|
+
-- 核心业务字段非空率应 > 99%
|
|
452
|
+
|
|
453
|
+
-- 4. 主键唯一性(DWD 层事实表)
|
|
454
|
+
SELECT key_col, COUNT(*) AS cnt
|
|
455
|
+
FROM dwd.<table>
|
|
456
|
+
GROUP BY key_col
|
|
457
|
+
HAVING cnt > 1
|
|
458
|
+
LIMIT 10;
|
|
459
|
+
-- 结果为空 = 无重复,符合预期
|
|
460
|
+
|
|
461
|
+
-- 5. Pipe 摄入状态(如有)
|
|
462
|
+
SHOW PIPES;
|
|
463
|
+
-- status = RUNNING,last_ingested_timestamp 持续更新
|
|
464
|
+
```
|
|
465
|
+
|
|
466
|
+
**验收标准:**
|
|
467
|
+
- [ ] 各层行数与预期一致
|
|
468
|
+
- [ ] Dynamic Table 最近刷新状态为 SUCCESS
|
|
469
|
+
- [ ] 关键字段非空率 > 99%
|
|
470
|
+
- [ ] DWD 层主键无重复
|
|
471
|
+
- [ ] Pipe 状态 RUNNING(如有)
|
|
472
|
+
- [ ] 所有 DDL 任务为 DRAFT 状态(如涉及 Studio 任务)
|
|
473
|
+
- [ ] DWS/ADS 层无冗余 Studio 调度任务
|
|
474
|
+
|
|
475
|
+
---
|
|
476
|
+
|
|
371
477
|
## 参考文档
|
|
372
478
|
|
|
373
479
|
- [增量计算概述](https://www.yunqi.tech/documents/streaming_data_pipeline_overview)
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: clickzetta-sql-syntax-guide
|
|
3
|
+
description: |
|
|
4
|
+
ClickZetta Lakehouse SQL 语法完整参考,以及从 Snowflake、Databricks、Spark SQL 迁移的
|
|
5
|
+
全面兼容性指南。覆盖 DDL/DML/DQL 完整语法、隐式类型转换规则、迁移陷阱速查。
|
|
6
|
+
帮助从 Snowflake 或 Databricks 迁移的用户快速找到正确语法,避免常见错误。
|
|
7
|
+
当用户说"Snowflake 迁移"、"Databricks 迁移"、"Spark SQL 迁移"、"语法差异"、
|
|
8
|
+
"ClickZetta 怎么写"、"TARGET_LAG"、"QUALIFY"、"VARIANT"、"METADATA$ACTION"、
|
|
9
|
+
"CREATE OR REPLACE"、"LISTAGG"、"IFF"、"DATEADD"、"FLATTEN"、"PIVOT"、
|
|
10
|
+
"SQL 语法参考"、"数据类型"、"DATEDIFF"、"CHARINDEX"、"ZEROIFNULL"、
|
|
11
|
+
"OBJECT_CONSTRUCT"、"ARRAY_SIZE"、"APPLY CHANGES INTO"、"ZORDER"、
|
|
12
|
+
"WHEN NOT MATCHED BY SOURCE"、"WITH RECURSIVE"、"BEGIN TRANSACTION"、
|
|
13
|
+
"隐式转换"、"implicit cast"、"日期写入"、"BOOLEAN 写入"、"UNION"、"INTERSECT"、
|
|
14
|
+
"EXCEPT"、"集合运算"、"STRUCT AS"、"named_struct"、"JSON"、"半结构化"、
|
|
15
|
+
"大宽表"、"VARIANT"、"JSON 字段"、"灵活 Schema"、"客户案例"时触发。
|
|
16
|
+
Keywords: SQL syntax, DDL, DML, DQL, migration, Snowflake, Databricks, Spark SQL, compatibility
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
# ClickZetta Lakehouse SQL 语法指南
|
|
20
|
+
|
|
21
|
+
## 参考文档索引
|
|
22
|
+
|
|
23
|
+
| 文档 | 内容 |
|
|
24
|
+
|---|---|
|
|
25
|
+
| [Snowflake 迁移指南](references/migration-snowflake.md) | 对象映射、类型转换、语法差异、函数对照(完整) |
|
|
26
|
+
| [Databricks 迁移指南](references/migration-databricks.md) | Delta Lake 差异、APPLY CHANGES、ZORDER 替代方案 |
|
|
27
|
+
| [DDL 参考](references/ddl-reference.md) | Schema/Table/View/Index/Time Travel 完整语法 |
|
|
28
|
+
| [DML 参考](references/dml-reference.md) | INSERT/UPDATE/DELETE/MERGE/COPY INTO + 类型转换规则 |
|
|
29
|
+
| [DQL 参考](references/dql-reference.md) | SELECT/JOIN/窗口函数/CTE/JSON/ARRAY/LATERAL VIEW |
|
|
30
|
+
| [函数参考](references/functions-reference.md) | 数值/字符串/日期/条件/聚合/向量函数完整列表 |
|
|
31
|
+
| [vs Snowflake](references/vs-snowflake.md) | 差异汇总(含隐式转换规则表) |
|
|
32
|
+
| [vs Spark SQL](references/vs-spark.md) | 数据类型映射 + 语法差异汇总 |
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## ⚠️ 最常见迁移陷阱(速查)
|
|
37
|
+
|
|
38
|
+
| 场景 | Snowflake / Spark | ClickZetta 正确写法 |
|
|
39
|
+
|---|---|---|
|
|
40
|
+
| 替换普通表 | `CREATE OR REPLACE TABLE t` | `CREATE OR REPLACE TABLE t` ✅ ClickZetta 支持;`CREATE OR REPLACE TABLE IF NOT EXISTS t` ❌ OR REPLACE 与 IF NOT EXISTS 不能同时使用 |
|
|
41
|
+
| OR REPLACE + IF NOT EXISTS | `CREATE OR REPLACE TABLE IF NOT EXISTS t` | ❌ 两者不能同时使用,会报错 |
|
|
42
|
+
| 动态表刷新 | `TARGET_LAG = '1 hour'` (SF) | `REFRESH INTERVAL 1 HOUR VCLUSTER vc`(刷新间隔与计算集群写在 `REFRESH` 子句中,写法见下文"动态表"示例) |
|
|
43
|
+
| Stream 元数据 | `METADATA$ACTION` | `__change_type` |
|
|
44
|
+
| 对象存储导入 | `COPY INTO t FROM @stage` | `COPY INTO t FROM VOLUME v USING CSV` |
|
|
45
|
+
| 窗口过滤 | `QUALIFY ROW_NUMBER() = 1` | `QUALIFY ROW_NUMBER() = 1` ✅ ClickZetta 也支持! |
|
|
46
|
+
| 数组展开 | `LATERAL FLATTEN(input => arr)` (SF) | `LATERAL VIEW EXPLODE(arr)` |
|
|
47
|
+
| 半结构化访问 | `data:key` (SF) | `data['key']` |
|
|
48
|
+
| 列表聚合 | `LISTAGG(col, ',')` (SF) | `GROUP_CONCAT(col SEPARATOR ',')` |
|
|
49
|
+
| 条件函数 | `IFF(cond, a, b)` (SF) | `IF(cond, a, b)` |
|
|
50
|
+
| 日期加减 | `DATEADD(day, 7, dt)` (SF) | `DATEADD(day, 7, dt)` ✅ 也支持;或用 `DATE_ADD(dt, 7)` |
|
|
51
|
+
| DATEDIFF 顺序 | `DATEDIFF(day, start, end)` (SF) | `DATEDIFF(day, start, end)` ✅ 三参数形式也支持;或 `DATEDIFF(end, start)` 两参数形式(返回天数) |
|
|
52
|
+
| 查找子串位置 | `CHARINDEX(sub, s)` (SF) | `INSTR(s, sub)` ← 参数顺序相反! |
|
|
53
|
+
| 不区分大小写匹配 | `ILIKE` (SF) | `ILIKE` ✅ ClickZetta 也支持! |
|
|
54
|
+
| 差集运算 | `MINUS` (Oracle/DB2) | `MINUS` ✅ ClickZetta 也支持! |
|
|
55
|
+
| 递归 CTE | `WITH RECURSIVE` (SF/Databricks) | ❌ 不支持,需用 Python/ZettaPark 替代 |
|
|
56
|
+
| **⚠️ 时间戳字符串写入** | `INSERT INTO t VALUES (1, '2026-05-01 10:00:00')` | ❌ **报错**:必须显式转换 `CAST('2026-05-01 10:00:00' AS TIMESTAMP)` 或 `TIMESTAMP '2026-05-01 10:00:00'` |
|
|
57
|
+
| 集合运算 | `UNION` / `UNION ALL` / `INTERSECT` / `EXCEPT` | ✅ 全部支持 |
|
|
58
|
+
| 事务 | `BEGIN; COMMIT; ROLLBACK;` | ❌ 不支持,用 MERGE 实现原子操作 |
|
|
59
|
+
| MERGE 不匹配删除 | `WHEN NOT MATCHED BY SOURCE THEN DELETE` | ❌ 不支持,需两步:MERGE + DELETE |
|
|
60
|
+
| Delta ZORDER | `OPTIMIZE t ZORDER BY (col)` | `OPTIMIZE t`(只做小文件合并,无 ZORDER) |
|
|
61
|
+
| STRUCT 命名字段 | `STRUCT(1 AS id, 'Alice' AS name)` | `named_struct('id', 1, 'name', 'Alice')` ✅ |
|
|
62
|
+
| SEQUENCE 对象 | `CREATE SEQUENCE seq` | ❌ 不支持,用 `IDENTITY(1)` 列替代 |
|
|
63
|
+
| IDENTITY 列类型 | `id INT IDENTITY` | `id BIGINT IDENTITY`(IDENTITY 只支持 BIGINT,INT/SMALLINT 会报错) |
|
|
64
|
+
| 当前时间函数 | `NOW()` | `NOW()` ✅ ClickZetta 也支持!也可用 `CURRENT_TIMESTAMP()` |
|
|
65
|
+
| 布尔类型名称 | `BOOL` | `BOOLEAN`(ClickZetta 不支持 BOOL 简写) |
|
|
66
|
+
| 字符串类型 | `VARCHAR(n)` | 推荐用 `STRING`(无需声明长度,单值最大 16MB);`VARCHAR(n)` 也支持但不推荐 |
|
|
67
|
+
| 数值类型 | `NUMBER(p,s)` (SF) | `DECIMAL(p,s)` |
|
|
68
|
+
| 半结构化类型 | `VARIANT` (SF) | `JSON` |
|
|
69
|
+
| 行数限制 | `SELECT TOP 10` (SF) | `SELECT ... LIMIT 10` |
|
|
70
|
+
| NULL转0 | `ZEROIFNULL(x)` (SF) | `COALESCE(x, 0)` |
|
|
71
|
+
| 0转NULL | `NULLIFZERO(x)` (SF) | `NULLIF(x, 0)` |
|
|
72
|
+
| 对象聚合 | `OBJECT_AGG(k, v)` (SF) | `MAP_AGG(k, v)` |
|
|
73
|
+
| 数组大小 | `ARRAY_SIZE(arr)` (SF) | `SIZE(arr)` 或 `ARRAY_SIZE(arr)` ✅ 两者均支持 |
|
|
74
|
+
| PIVOT | 原生 PIVOT 语法 (SF) | `CASE WHEN` 手动实现 |
|
|
75
|
+
| 临时表 | `CREATE TEMPORARY TABLE` (SF) | 不支持,用 CTE 替代 |
|
|
76
|
+
| 日期字符串写入 | `INSERT ... VALUES (..., '2024-01-15', ...)` | `CAST('2024-01-15' AS DATE)` 或 `DATE '2024-01-15'` 或 `TO_DATE(...)` |
|
|
77
|
+
| 时间字符串写入 | `INSERT ... VALUES (..., '2024-01-15 12:00:00', ...)` | `CAST(... AS TIMESTAMP)` 或 `TIMESTAMP '...'` 或 `TO_TIMESTAMP(...)` |
|
|
78
|
+
| BOOLEAN 写入 | `INSERT ... VALUES (..., 'true', ...)` 或 `..., 1, ...` | `TRUE` / `FALSE` 或 `CAST(1 AS BOOLEAN)` |
|
|
79
|
+
| JSON 写入 | `INSERT ... VALUES (..., '{"k":1}', ...)` | `PARSE_JSON('{"k":1}')` 或 `CAST(... AS JSON)` |
|
|
80
|
+
| 字符串写入数字列 | `INSERT ... VALUES (..., '123', ...)` | `CAST('123' AS INT)` |
|
|
81
|
+
| UPDATE 同样限制 | `UPDATE t SET dt = '2024-01-01'` | `UPDATE t SET dt = CAST('2024-01-01' AS DATE)` |
|
|
82
|
+
| WHERE 中字符串比较(例外) | — | `WHERE dt = '2024-01-01'` ✅ WHERE 条件中字符串可隐式转换后比较,无需显式 CAST(上述限制仅针对 INSERT/UPDATE 写入) |
|
|
83
|
+
| 索引语法关键字 | `USING BLOOM_FILTER` | `BLOOMFILTER`(无 USING);向量/倒排建表内联时用 `USING VECTOR` / `USING INVERTED` |
|
|
84
|
+
| DROP INDEX | `DROP INDEX idx ON table` | `DROP INDEX idx`(无 ON table) |
|
|
85
|
+
| TRUNCATE IF EXISTS | `TRUNCATE TABLE IF EXISTS t` | ❌ 不支持 `IF EXISTS`,直接用 `TRUNCATE TABLE t`(表不存在会报错) |
|
|
86
|
+
| DESC TABLE 扩展 | `DESC TABLE t EXTENDED` / `DESC TABLE t HISTORY` | ❌ 不支持 EXTENDED/HISTORY 参数,用 `DESC TABLE t` 或 `SHOW CREATE TABLE t` |
|
|
87
|
+
| TABLESAMPLE | `SELECT * FROM t TABLESAMPLE (50 PERCENT)` | ❌ 不支持 PERCENT 语法,用 `ORDER BY RAND() LIMIT n` 替代(`n` 为目标行数而非百分比,需按总行数自行换算) |
|
|
88
|
+
| MERGE 多 MATCHED 顺序 | DELETE 可在 UPDATE 前 | UPDATE 必须在 DELETE 之前 |
|
|
89
|
+
| 同义词 | `CREATE SYNONYM s FOR t` (Oracle) | `CREATE SYNONYM s FOR TABLE t` ✅ 支持 TABLE/VOLUME/FUNCTION 三种对象 |
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## 数据类型速查
|
|
94
|
+
|
|
95
|
+
```sql
|
|
96
|
+
-- 数值
|
|
97
|
+
TINYINT / SMALLINT / INT / BIGINT
|
|
98
|
+
FLOAT / DOUBLE
|
|
99
|
+
DECIMAL(p, s) -- 精确数值(Snowflake 用 NUMBER)
|
|
100
|
+
|
|
101
|
+
-- 字符串
|
|
102
|
+
STRING -- 推荐,无长度限制
|
|
103
|
+
VARCHAR(n) -- 最大 65533 字符
|
|
104
|
+
CHAR(n) -- 定长,1-255
|
|
105
|
+
|
|
106
|
+
-- 时间
|
|
107
|
+
DATE -- YYYY-MM-DD
|
|
108
|
+
TIMESTAMP -- 带本地时区(≈ Snowflake TIMESTAMP_LTZ)
|
|
109
|
+
TIMESTAMP_NTZ -- 无时区(同 Snowflake TIMESTAMP_NTZ)
|
|
110
|
+
|
|
111
|
+
-- 布尔 / 二进制
|
|
112
|
+
BOOLEAN / BINARY
|
|
113
|
+
|
|
114
|
+
-- 半结构化
|
|
115
|
+
JSON -- 替代 Snowflake VARIANT
|
|
116
|
+
ARRAY<T> -- 需指定元素类型,如 ARRAY<INT>
|
|
117
|
+
MAP<K, V> -- 如 MAP<STRING, INT>
|
|
118
|
+
STRUCT<f1:T1, f2:T2> -- 结构体
|
|
119
|
+
|
|
120
|
+
-- AI 专用
|
|
121
|
+
VECTOR(FLOAT, 1024) -- 向量类型(ClickZetta 特有)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## ClickZetta 特有对象(Snowflake/Spark 无对应)
|
|
127
|
+
|
|
128
|
+
```sql
|
|
129
|
+
-- 计算集群
|
|
130
|
+
CREATE VCLUSTER my_vc VCLUSTER_TYPE = ANALYTICS VCLUSTER_SIZE = 4;
|
|
131
|
+
USE VCLUSTER my_vc;
|
|
132
|
+
|
|
133
|
+
-- 动态表(增量计算)
|
|
134
|
+
CREATE DYNAMIC TABLE sales_daily
|
|
135
|
+
REFRESH INTERVAL 5 MINUTE VCLUSTER default_ap
|
|
136
|
+
AS SELECT DATE(created_at) AS dt, SUM(amount) AS total FROM orders GROUP BY 1;
|
|
137
|
+
|
|
138
|
+
-- Table Stream(CDC)
|
|
139
|
+
CREATE TABLE STREAM orders_stream ON TABLE orders
|
|
140
|
+
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
|
|
141
|
+
-- 元数据字段:__change_type(INSERT/UPDATE_BEFORE/UPDATE_AFTER/DELETE)
|
|
142
|
+
|
|
143
|
+
-- Pipe(持续导入)
|
|
144
|
+
CREATE PIPE oss_pipe
|
|
145
|
+
AS COPY INTO orders FROM VOLUME my_volume USING CSV OPTIONS('header'='true');
|
|
146
|
+
|
|
147
|
+
-- Volume(对象存储)
|
|
148
|
+
CREATE EXTERNAL VOLUME my_vol
|
|
149
|
+
LOCATION 'oss://bucket/path'
|
|
150
|
+
USING CONNECTION my_oss_conn;
|
|
151
|
+
|
|
152
|
+
-- Share(跨实例数据共享)
|
|
153
|
+
CREATE SHARE my_share;
|
|
154
|
+
GRANT SELECT, READ METADATA ON TABLE public.orders TO SHARE my_share;
|
|
155
|
+
|
|
156
|
+
-- Synonym(同义词,为对象创建别名)
|
|
157
|
+
CREATE SYNONYM my_orders FOR TABLE other_schema.orders;
|
|
158
|
+
CREATE SYNONYM my_vol FOR VOLUME other_schema.data_volume;
|
|
159
|
+
CREATE SYNONYM my_func FOR FUNCTION other_schema.udf_name;
|
|
160
|
+
DROP SYNONYM my_orders FOR TABLE;
|
|
161
|
+
SHOW SYNONYMS;
|
|
162
|
+
|
|
163
|
+
-- Time Travel
|
|
164
|
+
SELECT * FROM orders TIMESTAMP AS OF '2024-01-01 00:00:00';
|
|
165
|
+
RESTORE TABLE orders TO TIMESTAMP AS OF '2024-01-01 00:00:00';
|
|
166
|
+
UNDROP TABLE orders;
|
|
167
|
+
|
|
168
|
+
-- 向量检索
|
|
169
|
+
CREATE TABLE docs (id INT, vec VECTOR(FLOAT, 1024),
|
|
170
|
+
INDEX vec_idx (vec) USING VECTOR PROPERTIES ("distance.function"="cosine_distance"));
|
|
171
|
+
SELECT id, cosine_distance(vec, CAST('[0.1,0.2,...]' AS VECTOR(1024))) AS dist
|
|
172
|
+
FROM docs ORDER BY dist LIMIT 10;
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
## ❌ 明确不支持的功能
|
|
178
|
+
|
|
179
|
+
以下功能在 Snowflake/Databricks/Spark 中存在,但 ClickZetta **不支持**。使用时会报错,需要用替代方案。
|
|
180
|
+
|
|
181
|
+
### 字符串函数
|
|
182
|
+
|
|
183
|
+
| 不支持的函数 | 替代方案 |
|
|
184
|
+
|---|---|
|
|
185
|
+
| `INITCAP(s)` | `CONCAT(UPPER(SUBSTR(s, 1, 1)), LOWER(SUBSTR(s, 2)))` |
|
|
186
|
+
| `SOUNDEX(s)` | 无替代方案 |
|
|
187
|
+
| `CHARINDEX(sub, s)` | `INSTR(s, sub)`(注意参数顺序相反) |
|
|
188
|
+
|
|
189
|
+
### JSON 函数
|
|
190
|
+
|
|
191
|
+
| 不支持的函数 | 替代方案 |
|
|
192
|
+
|---|---|
|
|
193
|
+
| `JSON_ARRAY_LENGTH(json)` | `SIZE(CAST(json_str AS ARRAY<STRING>))` |
|
|
194
|
+
| `JSON_OBJECT_KEYS(json)` | 无直接替代,需手动解析 |
|
|
195
|
+
|
|
196
|
+
### 集合/数组/MAP 函数
|
|
197
|
+
|
|
198
|
+
| 不支持的函数 | 替代方案 |
|
|
199
|
+
|---|---|
|
|
200
|
+
| `MAP_FROM_ZIP(keys, values)` | `MAP_FROM_ARRAYS(keys, values)` |
|
|
201
|
+
| `TO_ARRAY(expr)` | `ARRAY(expr)` 或 `CAST(expr AS ARRAY<T>)` |
|
|
202
|
+
| `ARRAY_SIZE(arr)` (Snowflake) | ✅ 已支持,无需替换(列于此处仅为澄清):`ARRAY_SIZE(arr)` 或 `SIZE(arr)` 两者均支持 |
|
|
203
|
+
|
|
204
|
+
### 正则函数
|
|
205
|
+
|
|
206
|
+
| 不支持的函数 | 替代方案 |
|
|
207
|
+
|---|---|
|
|
208
|
+
| `REGEXP_SUBSTR(s, pattern)` | `REGEXP_EXTRACT(s, '(pattern)')` |
|
|
209
|
+
|
|
210
|
+
### 表函数/生成器
|
|
211
|
+
|
|
212
|
+
| 不支持的函数 | 替代方案 |
|
|
213
|
+
|---|---|
|
|
214
|
+
| `GENERATE(start, end)` | 无直接替代,用 CTE + UNION ALL 或应用层生成 |
|
|
215
|
+
| `RANGE(n)` | 无直接替代 |
|
|
216
|
+
| `TABLESAMPLE (n PERCENT)` | `ORDER BY RAND() LIMIT k`(`k` 为目标行数,需按"总行数 × n%"自行换算) |
|
|
217
|
+
|
|
218
|
+
### 地理空间/网络
|
|
219
|
+
|
|
220
|
+
| 不支持的函数 | 替代方案 |
|
|
221
|
+
|---|---|
|
|
222
|
+
| `ST_GeomFromWKT(wkt)` | 不支持地理空间函数 |
|
|
223
|
+
| `TO_IPV4(ip_string)` | 不支持 IP 地址函数 |
|
|
224
|
+
|
|
225
|
+
### 近似计算
|
|
226
|
+
|
|
227
|
+
| 不支持的函数 | 替代方案 |
|
|
228
|
+
|---|---|
|
|
229
|
+
| `HLL_APPROX(col)` | `APPROX_COUNT_DISTINCT(col)` |
|
|
230
|
+
|
|
231
|
+
### 位运算
|
|
232
|
+
|
|
233
|
+
| 不支持的函数 | 替代方案 |
|
|
234
|
+
|---|---|
|
|
235
|
+
| `BITAND(a, b)` | `a & b`(位运算符) |
|
|
236
|
+
| `BITOR(a, b)` | `a \| b` |
|
|
237
|
+
| `BITXOR(a, b)` | `a ^ b` |
|
|
238
|
+
|
|
239
|
+
### DDL/DML 限制
|
|
240
|
+
|
|
241
|
+
| 不支持的语法 | 替代方案 |
|
|
242
|
+
|---|---|
|
|
243
|
+
| `TRUNCATE TABLE IF EXISTS t` | 先检查表是否存在,再 `TRUNCATE TABLE t` |
|
|
244
|
+
| `DESC TABLE t EXTENDED` | `DESC TABLE t` 或 `SHOW CREATE TABLE t` |
|
|
245
|
+
| `DESC TABLE t HISTORY` | `SHOW TABLES HISTORY WHERE table_name = 't'` |
|
|
246
|
+
| `CREATE TEMPORARY TABLE` | 用 CTE 替代,或创建普通表后手动删除 |
|
|
247
|
+
| `CREATE OR REPLACE TABLE` | ✅ 已支持,无需替换(列于此处仅为澄清):`CREATE OR REPLACE TABLE t (...)`;仅不能与 `IF NOT EXISTS` 同时使用 |
|
|
248
|
+
| `BEGIN; COMMIT; ROLLBACK;` | 不支持事务,用 MERGE 实现原子操作 |
|
|
249
|
+
| `WITH RECURSIVE` | 不支持递归 CTE,用 Python/ZettaPark 替代 |
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
{"case_id":"001","type":"should_call","user_input":"Snowflake 的 IFF、ARRAY_SIZE、LISTAGG 在 ClickZetta 怎么写?","expected_skill":"clickzetta-sql-syntax-guide","expected_output_contains":["IF(","SIZE("]}
|
|
2
|
+
{"case_id":"002","type":"should_call","user_input":"Databricks 的 APPLY CHANGES INTO 在 ClickZetta 怎么替代?","expected_skill":"clickzetta-sql-syntax-guide","expected_output_contains":["MERGE INTO"]}
|
|
3
|
+
{"case_id":"003","type":"should_call","user_input":"ClickZetta 的隐式类型转换规则是什么?","expected_skill":"clickzetta-sql-syntax-guide","expected_output_contains":["隐式","转换"]}
|