@clickzetta/cz-cli-darwin-x64 0.3.39 → 0.3.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-app-python-sdk/SKILL.md +153 -0
- package/bin/skills/clickzetta-app-python-sdk/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +196 -0
- package/bin/skills/clickzetta-app-python-sdk/references/connector.md +143 -0
- package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +122 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +128 -287
- package/bin/skills/clickzetta-bi-connect/SKILL.md +176 -0
- package/bin/skills/clickzetta-bi-connect/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +170 -0
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +96 -11
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +237 -0
- package/bin/skills/clickzetta-data-ingest-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-data-science/SKILL.md +125 -0
- package/bin/skills/clickzetta-data-science/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +146 -0
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +110 -0
- package/bin/skills/clickzetta-data-science/references/setup.md +160 -0
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +195 -0
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +122 -0
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +156 -0
- package/bin/skills/clickzetta-data-sharing/SKILL.md +160 -0
- package/bin/skills/clickzetta-data-sharing/eval_cases.jsonl +3 -0
- package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +134 -0
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +103 -11
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +58 -2
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +4 -4
- package/bin/skills/clickzetta-external-catalog/SKILL.md +123 -0
- package/bin/skills/clickzetta-external-catalog/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +130 -0
- package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +34 -0
- package/bin/skills/clickzetta-java-sdk/SKILL.md +186 -0
- package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +163 -0
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +212 -0
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +38 -20
- package/bin/skills/clickzetta-metadata/SKILL.md +51 -32
- package/bin/skills/clickzetta-monitoring/SKILL.md +18 -2
- package/bin/skills/clickzetta-monitoring/references/show-jobs.md +2 -2
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +63 -38
- package/bin/skills/clickzetta-pipeline-review/SKILL.md +377 -0
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +63 -16
- package/bin/skills/clickzetta-semantic-view/SKILL.md +207 -0
- package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +167 -0
- package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +92 -0
- package/bin/skills/clickzetta-spark-flink-connector/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +147 -0
- package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +132 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +115 -9
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +249 -0
- package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +3 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +350 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +279 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +504 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +372 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +260 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +382 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +346 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +229 -0
- package/bin/skills/clickzetta-studio-task-manager/SKILL.md +652 -0
- package/bin/skills/clickzetta-table-lineage/SKILL.md +90 -0
- package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -0
- package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +14 -0
- package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +38 -0
- package/bin/skills/clickzetta-table-lineage/references/table_lineage_standalone.html +562 -0
- package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +25 -0
- package/bin/skills/clickzetta-zettapark/SKILL.md +248 -0
- package/bin/skills/clickzetta-zettapark/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +283 -0
- package/bin/skills/cz-cli-inner/SKILL.md +5 -4
- package/package.json +1 -1
- package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
- package/bin/skills/clickzetta-ai-vector-search/eval_cases.jsonl +0 -4
- package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
|
@@ -16,6 +16,37 @@ description: |
|
|
|
16
16
|
|
|
17
17
|
# Kafka 数据接入管道工作流
|
|
18
18
|
|
|
19
|
+
## 向导:收集必要信息
|
|
20
|
+
|
|
21
|
+
开始搭建 Kafka 管道前,优先使用交互式问答工具(如 `question`)收集以下信息并弹出选项菜单;若无此类工具,则用文字一次性列出所有问题:
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
question({
|
|
25
|
+
questions: [
|
|
26
|
+
{
|
|
27
|
+
question: "Kafka 消息格式?",
|
|
28
|
+
options: [
|
|
29
|
+
{ label: "JSON(简单结构)", description: "顶层字段直接映射" },
|
|
30
|
+
{ label: "JSON(嵌套结构)", description: "需要 JSONPath 解析嵌套字段" },
|
|
31
|
+
{ label: "CSV", description: "逗号分隔文本" },
|
|
32
|
+
{ label: "Avro / 其他", description: "需要额外配置" }
|
|
33
|
+
]
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
question: "接入路径?",
|
|
37
|
+
options: [
|
|
38
|
+
{ label: "READ_KAFKA Pipe(推荐)", description: "通用场景,支持复杂 SQL 转换" },
|
|
39
|
+
{ label: "Kafka 外部表 + Table Stream", description: "需要保留原始消息或多个下游消费同一 Topic" }
|
|
40
|
+
]
|
|
41
|
+
}
|
|
42
|
+
]
|
|
43
|
+
})
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**如果用户已经提供了足够信息,直接进入工作流,不再弹出菜单。**
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
19
50
|
## 适用场景
|
|
20
51
|
|
|
21
52
|
- 将 Kafka Topic 数据持续导入 ClickZetta Lakehouse 表
|
|
@@ -39,29 +70,18 @@ description: |
|
|
|
39
70
|
- Kafka 集群网络可达(确认 bootstrap 地址和端口)
|
|
40
71
|
- 已知 Kafka Topic 名称和消息格式
|
|
41
72
|
- 认证信息(如需要):SASL 用户名/密码
|
|
42
|
-
-
|
|
43
|
-
- **cz-cli 路径**:已安装 cz-cli(`pip install cz-cli`),并完成 `cz-cli configure` 配置
|
|
44
|
-
- **MCP 路径**:clickzetta-mcp-server 工具可用(`LH_execute_query`、`LH_show_object_list` 等)
|
|
73
|
+
- **执行环境**:已安装并配置 cz-cli
|
|
45
74
|
|
|
46
|
-
##
|
|
75
|
+
## 执行环境
|
|
47
76
|
|
|
48
|
-
|
|
77
|
+
所有 SQL 通过 `cz-cli sql` 执行:
|
|
49
78
|
|
|
50
|
-
**第一步:检测 cz-cli 是否可用**
|
|
51
79
|
```bash
|
|
52
|
-
cz-cli --version
|
|
80
|
+
cz-cli --version # 确认 cz-cli 可用
|
|
81
|
+
cz-cli sql "SELECT 1" --sync # 验证连接
|
|
53
82
|
```
|
|
54
|
-
- 若命令存在 → **走 cz-cli 路径**(见本文档末尾"cz-cli 替代路径"章节)
|
|
55
|
-
- 若命令不存在 → 继续检测 MCP
|
|
56
|
-
|
|
57
|
-
**第二步:检测 MCP 是否可用(仅在 cz-cli 不可用时)**
|
|
58
83
|
|
|
59
|
-
|
|
60
|
-
- 若工具存在于 tool list → **走 MCP 路径**(本文档默认路径)
|
|
61
|
-
- 若工具不存在 → 停止执行,提示用户:
|
|
62
|
-
> "当前环境既无 cz-cli 也无 MCP 工具,请安装其中之一后重试。
|
|
63
|
-
> cz-cli 安装:`pip install cz-cli`,然后运行 `cz-cli configure`
|
|
64
|
-
> MCP 安装:参考 clickzetta-mcp-server 配置文档"
|
|
84
|
+
若上述任一命令执行失败,说明 cz-cli 尚未安装或未正确配置;请参考官方文档完成安装与配置后重试。
|
|
65
85
|
|
|
66
86
|
## ⚠️ 关键注意事项
|
|
67
87
|
|
|
@@ -663,10 +683,8 @@ COPY INTO ods.secure_events FROM (
|
|
|
663
683
|
|
|
664
684
|
---
|
|
665
685
|
|
|
666
|
-
## cz-cli
|
|
686
|
+
## cz-cli 执行路径
|
|
667
687
|
|
|
668
|
-
> 仅在 cz-cli 可用且 MCP 不可用时使用本节。步骤编号与上方 MCP 路径对应。
|
|
669
|
-
> 所有操作通过 `cz-cli agent run` 委托给内置 agent 完成,agent 内置完整的 MCP 工具访问能力。
|
|
670
688
|
|
|
671
689
|
### 路径一:READ_KAFKA Pipe(cz-cli 版)
|
|
672
690
|
|
|
@@ -1,40 +1,39 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: clickzetta-metadata
|
|
3
3
|
description: |
|
|
4
|
-
查询 ClickZetta Lakehouse
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
覆盖所有 SHOW 命令(TABLES/SCHEMAS/CATALOGS/COLUMNS/VOLUMES/CONNECTIONS/JOBS/VCLUSTERS/
|
|
13
|
-
PIPES/SHARES/USERS/ROLES/GRANTS/FUNCTIONS/TABLE STREAMS/PARTITIONS/SYNONYMS/INDEX/
|
|
14
|
-
DYNAMIC TABLE REFRESH HISTORY/TABLES HISTORY),所有 DESC 命令(TABLE/SCHEMA/HISTORY/
|
|
15
|
-
VCLUSTER/VOLUME/CONNECTION/FUNCTION/VIEW/DYNAMIC TABLE/SHARE/INDEX/TABLE STREAM),
|
|
16
|
-
SHOW CREATE TABLE,load_history(),FROM (SHOW ...) 子查询,上下文函数,
|
|
17
|
-
以及 INFORMATION_SCHEMA 空间级和实例级视图(TABLES/COLUMNS/JOB_HISTORY/USERS/ROLES/
|
|
18
|
-
VOLUMES/CONNECTIONS/MATERIALIZED_VIEW_REFRESH_HISTORY/STORAGE_METERING/INSTANCE_USAGE 等)。
|
|
19
|
-
|
|
20
|
-
当用户说"查看表列表"、"查看字段"、"查看字段信息"、"查看作业"、"查看作业历史"、
|
|
21
|
-
"查看 JOB 历史"、"SHOW TABLES"、"DESC TABLE"、"查看分区"、"查看历史版本"、
|
|
22
|
-
"查看删除的表"、"查看导入历史"、"load_history"、"SHOW JOBS"、"查看集群状态"、
|
|
23
|
-
"查看连接"、"查看权限"、"SHOW GRANTS"、"查看函数"、"查看 Volume"、
|
|
24
|
-
"查看 Volume 列表"、"查看 Share"、"查看 Catalog"、"查看慢查询"、
|
|
25
|
-
"查看 CRU 消耗"、"费用分析"、"成本分析"、"计算费用"、"存储费用"、
|
|
26
|
-
"用量统计"、"成本归因"、"哪个用户消耗最多"、"存储用量排行"、
|
|
27
|
-
"查看用户列表"、"查看角色"、"查看 Connection"、"查看物化视图刷新历史"、
|
|
28
|
-
"元数据查询"、"information_schema"、"查看所有表"、"查看 Schema 列表"、
|
|
29
|
-
"统计存储用量"、"SHOW/DESC 和 information_schema 哪个更快"时触发。
|
|
30
|
-
|
|
31
|
-
注意:本 skill 仅覆盖元数据的只读查询(SHOW/DESC/information_schema)。
|
|
32
|
-
权限变更(GRANT/REVOKE/创建用户/角色管理/数据脱敏)请使用 clickzetta-access-control skill。
|
|
33
|
-
Keywords: SHOW, DESC, DESCRIBE, metadata, load_history, information_schema, table info, column info, job history, system view, cost analysis, CRU
|
|
4
|
+
查询 ClickZetta Lakehouse 元数据,覆盖两种方式:
|
|
5
|
+
SHOW/DESC 命令族(实时,适合单个对象即时查询)和
|
|
6
|
+
INFORMATION_SCHEMA 视图(支持复杂 SQL 分析、费用归因、跨对象统计)。
|
|
7
|
+
当用户说"查看表列表"、"查看字段"、"查看作业历史"、"SHOW TABLES"、
|
|
8
|
+
"DESC TABLE"、"查看分区"、"查看权限"、"SHOW GRANTS"、"查看 Volume"、
|
|
9
|
+
"费用分析"、"成本归因"、"用量统计"、"元数据查询"、"information_schema"时触发。
|
|
10
|
+
注意:本 skill 仅覆盖只读元数据查询;权限变更请使用 clickzetta-access-control。
|
|
11
|
+
Keywords: SHOW, DESC, metadata, load_history, information_schema, job history, cost analysis, CRU
|
|
34
12
|
---
|
|
35
13
|
|
|
36
14
|
# ClickZetta 元数据查询指南
|
|
37
15
|
|
|
16
|
+
## 执行方式
|
|
17
|
+
|
|
18
|
+
所有 SQL 通过 `cz-cli sql` 执行,无需 MCP 工具。
|
|
19
|
+
|
|
20
|
+
**执行示例:**
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
# 执行 SHOW/DESC 查询
|
|
24
|
+
cz-cli sql "SHOW TABLES" --sync -o table
|
|
25
|
+
|
|
26
|
+
# 执行 information_schema 查询
|
|
27
|
+
cz-cli sql "SELECT * FROM information_schema.tables LIMIT 10" --sync -o table
|
|
28
|
+
|
|
29
|
+
# 执行 load_history 查询
|
|
30
|
+
cz-cli sql "SELECT * FROM load_history('my_schema.my_table') LIMIT 20" --sync -o table
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
注意:`--sync` 等待结果返回;`-o table` 输出为表格格式便于阅读。
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
38
37
|
## 选择查询方式
|
|
39
38
|
|
|
40
39
|
| 场景 | 推荐方式 | 原因 |
|
|
@@ -51,6 +50,26 @@ description: |
|
|
|
51
50
|
|
|
52
51
|
---
|
|
53
52
|
|
|
53
|
+
## 支持的命令与视图全览
|
|
54
|
+
|
|
55
|
+
**SHOW 命令**:TABLES / SCHEMAS / CATALOGS / COLUMNS / VOLUMES / CONNECTIONS / JOBS / VCLUSTERS /
|
|
56
|
+
PIPES / SHARES / USERS / ROLES / GRANTS / FUNCTIONS / TABLE STREAMS / PARTITIONS / SYNONYMS / INDEX /
|
|
57
|
+
DYNAMIC TABLE REFRESH HISTORY / TABLES HISTORY
|
|
58
|
+
|
|
59
|
+
**DESC 命令**:TABLE / SCHEMA / HISTORY / VCLUSTER / VOLUME / CONNECTION / FUNCTION / VIEW /
|
|
60
|
+
DYNAMIC TABLE / SHARE / INDEX / TABLE STREAM
|
|
61
|
+
|
|
62
|
+
**其他**:SHOW CREATE TABLE、load_history()、FROM (SHOW ...) 子查询、上下文函数
|
|
63
|
+
|
|
64
|
+
**INFORMATION_SCHEMA 视图**(空间级):TABLES / COLUMNS / JOB_HISTORY / USERS / ROLES /
|
|
65
|
+
VOLUMES / CONNECTIONS / MATERIALIZED_VIEW_REFRESH_HISTORY / AUTOMV_REFRESH_HISTORY / SORTKEY_CANDIDATES
|
|
66
|
+
|
|
67
|
+
**INFORMATION_SCHEMA 视图**(实例级,需 INSTANCE ADMIN):WORKSPACES / SCHEMAS / TABLES / COLUMNS /
|
|
68
|
+
VIEWS / USERS / ROLES / JOB_HISTORY / VOLUMES / CONNECTIONS / OBJECT_PRIVILEGES /
|
|
69
|
+
STORAGE_METERING / INSTANCE_USAGE
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
54
73
|
## 参考文档
|
|
55
74
|
|
|
56
75
|
- [SHOW/DESC 完整语法](references/show-desc-reference.md)
|
|
@@ -138,7 +157,7 @@ SHOW VCLUSTERS WHERE state = 'RUNNING';
|
|
|
138
157
|
|
|
139
158
|
-- 作业(最近 7 天,最多 10000 条,不支持 ORDER BY)
|
|
140
159
|
SHOW JOBS LIMIT 20;
|
|
141
|
-
SHOW JOBS IN VCLUSTER
|
|
160
|
+
SHOW JOBS IN VCLUSTER default LIMIT 20;
|
|
142
161
|
|
|
143
162
|
-- 动态表刷新历史(最近 7 天)
|
|
144
163
|
SHOW DYNAMIC TABLE REFRESH HISTORY LIMIT 20;
|
|
@@ -169,7 +188,7 @@ SHOW SHARES;
|
|
|
169
188
|
DESC my_table;
|
|
170
189
|
DESC EXTENDED my_table; -- 含 last_modified_time/properties/statistics
|
|
171
190
|
DESC SCHEMA my_schema;
|
|
172
|
-
DESC VCLUSTER
|
|
191
|
+
DESC VCLUSTER default;
|
|
173
192
|
DESC VOLUME my_volume;
|
|
174
193
|
DESC CONNECTION my_oss_conn;
|
|
175
194
|
DESC FUNCTION my_schema.my_function; -- 仅支持外部函数
|
|
@@ -13,6 +13,22 @@ description: |
|
|
|
13
13
|
Keywords: monitoring, job status, performance, resource usage, SHOW JOBS, slow query
|
|
14
14
|
---
|
|
15
15
|
|
|
16
|
+
## 执行方式
|
|
17
|
+
|
|
18
|
+
所有 SQL 通过 `cz-cli sql` 执行,无需 MCP 工具:
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
# 执行查询
|
|
22
|
+
cz-cli sql "SHOW JOBS LIMIT 20" --sync -o table
|
|
23
|
+
|
|
24
|
+
# 执行 information_schema 查询
|
|
25
|
+
cz-cli sql "SELECT * FROM information_schema.job_history WHERE pt_date >= CAST(CURRENT_DATE - INTERVAL 1 DAY AS DATE) LIMIT 10" --sync -o table
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
注意:`--sync` 等待结果返回;`-o table` 输出为表格格式便于阅读。
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
16
32
|
# ClickZetta 作业监控与分析
|
|
17
33
|
|
|
18
34
|
阅读 [references/show-jobs.md](references/show-jobs.md) 了解 SHOW JOBS 语法。
|
|
@@ -27,7 +43,7 @@ description: |
|
|
|
27
43
|
SHOW JOBS;
|
|
28
44
|
|
|
29
45
|
-- 查看指定集群的作业
|
|
30
|
-
SHOW JOBS IN VCLUSTER
|
|
46
|
+
SHOW JOBS IN VCLUSTER default;
|
|
31
47
|
|
|
32
48
|
-- 查看执行时间超过2分钟的慢查询
|
|
33
49
|
SHOW JOBS WHERE execution_time > INTERVAL 2 MINUTE;
|
|
@@ -36,7 +52,7 @@ SHOW JOBS WHERE execution_time > INTERVAL 2 MINUTE;
|
|
|
36
52
|
SHOW JOBS WHERE status = 'FAILED';
|
|
37
53
|
|
|
38
54
|
-- 限制返回数量
|
|
39
|
-
SHOW JOBS IN VCLUSTER
|
|
55
|
+
SHOW JOBS IN VCLUSTER default LIMIT 50;
|
|
40
56
|
```
|
|
41
57
|
|
|
42
58
|
---
|
|
@@ -24,10 +24,10 @@ SHOW JOBS [IN VCLUSTER vc_name] [LIKE 'pattern'] [WHERE <expr>] [LIMIT num];
|
|
|
24
24
|
SHOW JOBS;
|
|
25
25
|
|
|
26
26
|
-- 查看指定集群的作业
|
|
27
|
-
SHOW JOBS IN VCLUSTER
|
|
27
|
+
SHOW JOBS IN VCLUSTER default;
|
|
28
28
|
|
|
29
29
|
-- 查看执行时间超过2分钟的作业
|
|
30
|
-
SHOW JOBS IN VCLUSTER
|
|
30
|
+
SHOW JOBS IN VCLUSTER default WHERE execution_time > INTERVAL 2 MINUTE;
|
|
31
31
|
|
|
32
32
|
-- 限制返回100条
|
|
33
33
|
SHOW JOBS LIMIT 100;
|
|
@@ -13,6 +13,45 @@ description: |
|
|
|
13
13
|
|
|
14
14
|
# 对象存储数据管道搭建工作流
|
|
15
15
|
|
|
16
|
+
## 向导:收集必要信息
|
|
17
|
+
|
|
18
|
+
开始搭建对象存储管道前,优先使用交互式问答工具(如 `question`)收集以下信息并弹出选项菜单;若无此类工具,则用文字一次性列出所有问题:
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
question({
|
|
22
|
+
questions: [
|
|
23
|
+
{
|
|
24
|
+
question: "云平台?",
|
|
25
|
+
options: [
|
|
26
|
+
{ label: "阿里云 OSS", description: "支持 LIST_PURGE 和 EVENT_NOTIFICATION 两种模式" },
|
|
27
|
+
{ label: "AWS S3", description: "支持 LIST_PURGE 和 EVENT_NOTIFICATION 两种模式" },
|
|
28
|
+
{ label: "腾讯云 COS", description: "仅支持 LIST_PURGE 模式" }
|
|
29
|
+
]
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
question: "导入模式?",
|
|
33
|
+
options: [
|
|
34
|
+
{ label: "持续导入(PIPE)", description: "新文件自动触发导入,近实时" },
|
|
35
|
+
{ label: "批量一次性导入", description: "手动或定时执行 COPY INTO" }
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
question: "文件格式?",
|
|
40
|
+
options: [
|
|
41
|
+
{ label: "CSV", description: "逗号分隔文本" },
|
|
42
|
+
{ label: "JSON / JSONL", description: "JSON 或换行分隔 JSON" },
|
|
43
|
+
{ label: "Parquet", description: "列式存储格式" },
|
|
44
|
+
{ label: "ORC", description: "列式存储格式" }
|
|
45
|
+
]
|
|
46
|
+
}
|
|
47
|
+
]
|
|
48
|
+
})
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
**如果用户已经提供了足够信息,直接进入工作流,不再弹出菜单。**
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
16
55
|
## 适用场景
|
|
17
56
|
|
|
18
57
|
- 从对象存储(阿里云 OSS / AWS S3 / 腾讯云 COS)持续自动导入数据到 Lakehouse(PIPE 模式)
|
|
@@ -26,29 +65,18 @@ description: |
|
|
|
26
65
|
|
|
27
66
|
- ClickZetta Lakehouse 账户,具备创建 PIPE、表、存储连接、Volume 等权限
|
|
28
67
|
- 对象存储桶可达(Endpoint、AccessKey 或 Role ARN)
|
|
29
|
-
-
|
|
30
|
-
- **cz-cli 路径**:已安装 cz-cli(`pip install cz-cli`),并完成 `cz-cli configure` 配置
|
|
31
|
-
- **MCP 路径**:clickzetta-mcp-server 工具可用(`LH_execute_query`、`LH_show_object_list` 等)
|
|
68
|
+
- **执行环境**:已安装并配置 cz-cli
|
|
32
69
|
|
|
33
|
-
##
|
|
70
|
+
## 执行环境
|
|
34
71
|
|
|
35
|
-
|
|
72
|
+
所有 SQL 通过 `cz-cli sql` 执行:
|
|
36
73
|
|
|
37
|
-
**第一步:检测 cz-cli 是否可用**
|
|
38
74
|
```bash
|
|
39
|
-
cz-cli --version
|
|
75
|
+
cz-cli --version # 确认 cz-cli 可用
|
|
76
|
+
cz-cli sql "SELECT 1" --sync # 验证连接
|
|
40
77
|
```
|
|
41
|
-
- 若命令存在 → **走 cz-cli 路径**(见本文档末尾"cz-cli 替代路径"章节)
|
|
42
|
-
- 若命令不存在 → 继续检测 MCP
|
|
43
78
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
尝试调用 `LH_execute_query` 工具执行一条简单 SQL(如 `SELECT 1`)。
|
|
47
|
-
- 若工具存在于 tool list → **走 MCP 路径**(本文档默认路径)
|
|
48
|
-
- 若工具不存在 → 停止执行,提示用户:
|
|
49
|
-
> "当前环境既无 cz-cli 也无 MCP 工具,请安装其中之一后重试。
|
|
50
|
-
> cz-cli 安装:`pip install cz-cli`,然后运行 `cz-cli configure`
|
|
51
|
-
> MCP 安装:参考 clickzetta-mcp-server 配置文档"
|
|
79
|
+
若上述任一命令执行失败,说明 cz-cli 尚未安装或未正确配置;请参考官方文档完成安装与配置后重试。
|
|
52
80
|
|
|
53
81
|
## 核心概念
|
|
54
82
|
|
|
@@ -82,7 +110,7 @@ cz-cli --version
|
|
|
82
110
|
#### 步骤 1:创建存储连接(Storage Connection)
|
|
83
111
|
|
|
84
112
|
```sql
|
|
85
|
-
--
|
|
113
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
86
114
|
-- 密钥方式(LIST_PURGE 模式支持)
|
|
87
115
|
CREATE STORAGE CONNECTION IF NOT EXISTS my_oss_connection
|
|
88
116
|
TYPE OSS
|
|
@@ -102,7 +130,7 @@ CREATE STORAGE CONNECTION IF NOT EXISTS my_oss_connection
|
|
|
102
130
|
#### 步骤 2:创建外部 Volume
|
|
103
131
|
|
|
104
132
|
```sql
|
|
105
|
-
--
|
|
133
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
106
134
|
CREATE EXTERNAL VOLUME IF NOT EXISTS pipe_volume
|
|
107
135
|
LOCATION 'oss://my-bucket/data-path/'
|
|
108
136
|
USING CONNECTION my_oss_connection
|
|
@@ -121,7 +149,7 @@ CREATE EXTERNAL VOLUME IF NOT EXISTS pipe_volume
|
|
|
121
149
|
在创建 PIPE 之前,先用 COPY INTO 验证数据能正常加载:
|
|
122
150
|
|
|
123
151
|
```sql
|
|
124
|
-
--
|
|
152
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
125
153
|
COPY INTO my_schema.target_table
|
|
126
154
|
FROM VOLUME pipe_volume
|
|
127
155
|
USING CSV OPTIONS ('header' = 'true', 'delimiter' = ',') PURGE=true;
|
|
@@ -134,7 +162,7 @@ USING CSV OPTIONS ('header' = 'true', 'delimiter' = ',') PURGE=true;
|
|
|
134
162
|
#### 步骤 4:创建 PIPE(LIST_PURGE 模式)
|
|
135
163
|
|
|
136
164
|
```sql
|
|
137
|
-
--
|
|
165
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
138
166
|
CREATE PIPE IF NOT EXISTS my_oss_pipe
|
|
139
167
|
INGEST_MODE = 'LIST_PURGE'
|
|
140
168
|
VIRTUAL_CLUSTER = 'my_vc'
|
|
@@ -168,7 +196,7 @@ USING CSV OPTIONS ('header' = 'true') PURGE=true;
|
|
|
168
196
|
#### 步骤 5:验证 PIPE 状态
|
|
169
197
|
|
|
170
198
|
```sql
|
|
171
|
-
--
|
|
199
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
172
200
|
DESC PIPE EXTENDED my_oss_pipe;
|
|
173
201
|
```
|
|
174
202
|
|
|
@@ -190,7 +218,7 @@ DESC PIPE EXTENDED my_oss_pipe;
|
|
|
190
218
|
#### 步骤 1:创建存储连接(Role ARN 方式)
|
|
191
219
|
|
|
192
220
|
```sql
|
|
193
|
-
--
|
|
221
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
194
222
|
CREATE STORAGE CONNECTION IF NOT EXISTS my_oss_role_connection
|
|
195
223
|
TYPE OSS
|
|
196
224
|
ENDPOINT = 'oss-cn-hangzhou.aliyuncs.com'
|
|
@@ -201,7 +229,7 @@ CREATE STORAGE CONNECTION IF NOT EXISTS my_oss_role_connection
|
|
|
201
229
|
#### 步骤 2:创建外部 Volume
|
|
202
230
|
|
|
203
231
|
```sql
|
|
204
|
-
--
|
|
232
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
205
233
|
CREATE EXTERNAL VOLUME IF NOT EXISTS pipe_event_volume
|
|
206
234
|
LOCATION 'oss://my-bucket/data-path/'
|
|
207
235
|
USING CONNECTION my_oss_role_connection
|
|
@@ -212,7 +240,7 @@ CREATE EXTERNAL VOLUME IF NOT EXISTS pipe_event_volume
|
|
|
212
240
|
#### 步骤 3:创建 PIPE(EVENT_NOTIFICATION 模式)
|
|
213
241
|
|
|
214
242
|
```sql
|
|
215
|
-
--
|
|
243
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
216
244
|
CREATE PIPE IF NOT EXISTS my_oss_event_pipe
|
|
217
245
|
INGEST_MODE = 'EVENT_NOTIFICATION'
|
|
218
246
|
VIRTUAL_CLUSTER = 'my_vc'
|
|
@@ -245,7 +273,7 @@ USING CSV;
|
|
|
245
273
|
#### 步骤 1:创建目标表
|
|
246
274
|
|
|
247
275
|
```sql
|
|
248
|
-
--
|
|
276
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
249
277
|
CREATE TABLE IF NOT EXISTS my_schema.target_table (
|
|
250
278
|
id STRING,
|
|
251
279
|
name STRING,
|
|
@@ -257,7 +285,7 @@ CREATE TABLE IF NOT EXISTS my_schema.target_table (
|
|
|
257
285
|
#### 步骤 2:创建存储连接(access_id/access_key 语法)
|
|
258
286
|
|
|
259
287
|
```sql
|
|
260
|
-
--
|
|
288
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
261
289
|
CREATE STORAGE CONNECTION IF NOT EXISTS my_batch_conn
|
|
262
290
|
TYPE OSS
|
|
263
291
|
ENDPOINT = 'oss-cn-shanghai-internal.aliyuncs.com'
|
|
@@ -273,7 +301,7 @@ CREATE STORAGE CONNECTION IF NOT EXISTS my_batch_conn
|
|
|
273
301
|
#### 步骤 3:创建外部 Volume(启用目录自动刷新)
|
|
274
302
|
|
|
275
303
|
```sql
|
|
276
|
-
--
|
|
304
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
277
305
|
CREATE EXTERNAL VOLUME IF NOT EXISTS my_batch_volume
|
|
278
306
|
LOCATION 'oss://my-bucket/data-path/'
|
|
279
307
|
USING CONNECTION my_batch_conn
|
|
@@ -293,7 +321,7 @@ CREATE EXTERNAL VOLUME IF NOT EXISTS my_batch_volume
|
|
|
293
321
|
#### 步骤 4a:INSERT INTO 从 Volume 导入(支持过滤转换)
|
|
294
322
|
|
|
295
323
|
```sql
|
|
296
|
-
--
|
|
324
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
297
325
|
INSERT INTO my_schema.target_table
|
|
298
326
|
SELECT * FROM VOLUME my_batch_volume (
|
|
299
327
|
id STRING,
|
|
@@ -315,7 +343,7 @@ WHERE amount > 0;
|
|
|
315
343
|
#### 步骤 4b:COPY INTO 从 Volume 导入(简洁语法)
|
|
316
344
|
|
|
317
345
|
```sql
|
|
318
|
-
--
|
|
346
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
319
347
|
COPY INTO my_schema.target_table
|
|
320
348
|
FROM VOLUME my_batch_volume (
|
|
321
349
|
id STRING,
|
|
@@ -334,7 +362,7 @@ FROM VOLUME my_batch_volume (
|
|
|
334
362
|
#### 步骤 5:验证导入结果
|
|
335
363
|
|
|
336
364
|
```sql
|
|
337
|
-
--
|
|
365
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
338
366
|
SELECT COUNT(*) AS total_rows FROM my_schema.target_table;
|
|
339
367
|
SELECT * FROM my_schema.target_table LIMIT 10;
|
|
340
368
|
```
|
|
@@ -346,7 +374,7 @@ SELECT * FROM my_schema.target_table LIMIT 10;
|
|
|
346
374
|
### 查看 PIPE 详细状态
|
|
347
375
|
|
|
348
376
|
```sql
|
|
349
|
-
--
|
|
377
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
350
378
|
DESC PIPE EXTENDED my_oss_pipe;
|
|
351
379
|
```
|
|
352
380
|
|
|
@@ -359,7 +387,7 @@ DESC PIPE EXTENDED my_oss_pipe;
|
|
|
359
387
|
### 查看加载历史
|
|
360
388
|
|
|
361
389
|
```sql
|
|
362
|
-
--
|
|
390
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
363
391
|
SELECT * FROM load_history('my_schema.target_table')
|
|
364
392
|
ORDER BY last_load_time DESC
|
|
365
393
|
LIMIT 20;
|
|
@@ -372,7 +400,7 @@ LIMIT 20;
|
|
|
372
400
|
PIPE 执行的作业会自动打上 `query_tag`,格式为:`pipe.<workspace_name>.<schema_name>.<pipe_name>`
|
|
373
401
|
|
|
374
402
|
```sql
|
|
375
|
-
--
|
|
403
|
+
-- 通过 cz-cli sql "<SQL>" --sync 执行
|
|
376
404
|
-- 在 JOBS 列表中过滤 PIPE 相关作业
|
|
377
405
|
SHOW JOBS WHERE query_tag = 'pipe.my_workspace.my_schema.my_oss_pipe';
|
|
378
406
|
```
|
|
@@ -450,10 +478,7 @@ DROP PIPE IF EXISTS my_oss_pipe;
|
|
|
450
478
|
|
|
451
479
|
---
|
|
452
480
|
|
|
453
|
-
## cz-cli
|
|
454
|
-
|
|
455
|
-
> 仅在 cz-cli 可用且 MCP 不可用时使用本节。步骤编号与上方 MCP 路径对应。
|
|
456
|
-
> 所有操作通过 `cz-cli agent run` 委托给内置 agent 完成,agent 内置完整的 MCP 工具访问能力。
|
|
481
|
+
## cz-cli 执行路径
|
|
457
482
|
|
|
458
483
|
### 模式 A:LIST_PURGE 扫描模式(cz-cli 版)
|
|
459
484
|
|