@clickzetta/cz-cli-darwin-x64 0.3.17 → 0.3.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +386 -0
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +548 -0
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +220 -0
- package/bin/skills/clickzetta-data-ingest-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +112 -0
- package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +257 -0
- package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +124 -0
- package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +96 -0
- package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +109 -0
- package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +156 -0
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +751 -0
- package/bin/skills/clickzetta-kafka-ingest-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +324 -0
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +537 -0
- package/bin/skills/clickzetta-query-optimizer/SKILL.md +156 -0
- package/bin/skills/clickzetta-query-optimizer/references/explain.md +56 -0
- package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +78 -0
- package/bin/skills/clickzetta-query-optimizer/references/optimize.md +65 -0
- package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +49 -0
- package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +42 -0
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +276 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +379 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +166 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +185 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +129 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +222 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +125 -0
- package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +206 -0
- package/bin/skills/clickzetta-vcluster-manager/SKILL.md +212 -0
- package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +54 -0
- package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +150 -0
- package/bin/skills/clickzetta-volume-manager/SKILL.md +292 -0
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +199 -0
- package/package.json +1 -1
- /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/SKILL.md +0 -0
- /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/references/dt-declaration-strategy.md +0 -0
- /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/references/incremental-config-reference.md +0 -0
- /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/references/refresh-history-guide.md +0 -0
- /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/references/sql-limitations.md +0 -0
- /package/bin/skills/{dynamic-table-alter → clickzetta-dynamic-table/dynamic-table-alter}/SKILL.md +0 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: clickzetta-table-stream-pipeline
|
|
3
|
+
description: |
|
|
4
|
+
搭建和管理 ClickZetta Table Stream 变更数据捕获管道,覆盖从源表配置、Stream 创建、
|
|
5
|
+
数据消费到增量 ETL 的端到端工作流。当用户说"创建 Table Stream"、"Table Stream CDC"、
|
|
6
|
+
"Table Stream 管道"、"Table Stream 增量消费"、"Stream 消费"时触发。
|
|
7
|
+
包含变更跟踪开启、模式选择、offset 管理、元数据字段使用、幂等消费等 ClickZetta 特有逻辑。
|
|
8
|
+
Keywords: table stream, CDC, change capture, incremental ETL, stream
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Table Stream 变更数据捕获工作流
|
|
12
|
+
|
|
13
|
+
## 指令
|
|
14
|
+
|
|
15
|
+
### 步骤 1:开启源表变更跟踪(必需前置)
|
|
16
|
+
执行 SQL 开启源表的 change_tracking:
|
|
17
|
+
```sql
|
|
18
|
+
ALTER TABLE <source_table> SET PROPERTIES ('change_tracking' = 'true');
|
|
19
|
+
```
|
|
20
|
+
- 这是强制性前置步骤,不执行则 Stream 无法正确捕获变更
|
|
21
|
+
- 验证属性是否生效(两种方法):
|
|
22
|
+
```sql
|
|
23
|
+
-- 方法 1:DESC EXTENDED 查看 properties
|
|
24
|
+
DESC EXTENDED <source_table>;
|
|
25
|
+
|
|
26
|
+
-- 方法 2:查询 information_schema
|
|
27
|
+
SELECT table_name, properties FROM information_schema.tables WHERE table_name = '<source_table>';
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### 步骤 2:创建 Table Stream
|
|
31
|
+
执行 SQL 创建 Stream:
|
|
32
|
+
```sql
|
|
33
|
+
CREATE [ OR REPLACE ] TABLE STREAM <stream_name>
|
|
34
|
+
ON TABLE <source_table>
|
|
35
|
+
[ TIMESTAMP AS OF '<timestamp>' ]
|
|
36
|
+
[ COMMENT '<描述>' ]
|
|
37
|
+
WITH PROPERTIES (
|
|
38
|
+
'TABLE_STREAM_MODE' = 'STANDARD | APPEND_ONLY',
|
|
39
|
+
'SHOW_INITIAL_ROWS' = 'TRUE | FALSE'
|
|
40
|
+
);
|
|
41
|
+
```
|
|
42
|
+
关键参数选择:
|
|
43
|
+
- **STANDARD 模式**:捕获 INSERT/UPDATE/DELETE,反映表当前状态(delta 变化) → 适用于数据同步、增量 ETL
|
|
44
|
+
- delta 变化指两个事务时间点之间的净变化。例如:先 INSERT 再 DELETE 同一行 → delta 为空;先 INSERT 再 UPDATE → delta 为一条新行(最终状态)
|
|
45
|
+
- **APPEND_ONLY 模式**:仅捕获 INSERT,保留所有历史插入记录 → 适用于审计、历史记录保留
|
|
46
|
+
- 即使后续 DELETE 了某行,APPEND_ONLY 模式仍保留该行的 INSERT 记录
|
|
47
|
+
- **SHOW_INITIAL_ROWS = TRUE**:首次消费返回建 Stream 时表中已有行
|
|
48
|
+
- **SHOW_INITIAL_ROWS = FALSE**(默认):首次消费仅返回建 Stream 后的新变更
|
|
49
|
+
- 可选:指定起始时间点
|
|
50
|
+
```sql
|
|
51
|
+
-- TIMESTAMP AS OF 用于指定 Stream 的起始读取位点
|
|
52
|
+
-- 注意:此功能在某些场景下可能不稳定,建议优先使用默认行为(从创建时刻开始)
|
|
53
|
+
CREATE TABLE STREAM <stream_name>
|
|
54
|
+
ON TABLE <source_table>
|
|
55
|
+
TIMESTAMP AS OF '<timestamp>'
|
|
56
|
+
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### 步骤 3:准备目标表
|
|
60
|
+
创建与源表结构兼容的目标表:
|
|
61
|
+
- 目标表列定义需包含源表的业务列
|
|
62
|
+
- 建议额外添加元数据列(如 sync_version、sync_timestamp)用于追踪
|
|
63
|
+
|
|
64
|
+
### 步骤 4:查询 Stream 数据(预览,不移动 offset)
|
|
65
|
+
执行 SELECT 预览 Stream 中的变更数据:
|
|
66
|
+
```sql
|
|
67
|
+
SELECT *, __change_type, __commit_version, __commit_timestamp
|
|
68
|
+
FROM <stream_name>;
|
|
69
|
+
```
|
|
70
|
+
- 仅 SELECT 不会移动 offset
|
|
71
|
+
- 元数据字段:`__change_type`(值:`INSERT` / `UPDATE_BEFORE` / `UPDATE_AFTER` / `DELETE`)、`__commit_version`、`__commit_timestamp`
|
|
72
|
+
- **UPDATE 处理要点**:UPDATE 操作产生两条记录:
|
|
73
|
+
- `UPDATE_BEFORE`:更新前的旧值(通常在消费时忽略)
|
|
74
|
+
- `UPDATE_AFTER`:更新后的新值(用于写入目标表)
|
|
75
|
+
- 消费时务必过滤 `__change_type`,避免将 `UPDATE_BEFORE` 旧值误写入目标表
|
|
76
|
+
|
|
77
|
+
### 步骤 5:消费 Stream 数据(移动 offset)
|
|
78
|
+
执行 DML 操作消费数据:
|
|
79
|
+
|
|
80
|
+
#### 方式 A:全量消费(INSERT INTO)
|
|
81
|
+
```sql
|
|
82
|
+
INSERT INTO <target_table>
|
|
83
|
+
SELECT <columns> FROM <stream_name>;
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
#### 方式 B:幂等消费(MERGE,推荐)
|
|
87
|
+
```sql
|
|
88
|
+
MERGE INTO <target_table> t
|
|
89
|
+
USING (SELECT * FROM <stream_name> WHERE __change_type != 'UPDATE_BEFORE') s
|
|
90
|
+
ON t.<pk_column> = s.<pk_column>
|
|
91
|
+
WHEN MATCHED AND s.__change_type IN ('INSERT', 'UPDATE_AFTER') THEN UPDATE SET t.col1 = s.col1, t.col2 = s.col2
|
|
92
|
+
WHEN MATCHED AND s.__change_type = 'DELETE' THEN DELETE
|
|
93
|
+
WHEN NOT MATCHED AND s.__change_type = 'INSERT' THEN INSERT (<columns>) VALUES (s.<columns>);
|
|
94
|
+
```
|
|
95
|
+
- DML 操作(INSERT/UPDATE/MERGE)会移动 offset
|
|
96
|
+
- ⚠️ 即使使用 WHERE 条件过滤,**所有数据的 offset 仍会移动**(不仅是匹配的行)
|
|
97
|
+
- 推荐使用 MERGE 实现幂等性,避免重复消费导致数据重复
|
|
98
|
+
- 在 USING 子查询中过滤掉 `UPDATE_BEFORE`,避免旧值干扰 MERGE 逻辑
|
|
99
|
+
- ⚠️ **MERGE 语法顺序要求**:多个 `WHEN MATCHED` 子句时,**UPDATE 必须在 DELETE 之前**,否则报错(错误信息:`update statement must be before delete statement`)
|
|
100
|
+
|
|
101
|
+
### 步骤 6:验证消费状态
|
|
102
|
+
执行查询确认消费完成:
|
|
103
|
+
```sql
|
|
104
|
+
SELECT COUNT(*) FROM <stream_name>;
|
|
105
|
+
```
|
|
106
|
+
- 消费成功后 COUNT 应为 0 或仅包含新变更
|
|
107
|
+
- 记录最后消费的 `__commit_version` 用于故障恢复
|
|
108
|
+
|
|
109
|
+
## Offset 移动规则
|
|
110
|
+
|
|
111
|
+
| 操作 | 是否移动 offset | 说明 |
|
|
112
|
+
|------|----------------|------|
|
|
113
|
+
| `SELECT * FROM stream` | ❌ 不移动 | 仅预览,可反复查询 |
|
|
114
|
+
| `INSERT INTO target SELECT ... FROM stream` | ✅ 移动 | 消费数据 |
|
|
115
|
+
| `MERGE INTO target USING stream ...` | ✅ 移动 | 消费数据(推荐) |
|
|
116
|
+
| `UPDATE target SET ... FROM stream` | ✅ 移动 | 消费数据 |
|
|
117
|
+
| `DELETE FROM target USING stream` | ✅ 移动 | 消费数据 |
|
|
118
|
+
| 带 WHERE 的 DML | ✅ 全部移动 | 即使 WHERE 过滤了部分行,所有行的 offset 都会移动 |
|
|
119
|
+
|
|
120
|
+
> ⚠️ **关键注意**:offset 移动是全量的。一旦执行 DML 消费 Stream,所有变更记录的 offset 都会前进,无法部分消费。如果 DML 执行失败(如目标表不存在),offset 不会移动。
|
|
121
|
+
|
|
122
|
+
## 模式选择速查
|
|
123
|
+
|
|
124
|
+
| 需求 | 推荐模式 |
|
|
125
|
+
|------|---------|
|
|
126
|
+
| 数据同步(保持目标与源一致) | STANDARD |
|
|
127
|
+
| 增量 ETL 流程 | STANDARD |
|
|
128
|
+
| 审计所有插入记录 | APPEND_ONLY |
|
|
129
|
+
| 历史记录保留 | APPEND_ONLY |
|
|
130
|
+
|
|
131
|
+
## 性能优化要点
|
|
132
|
+
|
|
133
|
+
- 只 SELECT 必要列,避免 `SELECT *`
|
|
134
|
+
- 定期消费 Stream,避免数据累积
|
|
135
|
+
- 高变更率表:更频繁消费;低变更率表:降低频率
|
|
136
|
+
- 大型 Stream 可按主键范围拆分并行处理
|
|
137
|
+
- 在源表上设置适当的数据保留期
|
|
138
|
+
|
|
139
|
+
## 示例
|
|
140
|
+
|
|
141
|
+
### 示例 1:订单表实时同步
|
|
142
|
+
```sql
|
|
143
|
+
-- 1. 开启源表变更跟踪
|
|
144
|
+
ALTER TABLE orders SET PROPERTIES ('change_tracking' = 'true');
|
|
145
|
+
|
|
146
|
+
-- 2. 创建 Table Stream
|
|
147
|
+
CREATE TABLE STREAM orders_stream ON TABLE orders
|
|
148
|
+
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD', 'SHOW_INITIAL_ROWS' = 'FALSE');
|
|
149
|
+
|
|
150
|
+
-- 3. 创建目标表(与源表结构兼容)
|
|
151
|
+
CREATE TABLE orders_sync (order_id INT, status STRING, amount DOUBLE);
|
|
152
|
+
|
|
153
|
+
-- 4. 预览 Stream 数据(不移动 offset)
|
|
154
|
+
SELECT *, __commit_version, __commit_timestamp FROM orders_stream;
|
|
155
|
+
|
|
156
|
+
-- 5. 消费 Stream 数据(移动 offset)
|
|
157
|
+
MERGE INTO orders_sync t
|
|
158
|
+
USING (SELECT * FROM orders_stream WHERE __change_type != 'UPDATE_BEFORE') s
|
|
159
|
+
ON t.order_id = s.order_id
|
|
160
|
+
WHEN MATCHED AND s.__change_type IN ('INSERT', 'UPDATE_AFTER') THEN UPDATE SET t.status = s.status, t.amount = s.amount
|
|
161
|
+
WHEN MATCHED AND s.__change_type = 'DELETE' THEN DELETE
|
|
162
|
+
WHEN NOT MATCHED AND s.__change_type = 'INSERT' THEN INSERT (order_id, status, amount) VALUES (s.order_id, s.status, s.amount);
|
|
163
|
+
|
|
164
|
+
-- 6. 验证消费完成
|
|
165
|
+
SELECT COUNT(*) FROM orders_stream;
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### 示例 2:用户行为审计(保留全部插入历史)
|
|
169
|
+
```sql
|
|
170
|
+
-- 1. 开启源表变更跟踪
|
|
171
|
+
ALTER TABLE user_actions SET PROPERTIES ('change_tracking' = 'true');
|
|
172
|
+
|
|
173
|
+
-- 2. 创建 Table Stream(APPEND_ONLY 模式)
|
|
174
|
+
CREATE TABLE STREAM user_actions_audit_stream ON TABLE user_actions
|
|
175
|
+
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'APPEND_ONLY', 'SHOW_INITIAL_ROWS' = 'TRUE');
|
|
176
|
+
|
|
177
|
+
-- 3. 预览 Stream 数据
|
|
178
|
+
SELECT *, __commit_version, __commit_timestamp FROM user_actions_audit_stream;
|
|
179
|
+
|
|
180
|
+
-- 4. 消费 Stream 数据(INSERT INTO 移动 offset)
|
|
181
|
+
INSERT INTO user_actions_audit
|
|
182
|
+
SELECT *, __commit_version AS audit_version, __commit_timestamp AS audit_time
|
|
183
|
+
FROM user_actions_audit_stream;
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## 故障排除
|
|
187
|
+
|
|
188
|
+
Stream 不捕获变更:
|
|
189
|
+
原因:源表未开启 change_tracking
|
|
190
|
+
解决方案:执行 `ALTER TABLE <table> SET PROPERTIES ('change_tracking' = 'true')`,确认 DML 在 Stream 创建后执行
|
|
191
|
+
|
|
192
|
+
无法区分变更类型:
|
|
193
|
+
原因:未在 MERGE/INSERT 中过滤 `__change_type`,导致 `UPDATE_BEFORE` 旧值也被写入目标表
|
|
194
|
+
解决方案:MERGE 时保留 `__change_type IN ('INSERT', 'UPDATE_AFTER', 'DELETE')`(即排除 `UPDATE_BEFORE` 记录),与步骤 5 示例中 `WHERE __change_type != 'UPDATE_BEFORE'` 的写法一致
|
|
195
|
+
|
|
196
|
+
消费后 offset 未移动:
|
|
197
|
+
原因:仅使用 SELECT 查询,未执行 DML
|
|
198
|
+
解决方案:必须通过 INSERT INTO / MERGE INTO / UPDATE 等 DML 操作消费数据
|
|
199
|
+
|
|
200
|
+
重复消费导致目标表数据重复:
|
|
201
|
+
原因:使用 INSERT INTO 而非 MERGE,或消费逻辑非幂等
|
|
202
|
+
解决方案:改用 MERGE 语句;记录最后消费的 `__commit_version` 和 `__commit_timestamp` 用于断点恢复
|
|
203
|
+
|
|
204
|
+
COMMENT 语法错误:
|
|
205
|
+
原因:使用了 `COMMENT = '...'`(带等号)而非 `COMMENT '...'`
|
|
206
|
+
解决方案:正确语法为 `COMMENT '注释内容'`,不带等号
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: clickzetta-vcluster-manager
|
|
3
|
+
description: |
|
|
4
|
+
管理 ClickZetta Lakehouse 计算集群(VCluster)的完整生命周期。
|
|
5
|
+
覆盖创建(通用型/分析型/同步型)、启动/停止、规格调整、弹性扩缩容、
|
|
6
|
+
缓存配置(PRELOAD_TABLES)、查看集群状态等操作。
|
|
7
|
+
当用户说"创建集群"、"计算集群"、"VCluster"、"启动集群"、"停止集群"、
|
|
8
|
+
"调整集群规格"、"集群扩容"、"集群缩容"、"自动停止"、"自动启动"、
|
|
9
|
+
"预加载缓存"、"PRELOAD"、"集群类型"、"GP集群"、"AP集群"、"分析型集群"、
|
|
10
|
+
"通用型集群"、"同步型集群"时触发。
|
|
11
|
+
Keywords: VCluster, compute cluster, create, suspend, resume, resize, auto-scale
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# ClickZetta 计算集群管理
|
|
15
|
+
|
|
16
|
+
阅读 [references/vcluster-ddl.md](references/vcluster-ddl.md) 了解完整语法。
|
|
17
|
+
|
|
18
|
+
## 集群类型选择
|
|
19
|
+
|
|
20
|
+
| 类型 | 关键字 | 适用场景 | 扩缩容方式 |
|
|
21
|
+
|---|---|---|---|
|
|
22
|
+
| 通用型(GP) | `GENERAL` | 离线 ETL、数据摄取、综合场景 | 纵向(规格大小) |
|
|
23
|
+
| 分析型(AP) | `ANALYTICS` | 高并发在线查询、BI 报表、Ad-Hoc | 横向(副本数) |
|
|
24
|
+
| 同步型 | `INTEGRATION` | 数据集成同步任务 | 纵向(规格大小) |
|
|
25
|
+
|
|
26
|
+
**规格单位**:CRU(Compute Resource Unit)
|
|
27
|
+
- 通用型/同步型:1-256 CRU,步长 1(同步型额外支持 0.25、0.5)
|
|
28
|
+
- 分析型:1-256 CRU,须为 2 的 n 次幂(1、2、4、8、16...)
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## 创建集群
|
|
33
|
+
|
|
34
|
+
```sql
|
|
35
|
+
-- 通用型:离线 ETL,8 CRU,作业完成后 60 秒自动停止
|
|
36
|
+
CREATE VCLUSTER IF NOT EXISTS etl_vc
|
|
37
|
+
VCLUSTER_TYPE = GENERAL
|
|
38
|
+
VCLUSTER_SIZE = 8
|
|
39
|
+
AUTO_SUSPEND_IN_SECOND = 60
|
|
40
|
+
AUTO_RESUME = TRUE
|
|
41
|
+
COMMENT '离线ETL专用集群';
|
|
42
|
+
|
|
43
|
+
-- 通用型:弹性规格(1-16 CRU)
|
|
44
|
+
CREATE VCLUSTER IF NOT EXISTS etl_elastic_vc
|
|
45
|
+
VCLUSTER_TYPE = GENERAL
|
|
46
|
+
MIN_VCLUSTER_SIZE = 1
|
|
47
|
+
MAX_VCLUSTER_SIZE = 16
|
|
48
|
+
AUTO_SUSPEND_IN_SECOND = 300;
|
|
49
|
+
|
|
50
|
+
-- 分析型:BI 报表,4 CRU,1-10 副本,最大 80 并发(10 副本 × 每副本 MAX_CONCURRENCY 8)
|
|
51
|
+
CREATE VCLUSTER IF NOT EXISTS bi_vc
|
|
52
|
+
VCLUSTER_TYPE = ANALYTICS
|
|
53
|
+
VCLUSTER_SIZE = 4
|
|
54
|
+
MIN_REPLICAS = 1
|
|
55
|
+
MAX_REPLICAS = 10
|
|
56
|
+
MAX_CONCURRENCY = 8
|
|
57
|
+
AUTO_SUSPEND_IN_SECOND = 1800
|
|
58
|
+
AUTO_RESUME = TRUE
|
|
59
|
+
COMMENT 'BI报表在线查询集群';
|
|
60
|
+
|
|
61
|
+
-- 同步型:数据集成任务
|
|
62
|
+
CREATE VCLUSTER IF NOT EXISTS sync_vc
|
|
63
|
+
VCLUSTER_TYPE = INTEGRATION
|
|
64
|
+
VCLUSTER_SIZE = 1
|
|
65
|
+
AUTO_RESUME = TRUE;
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## 启动 / 停止
|
|
71
|
+
|
|
72
|
+
```sql
|
|
73
|
+
-- 启动集群
|
|
74
|
+
ALTER VCLUSTER IF EXISTS etl_vc RESUME;
|
|
75
|
+
|
|
76
|
+
-- 正常停止(等待当前作业完成)
|
|
77
|
+
ALTER VCLUSTER IF EXISTS etl_vc SUSPEND;
|
|
78
|
+
|
|
79
|
+
-- 强制停止(立即中断所有作业)
|
|
80
|
+
ALTER VCLUSTER IF EXISTS etl_vc SUSPEND FORCE;
|
|
81
|
+
|
|
82
|
+
-- 取消集群中所有作业
|
|
83
|
+
ALTER VCLUSTER IF EXISTS etl_vc CANCEL ALL JOBS;
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## 修改集群属性
|
|
89
|
+
|
|
90
|
+
```sql
|
|
91
|
+
-- 调整规格
|
|
92
|
+
ALTER VCLUSTER IF EXISTS etl_vc SET VCLUSTER_SIZE = 16;
|
|
93
|
+
|
|
94
|
+
-- 修改自动停止时间
|
|
95
|
+
ALTER VCLUSTER IF EXISTS etl_vc SET AUTO_SUSPEND_IN_SECOND = 300;
|
|
96
|
+
|
|
97
|
+
-- 分析型:调整副本数和并发
|
|
98
|
+
ALTER VCLUSTER IF EXISTS bi_vc SET
|
|
99
|
+
MIN_REPLICAS = 2
|
|
100
|
+
MAX_REPLICAS = 5
|
|
101
|
+
MAX_CONCURRENCY = 16;
|
|
102
|
+
|
|
103
|
+
-- 修改注释
|
|
104
|
+
ALTER VCLUSTER IF EXISTS etl_vc SET COMMENT '新注释';
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## 缓存配置(仅分析型)
|
|
110
|
+
|
|
111
|
+
阅读 [references/vc-cache.md](references/vc-cache.md) 了解缓存详情。
|
|
112
|
+
|
|
113
|
+
```sql
|
|
114
|
+
-- 设置预加载表(覆盖写,需带上所有已有表)
|
|
115
|
+
ALTER VCLUSTER bi_vc SET PRELOAD_TABLES = "public.orders,public.customers";
|
|
116
|
+
|
|
117
|
+
-- 查看当前集群缓存状态
|
|
118
|
+
SHOW PRELOAD CACHED STATUS;
|
|
119
|
+
|
|
120
|
+
-- 查看指定集群缓存状态
|
|
121
|
+
SHOW VCLUSTER bi_vc PRELOAD CACHED STATUS;
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## 查看集群信息
|
|
127
|
+
|
|
128
|
+
```sql
|
|
129
|
+
-- 列出所有集群
|
|
130
|
+
SHOW VCLUSTERS;
|
|
131
|
+
|
|
132
|
+
-- 按类型过滤
|
|
133
|
+
SHOW VCLUSTERS WHERE vcluster_type = 'ANALYTICS';
|
|
134
|
+
SHOW VCLUSTERS WHERE state = 'SUSPENDED';
|
|
135
|
+
|
|
136
|
+
-- 按名称模糊匹配
|
|
137
|
+
SHOW VCLUSTERS LIKE 'etl%';
|
|
138
|
+
|
|
139
|
+
-- 查看集群详情
|
|
140
|
+
DESC VCLUSTER etl_vc;
|
|
141
|
+
DESC VCLUSTER EXTENDED bi_vc;
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## 删除集群
|
|
147
|
+
|
|
148
|
+
```sql
|
|
149
|
+
-- 等待当前作业完成后删除
|
|
150
|
+
DROP VCLUSTER IF EXISTS etl_vc;
|
|
151
|
+
|
|
152
|
+
-- 立即强制删除(中断正在运行的作业)
|
|
153
|
+
DROP VCLUSTER IF EXISTS etl_vc FORCE;
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## 切换当前会话集群
|
|
159
|
+
|
|
160
|
+
```sql
|
|
161
|
+
USE VCLUSTER bi_vc;
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## 典型场景
|
|
167
|
+
|
|
168
|
+
### 场景 1:离线 ETL 集群
|
|
169
|
+
|
|
170
|
+
```sql
|
|
171
|
+
CREATE VCLUSTER IF NOT EXISTS etl_daily
|
|
172
|
+
VCLUSTER_TYPE = GENERAL
|
|
173
|
+
VCLUSTER_SIZE = 8
|
|
174
|
+
AUTO_SUSPEND_IN_SECOND = 60
|
|
175
|
+
AUTO_RESUME = TRUE
|
|
176
|
+
COMMENT '每日ETL作业,完成后1分钟自动停止';
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### 场景 2:在线 BI 报表集群(高并发)
|
|
180
|
+
|
|
181
|
+
```sql
|
|
182
|
+
CREATE VCLUSTER IF NOT EXISTS bi_online
|
|
183
|
+
VCLUSTER_TYPE = ANALYTICS
|
|
184
|
+
VCLUSTER_SIZE = 4
|
|
185
|
+
MIN_REPLICAS = 1
|
|
186
|
+
MAX_REPLICAS = 10
|
|
187
|
+
MAX_CONCURRENCY = 8
|
|
188
|
+
AUTO_SUSPEND_IN_SECOND = 1800
|
|
189
|
+
AUTO_RESUME = TRUE
|
|
190
|
+
COMMENT 'BI在线查询,最大支持80并发';
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### 场景 3:数据集成同步集群
|
|
194
|
+
|
|
195
|
+
```sql
|
|
196
|
+
CREATE VCLUSTER IF NOT EXISTS cdc_sync
|
|
197
|
+
VCLUSTER_TYPE = INTEGRATION
|
|
198
|
+
VCLUSTER_SIZE = 0.5
|
|
199
|
+
AUTO_RESUME = TRUE
|
|
200
|
+
COMMENT '轻量CDC同步任务';
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## 常见问题
|
|
206
|
+
|
|
207
|
+
| 问题 | 原因 | 解决方案 |
|
|
208
|
+
|---|---|---|
|
|
209
|
+
| 分析型集群规格报错 | 规格须为 2 的 n 次幂 | 使用 1、2、4、8、16、32... |
|
|
210
|
+
| PRELOAD_TABLES 不生效 | 仅 AP 型集群支持 | 确认集群类型为 ANALYTICS |
|
|
211
|
+
| 添加预加载表后原有表消失 | PRELOAD_TABLES 是覆盖写 | 设置时带上所有已有表 |
|
|
212
|
+
| 集群停止后缓存丢失 | 本地缓存随集群停止释放 | 重启后自动重新加载 PRELOAD 表 |
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# 计算集群缓存参考
|
|
2
|
+
|
|
3
|
+
> 来源:https://www.yunqi.tech/documents/vc_cache
|
|
4
|
+
|
|
5
|
+
## 缓存类型
|
|
6
|
+
|
|
7
|
+
Lakehouse 提供三种缓存:
|
|
8
|
+
1. **查询结果缓存(ResultCache)** - 服务层,工作空间内共享
|
|
9
|
+
2. **元数据缓存(MetadataCache)** - 服务层,工作空间内共享
|
|
10
|
+
3. **计算集群本地缓存(Local Disk Cache)** - 保存在集群本地节点,仅使用指定集群时可用
|
|
11
|
+
|
|
12
|
+
## 主动缓存(PRELOAD_TABLES)
|
|
13
|
+
|
|
14
|
+
仅适用于**分析型(AP)**集群。集群每次启动时自动加载预缓存表的最新数据/分区。
|
|
15
|
+
|
|
16
|
+
```sql
|
|
17
|
+
-- 设置预加载表(覆盖写,需带上所有已有表)
|
|
18
|
+
ALTER VCLUSTER default SET PRELOAD_TABLES = "schema1.table1,schema2.table2";
|
|
19
|
+
|
|
20
|
+
-- 添加新表时,必须包含原有表,否则会覆盖
|
|
21
|
+
ALTER VCLUSTER default SET PRELOAD_TABLES = "schema1.table1,schema2.table2,schema3.table3";
|
|
22
|
+
|
|
23
|
+
-- 支持通配符
|
|
24
|
+
ALTER VCLUSTER bi_vc SET PRELOAD_TABLES = "sales.*,public.dim_date";
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
⚠️ 注意:执行缓存命令后,只有新写入的数据才会被缓存。
|
|
28
|
+
|
|
29
|
+
## 被动缓存
|
|
30
|
+
|
|
31
|
+
首次查询时自动缓存读取的文件,后续相同查询直接命中缓存。支持 GP 型和 AP 型集群。
|
|
32
|
+
|
|
33
|
+
## 查看缓存状态
|
|
34
|
+
|
|
35
|
+
```sql
|
|
36
|
+
-- 显示当前集群的预加载表/分区状态
|
|
37
|
+
SHOW PRELOAD CACHED STATUS;
|
|
38
|
+
|
|
39
|
+
-- 显示指定集群的预加载状态
|
|
40
|
+
SHOW VCLUSTER <vc_name> PRELOAD CACHED STATUS;
|
|
41
|
+
|
|
42
|
+
-- 按表名过滤
|
|
43
|
+
SHOW VCLUSTER <vc_name> PRELOAD CACHED STATUS WHERE table LIKE '%table_name%';
|
|
44
|
+
|
|
45
|
+
-- 显示预加载缓存汇总信息
|
|
46
|
+
SHOW EXTENDED PRELOAD CACHED STATUS;
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## 注意事项
|
|
50
|
+
|
|
51
|
+
- 集群停止时,本地缓存自动释放
|
|
52
|
+
- AP 型集群重启时只缓存最新写入的数据或分区
|
|
53
|
+
- `SHOW PRELOAD` 状态更新可能有约 10 分钟延迟,但缓存实际已生效
|
|
54
|
+
- PRELOAD_TABLES 是覆盖写,添加新表时需带上所有已有表
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# CREATE / ALTER / DROP VCLUSTER 参考
|
|
2
|
+
|
|
3
|
+
> 来源:https://www.yunqi.tech/documents/create_cluster,以及对应的 alter-vcluster、drop-vcluster 文档
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## 集群类型选择
|
|
8
|
+
|
|
9
|
+
| 类型 | 关键字 | 适用场景 | 扩缩容方式 |
|
|
10
|
+
|---|---|---|---|
|
|
11
|
+
| 通用型(GP) | `GENERAL` | 离线 ETL、数据摄取、综合场景 | 纵向(规格大小) |
|
|
12
|
+
| 分析型(AP) | `ANALYTICS` | 高并发在线查询、BI 报表、Ad-Hoc | 横向(副本数) |
|
|
13
|
+
| 同步型 | `INTEGRATION` | 数据集成同步任务 | 纵向(规格大小) |
|
|
14
|
+
|
|
15
|
+
**规格单位**:CRU(Compute Resource Unit)
|
|
16
|
+
- 通用型/同步型:1-256 CRU,步长 1(同步型额外支持 0.25、0.5)
|
|
17
|
+
- 分析型:1-256 CRU,须为 2 的 n 次幂(1、2、4、8、16...)
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## CREATE VCLUSTER
|
|
22
|
+
|
|
23
|
+
```sql
|
|
24
|
+
CREATE VCLUSTER [IF NOT EXISTS] <name>
|
|
25
|
+
VCLUSTER_TYPE = GENERAL | ANALYTICS | INTEGRATION
|
|
26
|
+
VCLUSTER_SIZE = num -- 固定规格
|
|
27
|
+
-- 或弹性规格(通用型/同步型)
|
|
28
|
+
MIN_VCLUSTER_SIZE = num
|
|
29
|
+
MAX_VCLUSTER_SIZE = num
|
|
30
|
+
AUTO_SUSPEND_IN_SECOND = num -- 空闲自动停止秒数,-1 表示不停止,默认 600
|
|
31
|
+
AUTO_RESUME = TRUE | FALSE -- 是否自动启动,默认 TRUE
|
|
32
|
+
QUERY_RUNTIME_LIMIT_IN_SECOND = num -- 单作业最大执行时长(秒),默认 86400
|
|
33
|
+
[COMMENT '']
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### 分析型专有参数
|
|
37
|
+
|
|
38
|
+
```sql
|
|
39
|
+
MIN_REPLICAS = num -- 最小实例数(1-10),默认 1
|
|
40
|
+
MAX_REPLICAS = num -- 最大实例数(1-10),默认 1
|
|
41
|
+
MAX_CONCURRENCY = num -- 每实例最大并发数(1-32),默认 8
|
|
42
|
+
PRELOAD_TABLES = "schema.table1,schema.table2" -- 预加载缓存表
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### 示例
|
|
46
|
+
|
|
47
|
+
```sql
|
|
48
|
+
-- 通用型:离线 ETL,8 CRU,作业完成后 60 秒自动停止
|
|
49
|
+
CREATE VCLUSTER IF NOT EXISTS etl_vc
|
|
50
|
+
VCLUSTER_TYPE = GENERAL
|
|
51
|
+
VCLUSTER_SIZE = 8
|
|
52
|
+
AUTO_SUSPEND_IN_SECOND = 60
|
|
53
|
+
AUTO_RESUME = TRUE
|
|
54
|
+
COMMENT '离线ETL专用集群';
|
|
55
|
+
|
|
56
|
+
-- 通用型:弹性规格(1-16 CRU)
|
|
57
|
+
CREATE VCLUSTER IF NOT EXISTS etl_elastic_vc
|
|
58
|
+
VCLUSTER_TYPE = GENERAL
|
|
59
|
+
MIN_VCLUSTER_SIZE = 1
|
|
60
|
+
MAX_VCLUSTER_SIZE = 16
|
|
61
|
+
AUTO_SUSPEND_IN_SECOND = 300;
|
|
62
|
+
|
|
63
|
+
-- 分析型:BI 报表,4 CRU,1-10 副本,最大 80 并发(10 副本 × 每副本 MAX_CONCURRENCY 8)
|
|
64
|
+
CREATE VCLUSTER IF NOT EXISTS bi_vc
|
|
65
|
+
VCLUSTER_TYPE = ANALYTICS
|
|
66
|
+
VCLUSTER_SIZE = 4
|
|
67
|
+
MIN_REPLICAS = 1
|
|
68
|
+
MAX_REPLICAS = 10
|
|
69
|
+
MAX_CONCURRENCY = 8
|
|
70
|
+
AUTO_SUSPEND_IN_SECOND = 1800
|
|
71
|
+
AUTO_RESUME = TRUE
|
|
72
|
+
COMMENT 'BI报表在线查询集群';
|
|
73
|
+
|
|
74
|
+
-- 同步型:数据集成任务
|
|
75
|
+
CREATE VCLUSTER IF NOT EXISTS sync_vc
|
|
76
|
+
VCLUSTER_TYPE = INTEGRATION
|
|
77
|
+
VCLUSTER_SIZE = 1
|
|
78
|
+
AUTO_RESUME = TRUE;
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## ALTER VCLUSTER
|
|
84
|
+
|
|
85
|
+
```sql
|
|
86
|
+
-- 启动集群
|
|
87
|
+
ALTER VCLUSTER [IF EXISTS] <name> RESUME;
|
|
88
|
+
|
|
89
|
+
-- 停止集群
|
|
90
|
+
ALTER VCLUSTER [IF EXISTS] <name> SUSPEND [FORCE];
|
|
91
|
+
|
|
92
|
+
-- 取消集群中所有作业
|
|
93
|
+
ALTER VCLUSTER [IF EXISTS] <name> CANCEL ALL JOBS;
|
|
94
|
+
|
|
95
|
+
-- 修改属性
|
|
96
|
+
ALTER VCLUSTER [IF EXISTS] <name> SET
|
|
97
|
+
VCLUSTER_SIZE = num
|
|
98
|
+
AUTO_SUSPEND_IN_SECOND = num
|
|
99
|
+
AUTO_RESUME = TRUE | FALSE
|
|
100
|
+
MAX_CONCURRENCY = num -- 仅分析型
|
|
101
|
+
MIN_REPLICAS = num -- 仅分析型
|
|
102
|
+
MAX_REPLICAS = num -- 仅分析型
|
|
103
|
+
PRELOAD_TABLES = "schema.table";
|
|
104
|
+
|
|
105
|
+
-- 修改注释
|
|
106
|
+
ALTER VCLUSTER [IF EXISTS] <name> SET COMMENT '新注释';
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## DROP VCLUSTER
|
|
112
|
+
|
|
113
|
+
```sql
|
|
114
|
+
-- 等待当前作业完成后删除
|
|
115
|
+
DROP VCLUSTER [IF EXISTS] <name>;
|
|
116
|
+
|
|
117
|
+
-- 立即强制删除(中断正在运行的作业)
|
|
118
|
+
DROP VCLUSTER [IF EXISTS] <name> FORCE;
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## DESC / SHOW VCLUSTER
|
|
124
|
+
|
|
125
|
+
```sql
|
|
126
|
+
-- 查看集群基本信息
|
|
127
|
+
DESC VCLUSTER <name>;
|
|
128
|
+
|
|
129
|
+
-- 查看扩展信息
|
|
130
|
+
DESC VCLUSTER EXTENDED <name>;
|
|
131
|
+
|
|
132
|
+
-- 列出所有集群
|
|
133
|
+
SHOW VCLUSTERS;
|
|
134
|
+
|
|
135
|
+
-- 按类型过滤
|
|
136
|
+
SHOW VCLUSTERS WHERE vcluster_type = 'GENERAL';
|
|
137
|
+
SHOW VCLUSTERS WHERE state = 'SUSPENDED';
|
|
138
|
+
SHOW VCLUSTERS WHERE vcluster_type = 'ANALYTICS';
|
|
139
|
+
|
|
140
|
+
-- 按名称模糊匹配
|
|
141
|
+
SHOW VCLUSTERS LIKE 'etl%';
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## USE VCLUSTER(切换当前会话集群)
|
|
147
|
+
|
|
148
|
+
```sql
|
|
149
|
+
USE VCLUSTER <name>;
|
|
150
|
+
```
|