@clickzetta/cz-cli-darwin-arm64 0.3.19 → 0.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/clickzetta-access-control/eval_cases.jsonl +1 -1
  3. package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -0
  4. package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -0
  5. package/bin/skills/clickzetta-dba-guide/SKILL.md +542 -0
  6. package/bin/skills/clickzetta-dba-guide/eval_cases.jsonl +3 -0
  7. package/bin/skills/clickzetta-dw-modeling/eval_cases.jsonl +1 -1
  8. package/bin/skills/clickzetta-dynamic-table/eval_cases.jsonl +5 -0
  9. package/bin/skills/clickzetta-file-import-pipeline/eval_cases.jsonl +5 -0
  10. package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +218 -0
  11. package/bin/skills/clickzetta-lakehouse-connect/eval_cases.jsonl +3 -0
  12. package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +35 -0
  13. package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +435 -0
  14. package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +478 -0
  15. package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +225 -0
  16. package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +468 -0
  17. package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +445 -0
  18. package/bin/skills/clickzetta-manage-comments/SKILL.md +219 -0
  19. package/bin/skills/clickzetta-manage-comments/eval_cases.jsonl +3 -0
  20. package/bin/skills/clickzetta-metadata/SKILL.md +483 -0
  21. package/bin/skills/clickzetta-metadata/eval_cases.jsonl +5 -0
  22. package/bin/skills/clickzetta-metadata/references/instance-views-reference.md +276 -0
  23. package/bin/skills/clickzetta-metadata/references/metering-views-reference.md +137 -0
  24. package/bin/skills/clickzetta-metadata/references/show-desc-reference.md +326 -0
  25. package/bin/skills/clickzetta-metadata/references/views-reference.md +271 -0
  26. package/bin/skills/clickzetta-oss-ingest-pipeline/eval_cases.jsonl +5 -0
  27. package/bin/skills/clickzetta-overview/SKILL.md +102 -0
  28. package/bin/skills/clickzetta-overview/eval_cases.jsonl +5 -0
  29. package/bin/skills/clickzetta-overview/references/brands-and-endpoints.md +79 -0
  30. package/bin/skills/clickzetta-overview/references/object-model.md +311 -0
  31. package/bin/skills/clickzetta-overview/references/studio-modules.md +173 -0
  32. package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -0
  33. package/bin/skills/clickzetta-sql-pipeline-manager/eval_cases.jsonl +12 -0
  34. package/bin/skills/clickzetta-table-stream-pipeline/eval_cases.jsonl +5 -0
  35. package/bin/skills/clickzetta-vcluster-manager/eval_cases.jsonl +5 -0
  36. package/bin/skills/clickzetta-volume-manager/eval_cases.jsonl +5 -0
  37. package/bin/skills/cz-cli-inner/SKILL.md +5 -4
  38. package/package.json +1 -1
  39. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -220
  40. package/bin/skills/clickzetta-data-ingest-pipeline/eval_cases.jsonl +0 -5
package/bin/cz-cli CHANGED
Binary file
@@ -1,3 +1,3 @@
1
- {"case_id":"002","type":"should_call","user_input":"怎么查看当前工作空间有哪些用户?","expected_skill":"clickzetta-access-control","expected_output_contains":["SHOW USERS"]}
1
+ {"case_id":"002","type":"should_call","user_input":"怎么管理工作空间的用户?新用户加入后需要授予什么角色?","expected_skill":"clickzetta-access-control","expected_output_contains":["角色","授予"]}
2
2
  {"case_id":"003","type":"should_call","user_input":"当前有哪些系统预置角色?各自有什么权限?","expected_skill":"clickzetta-access-control","expected_output_contains":["workspace_admin","workspace_dev"]}
3
3
  {"case_id":"004","type":"should_call","user_input":"怎么给某个用户授予 public schema 下所有表的只读权限?","expected_skill":"clickzetta-access-control","expected_output_contains":["GRANT","SELECT"]}
@@ -0,0 +1,5 @@
1
+ {"case_id":"001","type":"should_call","user_input":"怎么创建离线同步任务把 MySQL 表定期同步到 Lakehouse?","expected_skill":"clickzetta-batch-sync-pipeline","expected_output_contains":["离线同步","Cron","调度"]}
2
+ {"case_id":"002","type":"should_call","user_input":"多表离线同步支持自动建表吗?","expected_skill":"clickzetta-batch-sync-pipeline","expected_output_contains":["自动创建","多表"]}
3
+ {"case_id":"003","type":"should_call","user_input":"离线同步的单表模式和多表模式怎么选?","expected_skill":"clickzetta-batch-sync-pipeline","expected_output_contains":["单表","多表","task_type"]}
4
+ {"case_id":"004","type":"should_call","user_input":"批量同步支持 Schema Evolution 吗?新增字段会自动适配吗?","expected_skill":"clickzetta-batch-sync-pipeline","expected_output_contains":["Schema Evolution","多表"]}
5
+ {"case_id":"005","type":"should_call","user_input":"怎么配置离线同步任务的调度周期?用 Cron 表达式吗?","expected_skill":"clickzetta-batch-sync-pipeline","expected_output_contains":["Cron","调度"]}
@@ -0,0 +1,5 @@
1
+ {"case_id":"001","type":"should_call","user_input":"怎么把 MySQL 整库实时同步到 Lakehouse?","expected_skill":"clickzetta-cdc-sync-pipeline","expected_output_contains":["整库镜像","Binlog","CDC"]}
2
+ {"case_id":"002","type":"should_call","user_input":"分库分表的数据怎么合并同步到 Lakehouse 一张表?","expected_skill":"clickzetta-cdc-sync-pipeline","expected_output_contains":["多表合并","分库分表"]}
3
+ {"case_id":"003","type":"should_call","user_input":"CDC 同步任务 Binlog 位点过期了怎么办?","expected_skill":"clickzetta-cdc-sync-pipeline","expected_output_contains":["Binlog"]}
4
+ {"case_id":"004","type":"should_call","user_input":"多表实时同步怎么配置告警?支持飞书通知吗?","expected_skill":"clickzetta-cdc-sync-pipeline","expected_output_contains":["告警","webhook"]}
5
+ {"case_id":"005","type":"should_call","user_input":"PostgreSQL 整库 CDC 同步需要源端做什么准备?","expected_skill":"clickzetta-cdc-sync-pipeline","expected_output_contains":["WAL","PostgreSQL"]}
@@ -0,0 +1,542 @@
1
+ ---
2
+ name: clickzetta-dba-guide
3
+ description: |
4
+ ClickZetta Lakehouse DBA 日常运维操作手册。集中覆盖 DBA 最常用的 6 类操作:
5
+ 计算集群运维、作业监控与诊断、数据恢复与保护、
6
+ 存储优化与维护、Schema 与对象管理、成本与资源分析。
7
+ 每个操作提供可直接执行的 SQL,并标注 ClickZetta 特有限制。
8
+ 当用户说"启停集群"、"调整集群规格"、
9
+ "取消作业"、"慢查询"、"恢复误删表"、"UNDROP"、"RESTORE"、
10
+ "小文件合并"、"OPTIMIZE"、"ANALYZE TABLE"、
11
+ "成本分析"、"存储用量"、"DBA 操作"、
12
+ "创建 Schema"、"删除 Schema"、"重命名表"、"对象管理"、"Schema 管理"时触发。
13
+ 用户与权限管理、网络策略、数据脱敏等安全治理操作请优先使用 clickzetta-access-control skill(本手册模块 1、模块 6 仅保留常用语句速查)。
14
+ Keywords: DBA, operations, monitoring, troubleshooting, cluster management, cost
15
+ ---
16
+
17
+ # ClickZetta Lakehouse DBA 运维手册
18
+
19
+ ---
20
+
21
+ ## 模块 1:用户与权限管理
22
+
23
+ ### 用户管理
24
+
25
+ ```sql
26
+ -- 创建用户(设置默认集群和 Schema)
27
+ CREATE USER alice DEFAULT_VCLUSTER default_ap DEFAULT_SCHEMA my_schema;
28
+
29
+ -- 修改用户默认集群
30
+ ALTER USER alice SET DEFAULT_VCLUSTER = analytics_cluster;
31
+
32
+ -- 删除用户(从当前工作空间移除)
33
+ DROP USER alice;
34
+
35
+ -- 查看所有用户
36
+ SHOW USERS;
37
+ ```
38
+
39
+ ### 角色管理
40
+
41
+ ```sql
42
+ -- 创建自定义角色(仅工作空间级;只能通过 SQL 创建,不支持 Web 端)
43
+ CREATE ROLE data_engineer;
44
+
45
+ -- 将角色授予用户
46
+ GRANT ROLE data_engineer TO USER alice;
47
+
48
+ -- 撤销角色
49
+ REVOKE ROLE data_engineer FROM USER alice;
50
+
51
+ -- 查看所有角色
52
+ SHOW ROLES;
53
+
54
+ -- 查看用户 / 角色已被授予的权限
55
+ SHOW GRANTS TO USER alice;
56
+ SHOW GRANTS TO ROLE data_engineer;
57
+ ```
58
+
59
+ ### 权限授予
60
+
61
+ ```sql
62
+ -- 授予 Schema 下所有表的读权限
63
+ GRANT SELECT ON ALL TABLES IN SCHEMA my_schema TO ROLE data_engineer;
64
+
65
+ -- 授予单张表的读写权限
66
+ GRANT SELECT, INSERT, UPDATE, DELETE ON TABLE my_schema.orders TO USER alice;
67
+
68
+ -- 授予创建表的权限
69
+ GRANT CREATE TABLE ON SCHEMA my_schema TO ROLE data_engineer;
70
+
71
+ -- 授予使用集群的权限
72
+ GRANT USE ON VCLUSTER default_ap TO ROLE data_engineer;
73
+
74
+ -- 授予 information_schema 查询权限
75
+ GRANT ALL ON ALL VIEWS IN SCHEMA information_schema TO ROLE data_engineer;
76
+
77
+ -- 批量授权(Schema 级别)
78
+ GRANT SELECT ON ALL TABLES IN SCHEMA ods TO ROLE analyst;
79
+ GRANT SELECT ON ALL TABLES IN SCHEMA dwd TO ROLE analyst;
80
+ GRANT SELECT ON ALL TABLES IN SCHEMA dws TO ROLE analyst;
81
+ ```
82
+
83
+ ### 权限撤销
84
+
85
+ ```sql
86
+ -- 撤销表权限
87
+ REVOKE SELECT ON TABLE my_schema.orders FROM USER alice;
88
+
89
+ -- 撤销 Schema 创建权限
90
+ REVOKE CREATE TABLE ON SCHEMA my_schema FROM ROLE data_engineer;
91
+ ```
92
+
93
+ ### 动态脱敏(列级安全,邀测功能)
94
+
95
+ ```sql
96
+ -- 创建脱敏函数(基于角色)
97
+ CREATE FUNCTION my_schema.phone_masking(phone STRING)
98
+ RETURNS STRING
99
+ AS CASE
100
+ WHEN ARRAY_CONTAINS(current_roles(), 'data_admin') THEN phone
101
+ ELSE CONCAT(SUBSTR(phone, 1, 3), '****', SUBSTR(phone, 8, 4))
102
+ END;
103
+
104
+ -- 绑定脱敏策略到列
105
+ ALTER TABLE my_schema.users
106
+ CHANGE COLUMN phone SET MASK my_schema.phone_masking;
107
+
108
+ -- 解除脱敏
109
+ ALTER TABLE my_schema.users
110
+ CHANGE COLUMN phone UNSET MASK;
111
+ ```
112
+
113
+ **ClickZetta 特有限制:**
114
+ - 无超级用户,所有操作必须明确授权
115
+ - `instance_admin` 不能直接操作工作空间数据
116
+ - 自定义角色仅工作空间级,不支持实例级自定义角色
117
+
118
+ ---
119
+
120
+ ## 模块 2:计算集群运维
121
+
122
+ ### 启停与状态
123
+
124
+ ```sql
125
+ -- 启动集群
126
+ ALTER VCLUSTER my_cluster RESUME;
127
+ ALTER VCLUSTER IF EXISTS my_cluster RESUME;
128
+
129
+ -- 停止集群
130
+ ALTER VCLUSTER my_cluster SUSPEND;
131
+ ALTER VCLUSTER my_cluster SUSPEND FORCE; -- 强制停止(中断运行中的作业)
132
+
133
+ -- 取消集群所有作业
134
+ ALTER VCLUSTER my_cluster CANCEL ALL JOBS;
135
+
136
+ -- 查看集群状态
137
+ SHOW VCLUSTERS;
138
+ SHOW VCLUSTERS WHERE state = 'RUNNING';
139
+ SHOW VCLUSTERS WHERE state = 'SUSPENDED';
140
+ DESC VCLUSTER my_cluster;
141
+ DESC VCLUSTER EXTENDED my_cluster;
142
+
143
+ -- 切换当前会话使用的集群
144
+ USE VCLUSTER my_cluster;
145
+ ```
146
+
147
+ ### 调整规格
148
+
149
+ ```sql
150
+ -- 通用型(GP):固定规格
151
+ ALTER VCLUSTER my_gp SET VCLUSTER_SIZE = 8;
152
+
153
+ -- 通用型(GP):弹性规格
154
+ ALTER VCLUSTER my_gp SET MIN_VCLUSTER_SIZE = 2 MAX_VCLUSTER_SIZE = 16;
155
+
156
+ -- 分析型(AP):调整副本数
157
+ ALTER VCLUSTER my_ap SET MIN_REPLICAS = 1 MAX_REPLICAS = 4;
158
+
159
+ -- 分析型(AP):调整最大并发
160
+ ALTER VCLUSTER my_ap SET MAX_CONCURRENCY = 16;
161
+
162
+ -- 设置查询超时(秒,-1 表示无限制)
163
+ ALTER VCLUSTER my_cluster SET QUERY_RUNTIME_LIMIT_IN_SECOND = 3600;
164
+ ```
165
+
166
+ ### 自动停止与启动
167
+
168
+ ```sql
169
+ -- 设置 60 秒无作业自动停止,有作业自动启动
170
+ ALTER VCLUSTER my_cluster SET
171
+ AUTO_SUSPEND_IN_SECOND = 60
172
+ AUTO_RESUME = TRUE;
173
+
174
+ -- 关闭自动停止
175
+ ALTER VCLUSTER my_cluster SET AUTO_SUSPEND_IN_SECOND = -1;
176
+ ```
177
+
178
+ ### AP 集群预加载缓存
179
+
180
+ ```sql
181
+ -- 设置预加载表(集群启动时自动缓存最新数据)
182
+ ALTER VCLUSTER my_ap SET PRELOAD_TABLES = "sales.orders,sales.products";
183
+
184
+ -- 查看缓存状态
185
+ SHOW PRELOAD CACHED STATUS;
186
+ SHOW EXTENDED PRELOAD CACHED STATUS;
187
+ ```
188
+
189
+ **ClickZetta 特有限制:**
190
+ - OPTIMIZE(小文件合并)仅 GP 集群支持,AP 集群不生效
191
+ - 分析型集群规格步长为 2^n(1/2/4/8/16...),通用型步长为 1
192
+
193
+ ---
194
+
195
+ ## 模块 3:作业监控与诊断
196
+
197
+ ### 实时作业查看
198
+
199
+ ```sql
200
+ -- 查看最近作业(最多 7 天,10000 条)
201
+ SHOW JOBS LIMIT 20;
202
+ SHOW JOBS IN VCLUSTER default_ap LIMIT 20;
203
+
204
+ -- 取消指定作业
205
+ CANCEL JOB '2026050118342658136171272';
206
+
207
+ -- 查看执行计划
208
+ EXPLAIN SELECT * FROM orders WHERE order_date = '2024-01-01';
209
+ EXPLAIN EXTENDED SELECT * FROM orders WHERE order_date = '2024-01-01';
210
+ ```
211
+
212
+ ### 历史作业分析(information_schema)
213
+
214
+ ```sql
215
+ -- 慢查询 TOP 20(最近 7 天)
216
+ SELECT job_id, job_creator, execution_time, input_bytes, job_text
217
+ FROM information_schema.job_history
218
+ WHERE pt_date >= CAST(CURRENT_DATE - INTERVAL 7 DAY AS DATE)
219
+ AND status = 'SUCCEED'
220
+ ORDER BY execution_time DESC
221
+ LIMIT 20;
222
+
223
+ -- 失败作业(昨天 0 点至今;pt_date 按天分区过滤,覆盖最近 24 小时)
224
+ SELECT job_id, job_creator, error_message, start_time, job_text
225
+ FROM information_schema.job_history
226
+ WHERE pt_date >= CAST(CURRENT_DATE - INTERVAL 1 DAY AS DATE)
227
+ AND status = 'FAILED'
228
+ ORDER BY start_time DESC;
229
+
230
+ -- 按用户统计 CRU 消耗(最近 30 天)
231
+ SELECT job_creator,
232
+ COUNT(*) AS job_count,
233
+ ROUND(SUM(cru), 2) AS total_cru,
234
+ ROUND(AVG(execution_time), 1) AS avg_exec_sec
235
+ FROM information_schema.job_history
236
+ WHERE pt_date >= CAST(CURRENT_DATE - INTERVAL 30 DAY AS DATE)
237
+ AND status = 'SUCCEED'
238
+ GROUP BY job_creator
239
+ ORDER BY total_cru DESC;
240
+
241
+ -- 按集群统计作业分布
242
+ SELECT virtual_cluster,
243
+ COUNT(*) AS job_count,
244
+ ROUND(SUM(cru), 2) AS total_cru
245
+ FROM information_schema.job_history
246
+ WHERE pt_date >= CAST(CURRENT_DATE - INTERVAL 7 DAY AS DATE)
247
+ GROUP BY virtual_cluster
248
+ ORDER BY total_cru DESC;
249
+ ```
250
+
251
+ ---
252
+
253
+ ## 模块 4:数据恢复与保护
254
+
255
+ ### 恢复误删对象
256
+
257
+ ```sql
258
+ -- 查看已删除的表(delete_time 不为 NULL)
259
+ SHOW TABLES HISTORY IN my_schema;
260
+ SHOW TABLES HISTORY LIKE '%orders%';
261
+
262
+ -- 恢复误删的表/动态表/物化视图
263
+ UNDROP TABLE my_schema.orders;
264
+ UNDROP TABLE my_schema.my_dynamic_table;
265
+ UNDROP TABLE my_schema.my_mv;
266
+ -- ⚠️ 恢复外部函数用 UNDROP FUNCTION,不是 UNDROP EXTERNAL FUNCTION
267
+ UNDROP FUNCTION my_schema.my_ext_function;
268
+ ```
269
+
270
+ ### 回滚到历史版本
271
+
272
+ ```sql
273
+ -- 查看表的版本历史
274
+ DESC HISTORY my_schema.orders;
275
+ -- 返回:version, time, total_rows, total_bytes, user, operation, job_id
276
+
277
+ -- 恢复到指定时间点(覆盖当前数据)
278
+ -- ⚠️ 时间戳必须用 CAST() 或完整毫秒格式,不能用简单字符串
279
+ -- ❌ 错误:RESTORE TABLE t TO TIMESTAMP AS OF '2024-01-15';
280
+ -- ✅ 正确写法:
281
+ RESTORE TABLE my_schema.orders TO TIMESTAMP AS OF CAST('2024-01-15 10:00:00' AS TIMESTAMP);
282
+ RESTORE TABLE my_schema.orders TO TIMESTAMP AS OF CURRENT_TIMESTAMP() - INTERVAL '2' HOURS;
283
+ -- 也支持完整毫秒时间戳字符串(从 DESC HISTORY 复制):
284
+ RESTORE TABLE my_schema.orders TO TIMESTAMP AS OF '2024-01-15 10:00:00.123';
285
+
286
+ -- 查询历史数据(不覆盖,仅查看)
287
+ SELECT * FROM my_schema.orders TIMESTAMP AS OF CAST('2024-01-15 10:00:00' AS TIMESTAMP);
288
+ ```
289
+
290
+ ### 设置数据保留周期
291
+
292
+ ```sql
293
+ -- 设置 Time Travel 保留 30 天(范围 0-90)
294
+ ALTER TABLE my_schema.orders SET PROPERTIES ('data_retention_days' = '30');
295
+
296
+ -- 查看当前设置
297
+ SHOW CREATE TABLE my_schema.orders;
298
+ ```
299
+
300
+ **ClickZetta 特有限制:**
301
+ - `RESTORE TABLE` 目标时间点不能早于表创建时间
302
+ - `UNDROP` 需在 `data_retention_days` 保留期内(默认 1 天)
303
+ - 物化视图支持 UNDROP,但不支持 RESTORE
304
+
305
+ ---
306
+
307
+ ## 模块 5:存储优化与维护
308
+
309
+ ### 小文件合并
310
+
311
+ ```sql
312
+ -- 手动触发小文件合并(异步,仅 GP 集群)
313
+ OPTIMIZE my_schema.orders;
314
+
315
+ -- 同步执行(等待完成)
316
+ OPTIMIZE my_schema.orders OPTIONS('cz.sql.optimize.table.async' = 'false');
317
+
318
+ -- 只优化特定分区
319
+ OPTIMIZE my_schema.orders WHERE dt = '2024-01-01';
320
+ OPTIMIZE my_schema.orders WHERE dt = '2024-01-01' AND region = 'cn';
321
+
322
+ -- DML 写入时自动触发合并(GP 集群)
323
+ SET cz.sql.compaction.after.commit = true;
324
+ INSERT INTO my_schema.orders SELECT * FROM staging;
325
+ ```
326
+
327
+ ### 统计信息收集
328
+
329
+ ```sql
330
+ -- 收集表统计信息(优化查询计划)
331
+ ANALYZE TABLE my_schema.orders COMPUTE STATISTICS;
332
+
333
+ -- 仅收集大小,不扫描数据(快速)
334
+ ANALYZE TABLE my_schema.orders COMPUTE STATISTICS NOSCAN;
335
+
336
+ -- 收集指定列的统计信息
337
+ ANALYZE TABLE my_schema.orders COMPUTE STATISTICS FOR COLUMNS order_date, customer_id;
338
+
339
+ -- 收集 Schema 下所有表
340
+ ANALYZE TABLES IN my_schema COMPUTE STATISTICS;
341
+ ```
342
+
343
+ ### 清空数据
344
+
345
+ ```sql
346
+ -- 清空整张表(保留表结构)
347
+ TRUNCATE TABLE my_schema.staging;
348
+
349
+ -- 清空指定分区
350
+ TRUNCATE TABLE my_schema.orders WHERE dt = '2024-01-01';
351
+ ```
352
+
353
+ ### 查看存储用量
354
+
355
+ ```sql
356
+ -- 当前工作空间内大表排行(所有 Schema 的 MANAGED_TABLE,按大小取前 20)
357
+ SELECT table_schema, table_name,
358
+ ROUND(bytes / 1024.0 / 1024 / 1024, 2) AS size_gb,
359
+ row_count
360
+ FROM information_schema.tables
361
+ WHERE table_type = 'MANAGED_TABLE'
362
+ ORDER BY bytes DESC
363
+ LIMIT 20;
364
+
365
+ -- Sort Key 推荐(系统自动分析)
366
+ SELECT table_name, col, statement, ratio
367
+ FROM information_schema.sortkey_candidates
368
+ ORDER BY ratio DESC;
369
+ ```
370
+
371
+ ---
372
+
373
+ ## 模块 6:网络策略与安全
374
+
375
+ ### 网络策略管理
376
+
377
+ ```sql
378
+ -- 创建网络策略(白名单)
379
+ CREATE NETWORK POLICY office_policy
380
+ ALLOWED_IP_LIST = ('10.0.0.0/8', '192.168.1.0/24')
381
+ COMMENT '办公网络白名单';
382
+
383
+ -- 创建网络策略(白名单 + 黑名单)
384
+ CREATE NETWORK POLICY strict_policy
385
+ ALLOWED_IP_LIST = ('10.0.0.0/8')
386
+ BLOCKED_IP_LIST = ('10.0.1.100')
387
+ COMMENT '严格访问控制';
388
+
389
+ -- 修改网络策略(覆盖式更新:需写出修改后的完整 IP 列表,而不是只写新增的 IP)
390
+ ALTER NETWORK POLICY office_policy
391
+ ALLOWED_IP_LIST = ('10.0.0.0/8', '172.16.0.0/12')
392
+ BLOCKED_IP_LIST = ('10.0.1.100');
393
+
394
+ -- 停用/启用策略
395
+ ALTER NETWORK POLICY office_policy INACTIVATE;
396
+ ALTER NETWORK POLICY office_policy ACTIVATE;
397
+
398
+ -- 删除策略
399
+ DROP NETWORK POLICY IF EXISTS office_policy;
400
+
401
+ -- 查看所有策略(注意:单数 POLICY,无 S)
402
+ SHOW NETWORK POLICY;
403
+
404
+ -- 查看策略详情
405
+ DESC NETWORK POLICY office_policy;
406
+ ```
407
+
408
+ **关键规则(Deny 优先):**
409
+ - 无任何策略时:允许所有 IP
410
+ - 有白名单策略时:不在白名单的 IP 被拒绝
411
+ - 黑名单命中时:无论白名单如何,该 IP 被拒绝
412
+ - MySQL 协议:只要有任何生效策略,所有 MySQL 流量均被拦截
413
+ - 策略生效延迟:最多 5 分钟
414
+
415
+ ---
416
+
417
+ ## 模块 7:Schema 与对象管理
418
+
419
+ ### Schema 管理
420
+
421
+ ```sql
422
+ -- 创建 Schema
423
+ CREATE SCHEMA ods;
424
+ CREATE SCHEMA IF NOT EXISTS dwd;
425
+
426
+ -- 修改 Schema 注释
427
+ ALTER SCHEMA ods SET COMMENT 'ODS 原始数据层';
428
+
429
+ -- 重命名 Schema
430
+ ALTER SCHEMA old_name RENAME TO new_name;
431
+
432
+ -- 删除 Schema(级联删除所有对象)
433
+ DROP SCHEMA IF EXISTS temp_schema CASCADE;
434
+
435
+ -- 切换默认 Schema
436
+ USE SCHEMA my_schema;
437
+ ```
438
+
439
+ ### 表管理
440
+
441
+ ```sql
442
+ -- 修改表:加列
443
+ ALTER TABLE my_schema.orders ADD COLUMN (discount DECIMAL(5,2) COMMENT '折扣率');
444
+
445
+ -- 修改表:改列注释
446
+ ALTER TABLE my_schema.orders CHANGE COLUMN order_id SET COMMENT '订单唯一标识';
447
+
448
+ -- 修改表:设置生命周期
449
+ ALTER TABLE my_schema.orders SET PROPERTIES ('data_lifecycle' = '90');
450
+
451
+ -- 修改表:设置 Sort Key
452
+ ALTER TABLE my_schema.orders SET PROPERTIES ('hint.sort.columns' = 'order_date');
453
+
454
+ -- 重命名表
455
+ ALTER TABLE my_schema.orders RENAME TO my_schema.orders_v2;
456
+
457
+ -- 删除表(可 UNDROP 恢复)
458
+ DROP TABLE IF EXISTS my_schema.temp_table;
459
+
460
+ -- 删除动态表
461
+ DROP DYNAMIC TABLE IF EXISTS my_schema.my_dt;
462
+
463
+ -- 删除物化视图
464
+ DROP MATERIALIZED VIEW IF EXISTS my_schema.my_mv;
465
+ ```
466
+
467
+ ### 批量对象查看
468
+
469
+ ```sql
470
+ -- 统计各类型对象数量
471
+ SELECT
472
+ CASE WHEN is_view THEN 'VIEW'
473
+ WHEN is_materialized_view THEN 'MV'
474
+ WHEN is_dynamic THEN 'DT'
475
+ WHEN is_external THEN 'EXTERNAL'
476
+ ELSE 'TABLE' END AS type,
477
+ COUNT(*) AS cnt
478
+ FROM (SHOW TABLES IN my_schema)
479
+ GROUP BY 1;
480
+
481
+ -- 查找超过 30 天未更新的表(潜在废弃表)
482
+ SELECT table_schema, table_name, last_modify_time,
483
+ ROUND(bytes / 1024.0 / 1024 / 1024, 2) AS size_gb
484
+ FROM information_schema.tables
485
+ WHERE table_type = 'MANAGED_TABLE'
486
+ AND last_modify_time < CURRENT_TIMESTAMP - INTERVAL 30 DAY
487
+ ORDER BY bytes DESC;
488
+ ```
489
+
490
+ ---
491
+
492
+ ## 模块 8:成本与资源分析(需 INSTANCE ADMIN)
493
+
494
+ ```sql
495
+ -- 本月各工作空间计算费用
496
+ SELECT workspace_name, sku_name,
497
+ ROUND(SUM(measurements_consumption), 2) AS total_cru,
498
+ ROUND(SUM(amount), 2) AS total_yuan
499
+ FROM SYS.information_schema.instance_usage
500
+ WHERE measurement_start >= DATE_TRUNC('month', CURRENT_DATE)
501
+ GROUP BY workspace_name, sku_name
502
+ ORDER BY total_yuan DESC;
503
+
504
+ -- 本月各工作空间存储费用
505
+ SELECT workspace_name, sku_name,
506
+ ROUND(SUM(measurements_consumption), 4) AS consumption,
507
+ measurements_unit,
508
+ ROUND(SUM(amount), 4) AS total_yuan
509
+ FROM SYS.information_schema.storage_metering
510
+ WHERE measurement_start >= DATE_TRUNC('month', CURRENT_DATE)
511
+ GROUP BY workspace_name, sku_name, measurements_unit
512
+ ORDER BY workspace_name, total_yuan DESC;
513
+
514
+ -- 跨空间存储用量排行
515
+ SELECT workspace_name,
516
+ ROUND(workspace_storage / 1024.0 / 1024 / 1024, 2) AS storage_gb
517
+ FROM SYS.information_schema.workspaces
518
+ WHERE delete_time IS NULL
519
+ ORDER BY workspace_storage DESC;
520
+
521
+ -- 跨空间大表排行(大于 10GB)
522
+ SELECT table_catalog, table_schema, table_name,
523
+ ROUND(bytes / 1024.0 / 1024 / 1024, 2) AS size_gb, row_count
524
+ FROM SYS.information_schema.tables
525
+ WHERE delete_time IS NULL AND bytes > 10 * 1024 * 1024 * 1024
526
+ ORDER BY bytes DESC;
527
+ ```
528
+
529
+ ---
530
+
531
+ ## ClickZetta DBA 特有注意事项
532
+
533
+ | 场景 | 注意事项 |
534
+ |---|---|
535
+ | 权限体系 | 无超级用户;`instance_admin` 不能直接操作工作空间数据 |
536
+ | 自定义角色 | 仅工作空间级,不支持实例级;只能 SQL 创建,不支持 Web 端 |
537
+ | OPTIMIZE | 仅 GP 集群支持;AP 集群不支持小文件合并 |
538
+ | UNDROP | 需在 `data_retention_days` 保留期内(默认 1 天) |
539
+ | RESTORE | 目标时间点不能早于表创建时间 |
540
+ | 网络策略 | Deny 优先;MySQL 协议有任何策略即全部拦截;生效延迟最多 5 分钟 |
541
+ | 动态脱敏 | 邀测功能,需联系技术支持开通 |
542
+ | 集群规格 | AP 集群步长 2^n;GP 集群步长 1;同步型最小 0.25 CRU |
@@ -0,0 +1,3 @@
1
+ {"case_id":"001","type":"should_call","user_input":"DBA 日常怎么启停集群和调整规格?","expected_skill":"clickzetta-dba-guide","expected_output_contains":["SUSPEND","RESUME"]}
2
+ {"case_id":"002","type":"should_call","user_input":"怎么取消正在运行的作业?CANCEL JOB 怎么用?","expected_skill":"clickzetta-dba-guide","expected_output_contains":["CANCEL"]}
3
+ {"case_id":"003","type":"should_call","user_input":"DBA 日常怎么做存储优化?小文件合并和 OPTIMIZE 怎么用?","expected_skill":"clickzetta-dba-guide","expected_output_contains":["OPTIMIZE","小文件"]}
@@ -1,4 +1,4 @@
1
- {"case_id":"001","type":"should_call","user_input":"帮我做数仓分层设计","expected_skill":"clickzetta-dw-modeling","expected_output_contains":["分层"]}
1
+ {"case_id":"001","type":"should_call","user_input":"数仓分层设计的原则是什么?ODS、DWD、DWS 各层的职责?","expected_skill":"clickzetta-dw-modeling","expected_output_contains":["分层"]}
2
2
  {"case_id":"002","type":"should_call","user_input":"ODS/DWD/DWS/ADS 分层怎么设计","expected_skill":"clickzetta-dw-modeling","expected_output_contains":["ODS","DWD","DWS"]}
3
3
  {"case_id":"003","type":"should_call","user_input":"Medallion 架构 Bronze/Silver/Gold 怎么搭建","expected_skill":"clickzetta-dw-modeling","expected_output_contains":["Bronze","Silver","Gold"]}
4
4
  {"case_id":"004","type":"should_call","user_input":"星型模型和雪花模型怎么选","expected_skill":"clickzetta-dw-modeling","expected_output_contains":["星型","雪花"]}
@@ -0,0 +1,5 @@
1
+ {"case_id":"001","type":"should_call","user_input":"帮我创建一个 Dynamic Table,从 public.dim_studio_user_dmin_f 聚合按租户+日期统计用户数,每 60 分钟自动刷新","expected_skill":"clickzetta-dynamic-table","expected_output_contains":["DYNAMIC TABLE","REFRESH"]}
2
+ {"case_id":"002","type":"should_call","user_input":"怎么查看动态表的刷新历史和状态","expected_skill":"clickzetta-dynamic-table","expected_output_contains":["REFRESH HISTORY"]}
3
+ {"case_id":"003","type":"should_call","user_input":"动态表的增量刷新怎么配置?SESSION_CONFIGS 怎么用?","expected_skill":"clickzetta-dynamic-table","expected_output_contains":["SESSION_CONFIGS","增量"]}
4
+ {"case_id":"004","type":"should_call","user_input":"静态分区 DT 和动态分区 DT 有什么区别?该怎么选?","expected_skill":"clickzetta-dynamic-table","expected_output_contains":["静态分区","动态分区"]}
5
+ {"case_id":"005","type":"should_call","user_input":"动态表怎么修改刷新间隔和 vcluster?","expected_skill":"clickzetta-dynamic-table","expected_output_contains":["ALTER","DYNAMIC TABLE"]}
@@ -0,0 +1,5 @@
1
+ {"case_id":"001","type":"should_call","user_input":"从 URL 导入文件到 Lakehouse 的步骤和语法是什么?","expected_skill":"clickzetta-file-import-pipeline","expected_output_contains":["Volume","COPY INTO"]}
2
+ {"case_id":"002","type":"should_call","user_input":"本地文件上传到 Lakehouse 表的流程是什么?需要哪些步骤?","expected_skill":"clickzetta-file-import-pipeline","expected_output_contains":["Volume","COPY INTO"]}
3
+ {"case_id":"003","type":"should_call","user_input":"COPY INTO 导入数据时 append 和 overwrite 写入模式有什么区别?请说明语法","expected_skill":"clickzetta-file-import-pipeline","expected_output_contains":["append","overwrite"]}
4
+ {"case_id":"004","type":"should_call","user_input":"COPY INTO 导入前怎么推断文件格式?有哪些支持的格式类型?","expected_skill":"clickzetta-file-import-pipeline","expected_output_contains":["CSV","JSON","Parquet"]}
5
+ {"case_id":"005","type":"should_call","user_input":"CSV 有自定义分隔符,COPY INTO 的 OPTIONS 怎么写?","expected_skill":"clickzetta-file-import-pipeline","expected_output_contains":["CSV","OPTIONS"]}