@clickzetta/cz-cli-darwin-arm64 0.3.18 → 0.3.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/clickzetta-access-control/SKILL.md +243 -0
  3. package/bin/skills/clickzetta-access-control/eval_cases.jsonl +3 -0
  4. package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +86 -0
  5. package/bin/skills/clickzetta-access-control/references/grant-revoke.md +103 -0
  6. package/bin/skills/clickzetta-access-control/references/role-management.md +66 -0
  7. package/bin/skills/clickzetta-access-control/references/user-management.md +61 -0
  8. package/bin/skills/clickzetta-ai-vector-search/SKILL.md +160 -0
  9. package/bin/skills/clickzetta-ai-vector-search/eval_cases.jsonl +4 -0
  10. package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +155 -0
  11. package/bin/skills/clickzetta-data-retention/SKILL.md +160 -0
  12. package/bin/skills/clickzetta-data-retention/eval_cases.jsonl +5 -0
  13. package/bin/skills/clickzetta-data-retention/references/lifecycle-reference.md +175 -0
  14. package/bin/skills/clickzetta-dw-modeling/SKILL.md +259 -0
  15. package/bin/skills/clickzetta-dw-modeling/eval_cases.jsonl +4 -0
  16. package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +100 -0
  17. package/bin/skills/clickzetta-external-function/SKILL.md +203 -0
  18. package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -0
  19. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +171 -0
  20. package/bin/skills/clickzetta-index-manager/SKILL.md +140 -0
  21. package/bin/skills/clickzetta-index-manager/eval_cases.jsonl +5 -0
  22. package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +67 -0
  23. package/bin/skills/clickzetta-index-manager/references/index-management.md +73 -0
  24. package/bin/skills/clickzetta-index-manager/references/inverted-index.md +80 -0
  25. package/bin/skills/clickzetta-index-manager/references/vector-index.md +81 -0
  26. package/bin/skills/clickzetta-monitoring/SKILL.md +199 -0
  27. package/bin/skills/clickzetta-monitoring/eval_cases.jsonl +5 -0
  28. package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +97 -0
  29. package/bin/skills/clickzetta-monitoring/references/show-jobs.md +48 -0
  30. package/bin/skills/clickzetta-query-optimizer/eval_cases.jsonl +5 -0
  31. package/bin/skills/cz-cli/SKILL.md +1 -1
  32. package/bin/skills/cz-cli-inner/SKILL.md +8 -0
  33. package/package.json +1 -1
@@ -0,0 +1,199 @@
1
+ ---
2
+ name: clickzetta-monitoring
3
+ description: |
4
+ 监控和分析 ClickZetta Lakehouse 作业运行状态、性能和资源使用情况,
5
+ 以及通过 INFORMATION_SCHEMA 查询元数据(表、列、Schema、工作空间等)。
6
+ 覆盖 SHOW JOBS 实时查看作业、information_schema.job_history 历史分析、
7
+ 慢查询识别、集群负载分析、缓存命中率统计、失败作业排查、
8
+ information_schema.tables/columns/schemas 元数据查询等完整监控与治理工作流。
9
+ 当用户说"查看作业"、"作业历史"、"SHOW JOBS"、"慢查询"、"查询性能"、
10
+ "集群负载"、"作业失败"、"查询失败"、"监控"、"job history"、
11
+ "information_schema"、"缓存命中率"、"查询耗时"、"作业状态"、
12
+ "元数据查询"、"查看所有表"、"表大小"、"列信息"、"资产盘点"时触发。
13
+ Keywords: monitoring, job status, performance, resource usage, SHOW JOBS, slow query
14
+ ---
15
+
16
+ # ClickZetta 作业监控与分析
17
+
18
+ 阅读 [references/show-jobs.md](references/show-jobs.md) 了解 SHOW JOBS 语法。
19
+ 阅读 [references/job-history-analysis.md](references/job-history-analysis.md) 了解历史分析查询。
20
+
21
+ ---
22
+
23
+ ## 实时查看作业(SHOW JOBS)
24
+
25
+ ```sql
26
+ -- 查看所有作业(最近7天)
27
+ SHOW JOBS;
28
+
29
+ -- 查看指定集群的作业
30
+ SHOW JOBS IN VCLUSTER default_ap;
31
+
32
+ -- 查看执行时间超过2分钟的慢查询
33
+ SHOW JOBS WHERE execution_time > INTERVAL 2 MINUTE;
34
+
35
+ -- 查看失败的作业
36
+ SHOW JOBS WHERE status = 'FAILED';
37
+
38
+ -- 限制返回数量
39
+ SHOW JOBS IN VCLUSTER default_ap LIMIT 50;
40
+ ```
41
+
42
+ ---
43
+
44
+ ## 历史作业分析(information_schema)
45
+
46
+ ### 集群负载分析
47
+
48
+ ```sql
49
+ SELECT
50
+ virtual_cluster,
51
+ COUNT(*) AS job_count,
52
+ AVG(execution_time) AS avg_seconds,
53
+ ROUND(SUM(CASE WHEN status = 'SUCCEED' THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS success_rate
54
+ FROM sys.information_schema.job_history
55
+ WHERE start_time >= CURRENT_DATE() - INTERVAL 7 DAY
56
+ GROUP BY virtual_cluster
57
+ ORDER BY job_count DESC;
58
+ ```
59
+
60
+ ### 慢查询 TOP 20
61
+
62
+ ```sql
63
+ SELECT job_id, virtual_cluster, execution_time, status, start_time
64
+ FROM sys.information_schema.job_history
65
+ WHERE start_time >= CURRENT_DATE() - INTERVAL 7 DAY
66
+ ORDER BY execution_time DESC
67
+ LIMIT 20;
68
+ ```
69
+
70
+ ### 失败作业统计
71
+
72
+ ```sql
73
+ SELECT
74
+ virtual_cluster,
75
+ COUNT(*) AS failed_count,
76
+ DATE(start_time) AS date
77
+ FROM sys.information_schema.job_history
78
+ WHERE status = 'FAILED'
79
+ AND start_time >= CURRENT_DATE() - INTERVAL 7 DAY
80
+ GROUP BY virtual_cluster, DATE(start_time)
81
+ ORDER BY date DESC;
82
+ ```
83
+
84
+ ### 高峰期识别
85
+
86
+ ```sql
87
+ SELECT
88
+ HOUR(start_time) AS hour_of_day,
89
+ COUNT(*) AS job_count,
90
+ AVG(execution_time) AS avg_execution_time
91
+ FROM sys.information_schema.job_history
92
+ WHERE start_time >= CURRENT_DATE() - INTERVAL 7 DAY
93
+ GROUP BY HOUR(start_time)
94
+ ORDER BY hour_of_day;
95
+ ```
96
+
97
+ ---
98
+
99
+ ## query_tag 标记与过滤
100
+
101
+ 给作业打标,便于按来源过滤:
102
+
103
+ ```sql
104
+ -- 在 SQL 中设置 query_tag
105
+ SET query_tag = 'etl_daily';
106
+ SELECT * FROM orders;
107
+
108
+ -- 按 query_tag 过滤作业历史
109
+ SELECT job_id, execution_time, status
110
+ FROM sys.information_schema.job_history
111
+ WHERE start_time >= CURRENT_DATE() - INTERVAL 7 DAY
112
+ AND query_tag = 'etl_daily';
113
+ ```
114
+
115
+ 也可以在 JDBC URL 中通过 `query_tag` 参数设置:
116
+ ```
117
+ jdbc:clickzetta://instance.region.api.clickzetta.com/workspace?query_tag=my_app
118
+ ```
119
+
120
+ ---
121
+
122
+ ## 常见问题排查
123
+
124
+ | 现象 | 排查方向 |
125
+ |---|---|
126
+ | 作业长时间"等待执行" | 集群资源不足,考虑扩容 VCluster |
127
+ | 作业长时间"集群启动中" | VCluster 冷启动慢,联系技术支持 |
128
+ | 大量失败作业 | 查看 job_id 详情,检查 SQL 语法或权限 |
129
+ | 平均执行时间突然变长 | 检查数据量变化、索引状态、缓存命中率 |
130
+
131
+ ---
132
+
133
+ ## INFORMATION_SCHEMA 元数据查询
134
+
135
+ 除了 `job_history`,INFORMATION_SCHEMA 还提供丰富的元数据视图,用于资产盘点和治理。
136
+
137
+ ### 空间级视图(当前工作空间)
138
+
139
+ ```sql
140
+ -- 查看当前空间下所有 Schema
141
+ SELECT * FROM information_schema.schemas;
142
+
143
+ -- 查看所有表及其大小、行数
144
+ SELECT table_schema, table_name, table_type, row_count, bytes
145
+ FROM information_schema.tables
146
+ ORDER BY bytes DESC;
147
+
148
+ -- 查看所有列的详细信息(字段名、类型、是否可空、注释)
149
+ SELECT table_schema, table_name, column_name, data_type, is_nullable, comment
150
+ FROM information_schema.columns
151
+ WHERE table_schema = 'public';
152
+
153
+ -- 查看排序列推荐
154
+ SELECT * FROM information_schema.sortkey_candidates;
155
+ ```
156
+
157
+ ### 实例级视图(需要 instance_admin 权限,使用 sys 库)
158
+
159
+ ```sql
160
+ -- 查看实例下所有工作空间
161
+ SELECT * FROM sys.information_schema.workspaces;
162
+
163
+ -- 查看实例下所有 Schema(跨工作空间)
164
+ SELECT * FROM sys.information_schema.schemas;
165
+
166
+ -- 查看实例用量(费用分析)
167
+ SELECT * FROM sys.information_schema.instance_usage
168
+ WHERE start_time >= CURRENT_DATE() - INTERVAL 7 DAY;
169
+ ```
170
+
171
+ ### 常用元数据分析场景
172
+
173
+ ```sql
174
+ -- 找出最大的 10 张表
175
+ SELECT table_schema, table_name, row_count, bytes
176
+ FROM information_schema.tables
177
+ WHERE table_type = 'TABLE'
178
+ ORDER BY bytes DESC
179
+ LIMIT 10;
180
+
181
+ -- 找出没有注释的表
182
+ SELECT table_schema, table_name
183
+ FROM information_schema.tables
184
+ WHERE comment IS NULL OR comment = '';
185
+
186
+ -- 找出没有注释的字段
187
+ SELECT table_schema, table_name, column_name
188
+ FROM information_schema.columns
189
+ WHERE (comment IS NULL OR comment = '')
190
+ AND table_schema NOT IN ('information_schema');
191
+
192
+ -- 统计各 Schema 下的表数量和总存储
193
+ SELECT table_schema,
194
+ COUNT(*) AS table_count,
195
+ SUM(bytes) AS total_storage
196
+ FROM information_schema.tables
197
+ GROUP BY table_schema
198
+ ORDER BY total_storage DESC;
199
+ ```
@@ -0,0 +1,5 @@
1
+ {"case_id":"001","type":"should_call","user_input":"SHOW JOBS 的语法是什么?怎么过滤特定状态的作业?","expected_skill":"clickzetta-monitoring","expected_output_contains":["SHOW JOBS"]}
2
+ {"case_id":"002","type":"should_call","user_input":"怎么查询失败的作业?用什么 SQL?","expected_skill":"clickzetta-monitoring","expected_output_contains":["FAILED"]}
3
+ {"case_id":"003","type":"should_call","user_input":"过去 7 天各集群的作业执行情况怎么查?","expected_skill":"clickzetta-monitoring","expected_output_contains":["job_history","virtual_cluster"]}
4
+ {"case_id":"004","type":"should_call","user_input":"怎么查看慢查询?执行超过 2 分钟的作业有哪些?","expected_skill":"clickzetta-monitoring","expected_output_contains":["execution_time"]}
5
+ {"case_id":"005","type":"should_call","user_input":"怎么通过 information_schema 查看所有表的大小和行数?","expected_skill":"clickzetta-monitoring","expected_output_contains":["information_schema","tables"]}
@@ -0,0 +1,97 @@
1
+ # information_schema 作业历史分析参考
2
+
3
+ > 来源:https://www.yunqi.tech/documents/job_history_analysis_with_information_schema
4
+
5
+ ## 数据源
6
+
7
+ 表名:`sys.information_schema.job_history`(实例级视图,位于 sys 库,查询需要 instance_admin 权限)
8
+
9
+ ### 关键字段
10
+
11
+ | 字段 | 类型 | 说明 |
12
+ |---|---|---|
13
+ | workspace_name | String | 工作空间名称 |
14
+ | virtual_cluster | String | 计算集群名称 |
15
+ | job_id | String | 作业唯一标识 |
16
+ | execution_time | Float | 执行时长(秒) |
17
+ | start_time | Timestamp | 开始时间 |
18
+ | status | String | 状态(SUCCEED/FAILED/CANCELLED/...) |
19
+ | input_tables | String | 输入表(JSON 格式) |
20
+ | input_bytes | String | 读取字节数 |
21
+ | cache_hit | String | 缓存命中字节数 |
22
+
23
+ ---
24
+
25
+ ## 常用分析查询
26
+
27
+ ### 1. 集群负载分析(近 30 天)
28
+
29
+ ```sql
30
+ SELECT
31
+ virtual_cluster,
32
+ COUNT(*) AS job_count,
33
+ SUM(execution_time) AS total_execution_time,
34
+ AVG(execution_time) AS avg_execution_time,
35
+ ROUND(SUM(CASE WHEN status = 'SUCCEED' THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS success_rate
36
+ FROM sys.information_schema.job_history
37
+ WHERE start_time >= CURRENT_DATE() - INTERVAL 30 DAY
38
+ GROUP BY virtual_cluster
39
+ ORDER BY total_execution_time DESC;
40
+ ```
41
+
42
+ ### 2. 慢查询分析(执行时间 TOP 20)
43
+
44
+ ```sql
45
+ SELECT
46
+ job_id,
47
+ virtual_cluster,
48
+ execution_time,
49
+ status,
50
+ start_time
51
+ FROM sys.information_schema.job_history
52
+ WHERE start_time >= CURRENT_DATE() - INTERVAL 7 DAY
53
+ ORDER BY execution_time DESC
54
+ LIMIT 20;
55
+ ```
56
+
57
+ ### 3. 失败作业分析
58
+
59
+ ```sql
60
+ SELECT
61
+ virtual_cluster,
62
+ COUNT(*) AS failed_count,
63
+ DATE(start_time) AS date
64
+ FROM sys.information_schema.job_history
65
+ WHERE status = 'FAILED'
66
+ AND start_time >= CURRENT_DATE() - INTERVAL 7 DAY
67
+ GROUP BY virtual_cluster, DATE(start_time)
68
+ ORDER BY date DESC, failed_count DESC;
69
+ ```
70
+
71
+ ### 4. 缓存命中率分析
72
+
73
+ ```sql
74
+ SELECT
75
+ virtual_cluster,
76
+ SUM(CAST(input_bytes AS BIGINT)) AS total_input_bytes,
77
+ SUM(CAST(cache_hit AS BIGINT)) AS total_cache_hit,
78
+ ROUND(SUM(CAST(cache_hit AS BIGINT)) * 100.0 /
79
+ NULLIF(SUM(CAST(input_bytes AS BIGINT)), 0), 2) AS cache_hit_rate
80
+ FROM sys.information_schema.job_history
81
+ WHERE start_time >= CURRENT_DATE() - INTERVAL 7 DAY
82
+ AND input_bytes IS NOT NULL
83
+ GROUP BY virtual_cluster;
84
+ ```
85
+
86
+ ### 5. 按小时统计作业量(识别高峰期)
87
+
88
+ ```sql
89
+ SELECT
90
+ HOUR(start_time) AS hour_of_day,
91
+ COUNT(*) AS job_count,
92
+ AVG(execution_time) AS avg_execution_time
93
+ FROM sys.information_schema.job_history
94
+ WHERE start_time >= CURRENT_DATE() - INTERVAL 7 DAY
95
+ GROUP BY HOUR(start_time)
96
+ ORDER BY hour_of_day;
97
+ ```
@@ -0,0 +1,48 @@
1
+ # SHOW JOBS 参考
2
+
3
+ > 来源:https://www.yunqi.tech/documents/show-jobs
4
+
5
+ ## 语法
6
+
7
+ ```sql
8
+ SHOW JOBS [IN VCLUSTER vc_name] [LIKE 'pattern'] [WHERE <expr>] [LIMIT num];
9
+ ```
10
+
11
+ ## 参数说明
12
+
13
+ - `IN VCLUSTER vc_name`:(可选)指定计算集群,筛选该集群下的作业
14
+ - `WHERE <expr>`:(可选)按字段过滤,支持 SHOW JOBS 显示的所有字段
15
+ - `LIMIT num`:(可选)限制返回数量,范围 1-10000
16
+ - `LIKE 'pattern'`:(可选)按 job_id 模式匹配(支持 `%` 和 `_`)
17
+
18
+ 默认显示最近 7 天内的作业,最多 10000 条。
19
+
20
+ ## 示例
21
+
22
+ ```sql
23
+ -- 查看所有作业(最近7天)
24
+ SHOW JOBS;
25
+
26
+ -- 查看指定集群的作业
27
+ SHOW JOBS IN VCLUSTER default_ap;
28
+
29
+ -- 查看执行时间超过2分钟的作业
30
+ SHOW JOBS IN VCLUSTER default_ap WHERE execution_time > INTERVAL 2 MINUTE;
31
+
32
+ -- 限制返回100条
33
+ SHOW JOBS LIMIT 100;
34
+
35
+ -- 按 job_id 模糊匹配
36
+ SHOW JOBS LIKE '2024%';
37
+ ```
38
+
39
+ ## 作业状态说明
40
+
41
+ | 状态 | 含义 |
42
+ |---|---|
43
+ | 初始化 | SQL 编译优化阶段 |
44
+ | 集群启动中 | 等待 VCluster 启动 |
45
+ | 等待执行 | 排队等待资源 |
46
+ | 正在执行 | 正在处理数据 |
47
+ | 执行成功 | 运行成功结束 |
48
+ | 执行失败 | 运行失败 |
@@ -0,0 +1,5 @@
1
+ {"case_id":"001","type":"should_call","user_input":"为什么这个 JOIN 查询这么慢?SELECT t1.user_id, t2.tenant_name FROM public.dim_studio_user_dmin_f t1 JOIN public.dim_studio_tenant_dmin_f t2 ON t1.tenant_id = t2.tenant_id","expected_skill":"clickzetta-query-optimizer","expected_output_contains":["JOIN","优化"]}
2
+ {"case_id":"002","type":"should_call","user_input":"public.dwd_studio_lakehouse_jobs_dd_i 表适合设置什么 Sort Key?","expected_skill":"clickzetta-query-optimizer","expected_output_contains":["sort","key"]}
3
+ {"case_id":"003","type":"should_call","user_input":"怎么开启结果缓存?Result Cache 的使用限制是什么?","expected_skill":"clickzetta-query-optimizer","expected_output_contains":["cache","cz.sql.enable"]}
4
+ {"case_id":"004","type":"should_call","user_input":"表有很多小文件影响查询性能,怎么合并优化?","expected_skill":"clickzetta-query-optimizer","expected_output_contains":["OPTIMIZE","小文件"]}
5
+ {"case_id":"005","type":"should_call","user_input":"怎么用 EXPLAIN 分析执行计划?Map Join 什么时候用?","expected_skill":"clickzetta-query-optimizer","expected_output_contains":["EXPLAIN","Map Join"]}
@@ -1,5 +1,5 @@
1
1
  ---
2
- name: cz-cli-v2
2
+ name: cz-cli
3
3
  description: "Route ALL ClickZetta Lakehouse operations to cz-cli: SQL, Studio tasks, tables, pipelines, profiles. Use when user mentions ClickZetta, Lakehouse, cz-cli, or needs profile/connection configuration."
4
4
  ---
5
5
 
@@ -70,6 +70,14 @@ cz-cli job result <job-id> Fetch job result set
70
70
 
71
71
  cz-cli status Check connection status
72
72
  cz-cli profile list List connection profiles
73
+
74
+ cz-cli datasource list [--type <type>] [--name <filter>]
75
+ List external data sources (type: mysql/kafka/redis/postgresql/...)
76
+ cz-cli datasource catalogs <name_or_id> List catalogs (databases/topics/buckets) in a data source
77
+ cz-cli datasource objects <name_or_id> <catalog>
78
+ List objects (tables/topics/collections) in a catalog
79
+ cz-cli datasource describe <name_or_id> <catalog> <object>
80
+ Show object metadata (columns, types)
73
81
  ```
74
82
 
75
83
  ## Output Formats
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@clickzetta/cz-cli-darwin-arm64",
3
- "version": "0.3.18",
3
+ "version": "0.3.19",
4
4
  "description": "cz-cli binary for macOS ARM64 (Apple Silicon)",
5
5
  "os": ["darwin"],
6
6
  "cpu": ["arm64"],