@clickzetta/cz-cli-darwin-x64 0.3.81 → 0.3.84
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-access-control/LICENSE +16 -0
- package/bin/skills/clickzetta-access-control/SKILL.md +243 -0
- package/bin/skills/clickzetta-access-control/eval_cases.jsonl +3 -0
- package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +86 -0
- package/bin/skills/clickzetta-access-control/references/grant-revoke.md +103 -0
- package/bin/skills/clickzetta-access-control/references/role-management.md +66 -0
- package/bin/skills/clickzetta-access-control/references/user-management.md +61 -0
- package/bin/skills/clickzetta-app-python-sdk/LICENSE +16 -0
- package/bin/skills/clickzetta-app-python-sdk/SKILL.md +153 -0
- package/bin/skills/clickzetta-app-python-sdk/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +196 -0
- package/bin/skills/clickzetta-app-python-sdk/references/connector.md +143 -0
- package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +122 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/LICENSE +16 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +227 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-bi-connect/LICENSE +16 -0
- package/bin/skills/clickzetta-bi-connect/SKILL.md +176 -0
- package/bin/skills/clickzetta-bi-connect/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +170 -0
- package/bin/skills/clickzetta-cdc-sync-pipeline/LICENSE +16 -0
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +633 -0
- package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-data-ingest-pipeline/LICENSE +16 -0
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +237 -0
- package/bin/skills/clickzetta-data-ingest-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-data-retention/LICENSE +16 -0
- package/bin/skills/clickzetta-data-retention/SKILL.md +160 -0
- package/bin/skills/clickzetta-data-retention/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-data-retention/references/lifecycle-reference.md +175 -0
- package/bin/skills/clickzetta-data-science/LICENSE +16 -0
- package/bin/skills/clickzetta-data-science/SKILL.md +125 -0
- package/bin/skills/clickzetta-data-science/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +146 -0
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +110 -0
- package/bin/skills/clickzetta-data-science/references/setup.md +160 -0
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +195 -0
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +122 -0
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +156 -0
- package/bin/skills/clickzetta-data-sharing/LICENSE +16 -0
- package/bin/skills/clickzetta-data-sharing/SKILL.md +160 -0
- package/bin/skills/clickzetta-data-sharing/eval_cases.jsonl +3 -0
- package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +134 -0
- package/bin/skills/clickzetta-dba-guide/LICENSE +16 -0
- package/bin/skills/clickzetta-dba-guide/SKILL.md +542 -0
- package/bin/skills/clickzetta-dba-guide/eval_cases.jsonl +3 -0
- package/bin/skills/clickzetta-dw-modeling/LICENSE +16 -0
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +351 -0
- package/bin/skills/clickzetta-dw-modeling/eval_cases.jsonl +4 -0
- package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +100 -0
- package/bin/skills/clickzetta-dynamic-table/LICENSE +16 -0
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +230 -0
- package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +253 -0
- package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +124 -0
- package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +96 -0
- package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +109 -0
- package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +135 -0
- package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +15 -0
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +185 -0
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +427 -0
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +260 -0
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +80 -0
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +190 -0
- package/bin/skills/clickzetta-dynamic-table/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/SKILL.md +27 -0
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-column-validation-rules.md +118 -0
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-conversion-rules.md +225 -0
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-placeholder-rules.md +182 -0
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-refresh-rules.md +98 -0
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-self-reference-rules.md +76 -0
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-workflow.md +109 -0
- package/bin/skills/clickzetta-external-catalog/LICENSE +16 -0
- package/bin/skills/clickzetta-external-catalog/SKILL.md +123 -0
- package/bin/skills/clickzetta-external-catalog/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +130 -0
- package/bin/skills/clickzetta-external-function/LICENSE +16 -0
- package/bin/skills/clickzetta-external-function/SKILL.md +203 -0
- package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -0
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +171 -0
- package/bin/skills/clickzetta-file-import-pipeline/LICENSE +16 -0
- package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +190 -0
- package/bin/skills/clickzetta-file-import-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-index-manager/LICENSE +16 -0
- package/bin/skills/clickzetta-index-manager/SKILL.md +140 -0
- package/bin/skills/clickzetta-index-manager/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +67 -0
- package/bin/skills/clickzetta-index-manager/references/index-management.md +73 -0
- package/bin/skills/clickzetta-index-manager/references/inverted-index.md +80 -0
- package/bin/skills/clickzetta-index-manager/references/vector-index.md +81 -0
- package/bin/skills/clickzetta-java-sdk/LICENSE +16 -0
- package/bin/skills/clickzetta-java-sdk/SKILL.md +186 -0
- package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +163 -0
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +212 -0
- package/bin/skills/clickzetta-kafka-ingest-pipeline/LICENSE +16 -0
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +769 -0
- package/bin/skills/clickzetta-kafka-ingest-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +324 -0
- package/bin/skills/clickzetta-lakehouse-connect/LICENSE +16 -0
- package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +218 -0
- package/bin/skills/clickzetta-lakehouse-connect/eval_cases.jsonl +3 -0
- package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +35 -0
- package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +435 -0
- package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +478 -0
- package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +225 -0
- package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +468 -0
- package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +445 -0
- package/bin/skills/clickzetta-manage-comments/LICENSE +16 -0
- package/bin/skills/clickzetta-manage-comments/SKILL.md +219 -0
- package/bin/skills/clickzetta-manage-comments/eval_cases.jsonl +3 -0
- package/bin/skills/clickzetta-metadata/LICENSE +16 -0
- package/bin/skills/clickzetta-metadata/SKILL.md +502 -0
- package/bin/skills/clickzetta-metadata/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-metadata/references/instance-views-reference.md +276 -0
- package/bin/skills/clickzetta-metadata/references/metering-views-reference.md +137 -0
- package/bin/skills/clickzetta-metadata/references/show-desc-reference.md +326 -0
- package/bin/skills/clickzetta-metadata/references/views-reference.md +271 -0
- package/bin/skills/clickzetta-monitoring/LICENSE +16 -0
- package/bin/skills/clickzetta-monitoring/SKILL.md +215 -0
- package/bin/skills/clickzetta-monitoring/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +97 -0
- package/bin/skills/clickzetta-monitoring/references/show-jobs.md +48 -0
- package/bin/skills/clickzetta-oss-ingest-pipeline/LICENSE +16 -0
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +562 -0
- package/bin/skills/clickzetta-oss-ingest-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-overview/LICENSE +16 -0
- package/bin/skills/clickzetta-overview/SKILL.md +102 -0
- package/bin/skills/clickzetta-overview/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-overview/references/brands-and-endpoints.md +79 -0
- package/bin/skills/clickzetta-overview/references/object-model.md +311 -0
- package/bin/skills/clickzetta-overview/references/studio-modules.md +173 -0
- package/bin/skills/clickzetta-pipeline-review/LICENSE +16 -0
- package/bin/skills/clickzetta-pipeline-review/SKILL.md +377 -0
- package/bin/skills/clickzetta-query-optimizer/LICENSE +16 -0
- package/bin/skills/clickzetta-query-optimizer/SKILL.md +156 -0
- package/bin/skills/clickzetta-query-optimizer/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-query-optimizer/references/explain.md +56 -0
- package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +78 -0
- package/bin/skills/clickzetta-query-optimizer/references/optimize.md +65 -0
- package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +49 -0
- package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +42 -0
- package/bin/skills/clickzetta-realtime-sync-pipeline/LICENSE +16 -0
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +323 -0
- package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-semantic-view/LICENSE +16 -0
- package/bin/skills/clickzetta-semantic-view/SKILL.md +207 -0
- package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +167 -0
- package/bin/skills/clickzetta-spark-flink-connector/LICENSE +16 -0
- package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +92 -0
- package/bin/skills/clickzetta-spark-flink-connector/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +147 -0
- package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +132 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/LICENSE +16 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +485 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +166 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +185 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +129 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +222 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +125 -0
- package/bin/skills/clickzetta-sql-syntax-guide/LICENSE +16 -0
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +249 -0
- package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +3 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +350 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +279 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +504 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +372 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +260 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +382 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +346 -0
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +229 -0
- package/bin/skills/clickzetta-studio-task-manager/LICENSE +16 -0
- package/bin/skills/clickzetta-studio-task-manager/SKILL.md +652 -0
- package/bin/skills/clickzetta-table-lineage/LICENSE +16 -0
- package/bin/skills/clickzetta-table-lineage/SKILL.md +90 -0
- package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -0
- package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +14 -0
- package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +38 -0
- package/bin/skills/clickzetta-table-lineage/references/table_lineage_standalone.html +562 -0
- package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +25 -0
- package/bin/skills/clickzetta-table-stream-pipeline/LICENSE +16 -0
- package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +206 -0
- package/bin/skills/clickzetta-table-stream-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-vcluster-manager/LICENSE +16 -0
- package/bin/skills/clickzetta-vcluster-manager/SKILL.md +212 -0
- package/bin/skills/clickzetta-vcluster-manager/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +54 -0
- package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +150 -0
- package/bin/skills/clickzetta-volume-manager/LICENSE +16 -0
- package/bin/skills/clickzetta-volume-manager/SKILL.md +292 -0
- package/bin/skills/clickzetta-volume-manager/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +199 -0
- package/bin/skills/clickzetta-zettapark/LICENSE +16 -0
- package/bin/skills/clickzetta-zettapark/SKILL.md +248 -0
- package/bin/skills/clickzetta-zettapark/eval_cases.jsonl +12 -0
- package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +283 -0
- package/bin/skills/cz-cli/SKILL.md +313 -0
- package/bin/skills/cz-cli/references/profile-setup.md +120 -0
- package/package.json +1 -1
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: clickzetta-table-lineage
|
|
3
|
+
description: |
|
|
4
|
+
表血缘可视化工具。从 ClickZetta information_schema.job_history 获取表依赖关系和成本数据,
|
|
5
|
+
导出 CSV 后嵌入 HTML 模板生成交互式血缘图。
|
|
6
|
+
当用户说"表血缘"、"table lineage"、"依赖关系图"、"数据流向"、"上下游分析"、
|
|
7
|
+
"血缘可视化"、"pipeline 可视化"时触发。
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# 表血缘可视化工作流
|
|
11
|
+
|
|
12
|
+
## 参考文件
|
|
13
|
+
|
|
14
|
+
| 文件 | 说明 |
|
|
15
|
+
|------|------|
|
|
16
|
+
| `references/normalize_func.sql` | 归一化 UDF 定义(`__normalize_table` 和 `__normalize_objects`) |
|
|
17
|
+
| `references/table_relation.sql` | 表关系查询 SQL(依赖 UDF,`{N}` 为天数占位符) |
|
|
18
|
+
| `references/table_cost.sql` | 表成本查询 SQL(依赖 UDF,`{N}` 为天数占位符) |
|
|
19
|
+
| `references/table_lineage_standalone.html` | 可视化 HTML 模板 |
|
|
20
|
+
|
|
21
|
+
## 指令
|
|
22
|
+
|
|
23
|
+
### 步骤 0:确定时间范围
|
|
24
|
+
|
|
25
|
+
询问用户需要分析多长时间的血缘数据。默认 1 天。用户可指定天数如 1、7、30 等。
|
|
26
|
+
SQL 中的 `{N}` 占位符将替换为用户指定的天数。
|
|
27
|
+
|
|
28
|
+
### 步骤 1:创建归一化 UDF
|
|
29
|
+
|
|
30
|
+
通过 cz-cli sql -f 执行 `references/normalize_func.sql`(已存在则跳过)。
|
|
31
|
+
|
|
32
|
+
### 步骤 2:导出表关系数据
|
|
33
|
+
|
|
34
|
+
读取 `references/table_relation.sql`,将 `{N}` 替换为用户指定的天数,通过 cz-cli sql --no-limit 执行,将结果保存为 `table_relation.csv`。
|
|
35
|
+
|
|
36
|
+
### 步骤 3:导出表成本数据
|
|
37
|
+
|
|
38
|
+
读取 `references/table_cost.sql`,将 `{N}` 替换为用户指定的天数,通过 cz-cli sql --no-limit 执行,将结果保存为 `table_cost.csv`。
|
|
39
|
+
|
|
40
|
+
### 步骤 4:生成可视化页面
|
|
41
|
+
|
|
42
|
+
1. 读取 `references/table_lineage_standalone.html` 作为模板
|
|
43
|
+
2. 找到注释 `<!-- Data injection point` 所在行,在其**后面**插入以下内容(CSV 文本位于 JavaScript 模板字符串中,若其中含有反引号、反斜杠或 `${`,需先转义):
|
|
44
|
+
|
|
45
|
+
```html
|
|
46
|
+
<script>
|
|
47
|
+
window.LINEAGE_DATA = {
|
|
48
|
+
relation: `...table_relation.csv 原始文本...`,
|
|
49
|
+
cost: `...table_cost.csv 原始文本...`
|
|
50
|
+
};
|
|
51
|
+
</script>
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
3. 将结果写入目标文件(如 `table_lineage.html`),用浏览器打开。
|
|
55
|
+
|
|
56
|
+
页面检测到 `window.LINEAGE_DATA` 后自动渲染,跳过文件选择。
|
|
57
|
+
|
|
58
|
+
### 步骤 5:引导用户使用可视化功能
|
|
59
|
+
|
|
60
|
+
- **点击节点**:高亮上游(橙色)和下游(青色)完整依赖路径
|
|
61
|
+
- **搜索**:顶部搜索框过滤表名(快捷键 `/` 或 `Cmd+K`)
|
|
62
|
+
- **缩放/平移**:鼠标滚轮缩放,拖拽平移,`F` 键适配屏幕
|
|
63
|
+
- **右下角小地图**:点击或拖拽快速导航
|
|
64
|
+
- **主题切换**:支持亮色/暗色主题
|
|
65
|
+
- **悬停查看详情**:DML CRU/day、累计成本、查询成本等指标
|
|
66
|
+
|
|
67
|
+
## 平台特有知识
|
|
68
|
+
|
|
69
|
+
- `information_schema.job_history` 的 `input_objects` 和 `output_objects` 是逗号分隔的表名列表
|
|
70
|
+
- 归一化通过 UDF `public.__normalize_table` 和 `public.__normalize_objects` 完成,首次使用需创建
|
|
71
|
+
- Kafka 源表名格式:`xxx_$kafka$_yyy`,归一化为 `KAFKA.xxx`
|
|
72
|
+
- Volume 源表名格式:`xxx_t_<32位hash>`,归一化为 `VOLUME.xxx`
|
|
73
|
+
- `__delta__`、`__incr__`、`__DIRECTORY__EXTERNAL__` 中间表/目录被过滤
|
|
74
|
+
- `COMPACTION_JOB` 类型作业不参与血缘构建
|
|
75
|
+
- 有 output 的作业视为产出作业(DML),无 output 的视为查询作业
|
|
76
|
+
- 成本数据为日均值:总量除以查询天数
|
|
77
|
+
|
|
78
|
+
## 故障排除
|
|
79
|
+
|
|
80
|
+
可视化为空
|
|
81
|
+
原因:缺少作业运行历史
|
|
82
|
+
解决方案:先确认表关系和表成本 SQL 均已正确执行;若查询结果本身为空,说明所选时间范围内没有作业记录,属于正常现象。
|
|
83
|
+
|
|
84
|
+
节点过多导致卡顿
|
|
85
|
+
原因:浏览器渲染大量 DOM 节点
|
|
86
|
+
解决方案:在 SQL 查询中添加 schema 过滤条件,缩小分析范围
|
|
87
|
+
|
|
88
|
+
查询 job_history 超时
|
|
89
|
+
原因:数据量过大
|
|
90
|
+
解决方案:缩短时间窗口,如将 `interval 30 day` 改为 `interval 1 day`(即把 `{N}` 从 30 调整为 1)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"case_id":"001","type":"should_call","user_input":"分析过去 7 天的表血缘关系,生成可视化页面","expected_skill":"clickzetta-table-lineage"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
-- Maps one raw object name to the name used in the lineage graph.
--   * names containing __delta__, __incr__ or __DIRECTORY__EXTERNAL__ yield NULL
--   * names containing _$kafka$_ are rewritten by regexp_replace to KAFKA.<captured prefix>
--   * names ending in _t_ followed by 32 word characters are rewritten to VOLUME.<captured prefix>
--   * any other name is returned unchanged
create or replace function public.__normalize_table(t string)
returns string
return
    case
        -- delta / incremental tables and volume directory listings
        when contains(t, '__delta__')
          or contains(t, '__incr__')
          or contains(t, '__DIRECTORY__EXTERNAL__')
            then null
        -- Kafka pipe source
        when contains(t, '_$kafka$_')
            then regexp_replace(t, r'([\w\.\-]+)_\$kafka\$_\w+$', r'KAFKA.$1')
        -- volume source
        when t rlike r'_t_\w{32}$'
            then regexp_replace(t, r'([\w\.]+)_t_\w{32}$', r'VOLUME.$1')
        -- everything else passes through untouched
        else t
    end
;
|
|
10
|
+
|
|
11
|
+
-- Normalizes every entry of an array of object names.
-- NULL and empty-string entries are removed first; each remaining entry is then
-- passed through public.__normalize_table. Because the mapping runs after the
-- filter, any NULL returned by public.__normalize_table remains in the result.
create or replace function public.__normalize_objects(ts array<string>)
returns array<string>
return
    transform(
        filter(ts, obj -> obj is not null and obj <> ''),
        obj -> public.__normalize_table(obj)
    )
;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
-- Per-table production cost and query volume, derived from the jobs that ran in
-- the last {N} days. {N} is a placeholder for the number of days and must be
-- substituted before the statement is executed.
-- Output columns: table_name, dml_cru, dml_job_cnt, query_cru, query_job_cnt
-- (all four metrics are per-day averages: totals divided by {N}).
with raw as (
    -- one row per job; the comma-separated object lists become arrays
    select
        cru,
        split(input_objects, ',')  as input,
        split(output_objects, ',') as output
    from information_schema.job_history
    where start_time >= now() - interval {N} day
),
normalized as (
    select
        cru,
        public.__normalize_objects(input)  as input,
        public.__normalize_objects(output) as output
    from raw
),
as_output as (
    -- a job with at least one output is treated as a producing (DML) job
    select
        table_name,
        sum(cru) as dml_cru,
        count(1) as dml_job_cnt
    from (
        select explode(output) as table_name, cru
        from normalized
        where output is not null and size(output) > 0
    )
    -- names that normalization maps to NULL (delta/incr tables, volume directories)
    -- must not form a NULL table_name group in the result
    where table_name is not null
    group by table_name
),
as_input as (
    -- a job without any output is treated as a query job
    select
        table_name,
        sum(cru) as query_cru,
        count(1) as query_job_cnt
    from (
        select explode(input) as table_name, cru
        from normalized
        where output is null or size(output) == 0
    )
    where table_name is not null
      and not contains(table_name, '__dql__')  -- skip show tables/pipes style queries
      and not starts_with(table_name, 'system_meta_warehouse.information_schema.')  -- skip information_schema queries
    group by table_name
)
select
    coalesce(a.table_name, b.table_name) as table_name,
    -- per day
    round(a.dml_cru / {N}, 3)   as dml_cru,
    a.dml_job_cnt / {N}         as dml_job_cnt,
    round(b.query_cru / {N}, 3) as query_cru,
    b.query_job_cnt / {N}       as query_job_cnt
from as_output a
full join as_input b
    on a.table_name = b.table_name
;
|