@clickzetta/cz-cli-darwin-x64 0.3.78 → 0.3.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/package.json +1 -1
- package/bin/skills/clickzetta-access-control/LICENSE +0 -16
- package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
- package/bin/skills/clickzetta-access-control/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
- package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
- package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
- package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
- package/bin/skills/clickzetta-app-python-sdk/LICENSE +0 -16
- package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
- package/bin/skills/clickzetta-app-python-sdk/eval_cases.jsonl +0 -12
- package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
- package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
- package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
- package/bin/skills/clickzetta-batch-sync-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -227
- package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-bi-connect/LICENSE +0 -16
- package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
- package/bin/skills/clickzetta-bi-connect/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
- package/bin/skills/clickzetta-cdc-sync-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -633
- package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-data-ingest-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -237
- package/bin/skills/clickzetta-data-ingest-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-data-retention/LICENSE +0 -16
- package/bin/skills/clickzetta-data-retention/SKILL.md +0 -160
- package/bin/skills/clickzetta-data-retention/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-data-retention/references/lifecycle-reference.md +0 -175
- package/bin/skills/clickzetta-data-science/LICENSE +0 -16
- package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
- package/bin/skills/clickzetta-data-science/eval_cases.jsonl +0 -12
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
- package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
- package/bin/skills/clickzetta-data-sharing/LICENSE +0 -16
- package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
- package/bin/skills/clickzetta-data-sharing/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
- package/bin/skills/clickzetta-dba-guide/LICENSE +0 -16
- package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -542
- package/bin/skills/clickzetta-dba-guide/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-dw-modeling/LICENSE +0 -16
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -351
- package/bin/skills/clickzetta-dw-modeling/eval_cases.jsonl +0 -4
- package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
- package/bin/skills/clickzetta-dynamic-table/LICENSE +0 -16
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -230
- package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -253
- package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
- package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
- package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
- package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
- package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -427
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
- package/bin/skills/clickzetta-dynamic-table/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/SKILL.md +0 -27
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-column-validation-rules.md +0 -118
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-conversion-rules.md +0 -225
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-placeholder-rules.md +0 -182
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-refresh-rules.md +0 -98
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-self-reference-rules.md +0 -76
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-workflow.md +0 -109
- package/bin/skills/clickzetta-external-catalog/LICENSE +0 -16
- package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -123
- package/bin/skills/clickzetta-external-catalog/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
- package/bin/skills/clickzetta-external-function/LICENSE +0 -16
- package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
- package/bin/skills/clickzetta-external-function/eval_cases.jsonl +0 -4
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
- package/bin/skills/clickzetta-file-import-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -190
- package/bin/skills/clickzetta-file-import-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-index-manager/LICENSE +0 -16
- package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
- package/bin/skills/clickzetta-index-manager/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
- package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
- package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
- package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
- package/bin/skills/clickzetta-java-sdk/LICENSE +0 -16
- package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
- package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +0 -12
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
- package/bin/skills/clickzetta-kafka-ingest-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -769
- package/bin/skills/clickzetta-kafka-ingest-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -324
- package/bin/skills/clickzetta-lakehouse-connect/LICENSE +0 -16
- package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
- package/bin/skills/clickzetta-lakehouse-connect/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
- package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
- package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
- package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
- package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
- package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
- package/bin/skills/clickzetta-manage-comments/LICENSE +0 -16
- package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
- package/bin/skills/clickzetta-manage-comments/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-metadata/LICENSE +0 -16
- package/bin/skills/clickzetta-metadata/SKILL.md +0 -502
- package/bin/skills/clickzetta-metadata/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-metadata/references/instance-views-reference.md +0 -276
- package/bin/skills/clickzetta-metadata/references/metering-views-reference.md +0 -137
- package/bin/skills/clickzetta-metadata/references/show-desc-reference.md +0 -326
- package/bin/skills/clickzetta-metadata/references/views-reference.md +0 -271
- package/bin/skills/clickzetta-monitoring/LICENSE +0 -16
- package/bin/skills/clickzetta-monitoring/SKILL.md +0 -215
- package/bin/skills/clickzetta-monitoring/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
- package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
- package/bin/skills/clickzetta-oss-ingest-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -562
- package/bin/skills/clickzetta-oss-ingest-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-overview/LICENSE +0 -16
- package/bin/skills/clickzetta-overview/SKILL.md +0 -102
- package/bin/skills/clickzetta-overview/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-overview/references/brands-and-endpoints.md +0 -79
- package/bin/skills/clickzetta-overview/references/object-model.md +0 -311
- package/bin/skills/clickzetta-overview/references/studio-modules.md +0 -173
- package/bin/skills/clickzetta-pipeline-review/LICENSE +0 -16
- package/bin/skills/clickzetta-pipeline-review/SKILL.md +0 -377
- package/bin/skills/clickzetta-query-optimizer/LICENSE +0 -16
- package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
- package/bin/skills/clickzetta-query-optimizer/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
- package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
- package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
- package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
- package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
- package/bin/skills/clickzetta-realtime-sync-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -323
- package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-semantic-view/LICENSE +0 -16
- package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
- package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +0 -12
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
- package/bin/skills/clickzetta-spark-flink-connector/LICENSE +0 -16
- package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
- package/bin/skills/clickzetta-spark-flink-connector/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
- package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
- package/bin/skills/clickzetta-sql-pipeline-manager/LICENSE +0 -16
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -485
- package/bin/skills/clickzetta-sql-pipeline-manager/eval_cases.jsonl +0 -12
- package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
- package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -185
- package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
- package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -222
- package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -125
- package/bin/skills/clickzetta-sql-syntax-guide/LICENSE +0 -16
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
- package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
- package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
- package/bin/skills/clickzetta-studio-task-manager/LICENSE +0 -16
- package/bin/skills/clickzetta-studio-task-manager/SKILL.md +0 -652
- package/bin/skills/clickzetta-table-lineage/LICENSE +0 -16
- package/bin/skills/clickzetta-table-lineage/SKILL.md +0 -90
- package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +0 -1
- package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +0 -14
- package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +0 -38
- package/bin/skills/clickzetta-table-lineage/references/table_lineage_standalone.html +0 -562
- package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +0 -25
- package/bin/skills/clickzetta-table-stream-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -206
- package/bin/skills/clickzetta-table-stream-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-vcluster-manager/LICENSE +0 -16
- package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
- package/bin/skills/clickzetta-vcluster-manager/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
- package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
- package/bin/skills/clickzetta-volume-manager/LICENSE +0 -16
- package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -292
- package/bin/skills/clickzetta-volume-manager/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -199
- package/bin/skills/clickzetta-zettapark/LICENSE +0 -16
- package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
- package/bin/skills/clickzetta-zettapark/eval_cases.jsonl +0 -12
- package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
- package/bin/skills/cz-cli/SKILL.md +0 -311
- package/bin/skills/cz-cli/references/profile-setup.md +0 -120
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: clickzetta-table-lineage
|
|
3
|
-
description: |
|
|
4
|
-
表血缘可视化工具。从 ClickZetta information_schema.job_history 获取表依赖关系和成本数据,
|
|
5
|
-
导出 CSV 后嵌入 HTML 模板生成交互式血缘图。
|
|
6
|
-
当用户说"表血缘"、"table lineage"、"依赖关系图"、"数据流向"、"上下游分析"、
|
|
7
|
-
"血缘可视化"、"pipeline 可视化"时触发。
|
|
8
|
-
---
|
|
9
|
-
|
|
10
|
-
# 表血缘可视化工作流
|
|
11
|
-
|
|
12
|
-
## 参考文件
|
|
13
|
-
|
|
14
|
-
| 文件 | 说明 |
|
|
15
|
-
|------|------|
|
|
16
|
-
| `references/normalize_func.sql` | 归一化 UDF 定义(`__normalize_table` 和 `__normalize_objects`) |
|
|
17
|
-
| `references/table_relation.sql` | 表关系查询 SQL(依赖 UDF,`{N}` 为天数占位符) |
|
|
18
|
-
| `references/table_cost.sql` | 表成本查询 SQL(依赖 UDF,`{N}` 为天数占位符) |
|
|
19
|
-
| `references/table_lineage_standalone.html` | 可视化 HTML 模板 |
|
|
20
|
-
|
|
21
|
-
## 指令
|
|
22
|
-
|
|
23
|
-
### 步骤 0:确定时间范围
|
|
24
|
-
|
|
25
|
-
询问用户需要分析多长时间的血缘数据。默认 1 天。用户可指定天数如 1、7、30 等。
|
|
26
|
-
SQL 中的 `{N}` 占位符将替换为用户指定的天数。
|
|
27
|
-
|
|
28
|
-
### 步骤 1:创建归一化 UDF
|
|
29
|
-
|
|
30
|
-
通过 `cz-cli sql -f` 执行 `references/normalize_func.sql`(若 UDF 已存在可跳过;脚本使用 `CREATE OR REPLACE`,重复执行也是安全的)。
|
|
31
|
-
|
|
32
|
-
### 步骤 2:导出表关系数据
|
|
33
|
-
|
|
34
|
-
读取 `references/table_relation.sql`,将 `{N}` 替换为用户指定的天数,通过 cz-cli sql --no-limit 执行,将结果保存为 `table_relation.csv`。
|
|
35
|
-
|
|
36
|
-
### 步骤 3:导出表成本数据
|
|
37
|
-
|
|
38
|
-
读取 `references/table_cost.sql`,将 `{N}` 替换为用户指定的天数,通过 cz-cli sql --no-limit 执行,将结果保存为 `table_cost.csv`。
|
|
39
|
-
|
|
40
|
-
### 步骤 4:生成可视化页面
|
|
41
|
-
|
|
42
|
-
1. 读取 `references/table_lineage_standalone.html` 作为模板
|
|
43
|
-
2. 找到注释 `<!-- Data injection point` 所在行,在其**后面**插入:
|
|
44
|
-
|
|
45
|
-
```html
|
|
46
|
-
<script>
|
|
47
|
-
window.LINEAGE_DATA = {
|
|
48
|
-
relation: `...table_relation.csv 原始文本...`,
|
|
49
|
-
cost: `...table_cost.csv 原始文本...`
|
|
50
|
-
};
|
|
51
|
-
</script>
|
|
52
|
-
```
|
|
53
|
-
|
|
54
|
-
3. 将结果写入目标文件(如 `table_lineage.html`),用浏览器打开。
|
|
55
|
-
|
|
56
|
-
页面检测到 `window.LINEAGE_DATA` 后自动渲染,跳过文件选择。
|
|
57
|
-
|
|
58
|
-
### 步骤 5:引导用户使用可视化功能
|
|
59
|
-
|
|
60
|
-
- **点击节点**:高亮上游(橙色)和下游(青色)完整依赖路径
|
|
61
|
-
- **搜索**:顶部搜索框过滤表名(快捷键 `/` 或 `Cmd+K`)
|
|
62
|
-
- **缩放/平移**:鼠标滚轮缩放,拖拽平移,`F` 键适配屏幕
|
|
63
|
-
- **右下角小地图**:点击或拖拽快速导航
|
|
64
|
-
- **主题切换**:支持亮色/暗色主题
|
|
65
|
-
- **悬停查看详情**:DML CRU/day、累计成本、查询成本等指标
|
|
66
|
-
|
|
67
|
-
## 平台特有知识
|
|
68
|
-
|
|
69
|
-
- `information_schema.job_history` 的 `input_objects` 和 `output_objects` 是逗号分隔的表名列表
|
|
70
|
-
- 归一化通过 UDF `public.__normalize_table` 和 `public.__normalize_objects` 完成,首次使用需创建
|
|
71
|
-
- Kafka 源表名格式:`xxx_$kafka$_yyy`,归一化为 `KAFKA.xxx`
|
|
72
|
-
- Volume 源表名格式:`xxx_t_<32位hash>`,归一化为 `VOLUME.xxx`
|
|
73
|
-
- `__delta__`、`__incr__`、`__DIRECTORY__EXTERNAL__` 中间表/目录被过滤
|
|
74
|
-
- `COMPACTION_JOB` 类型作业不参与血缘构建
|
|
75
|
-
- 有 output 的作业视为产出作业(DML),无 output 的视为查询作业
|
|
76
|
-
- 成本数据为日均值:统计窗口内的总量除以所选天数 `{N}`
|
|
77
|
-
|
|
78
|
-
## 故障排除
|
|
79
|
-
|
|
80
|
-
可视化为空
|
|
81
|
-
原因:缺少作业运行历史
|
|
82
|
-
解决方案:先确认表关系和表成本 SQL 均已正确执行;若 SQL 执行成功但结果为空,说明所选时间范围内没有作业运行历史,属于正常现象。
|
|
83
|
-
|
|
84
|
-
节点过多导致卡顿
|
|
85
|
-
原因:浏览器渲染大量 DOM 节点
|
|
86
|
-
解决方案:在 SQL 查询中添加 schema 过滤条件,缩小分析范围
|
|
87
|
-
|
|
88
|
-
查询 job_history 超时
|
|
89
|
-
原因:数据量过大
|
|
90
|
-
解决方案:缩短时间窗口,如 `interval 30 day` 改为 `interval 1 day`
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"case_id":"001","type":"should_call","user_input":"分析过去 7 天的表血缘关系,生成可视化页面","expected_skill":"clickzetta-table-lineage"}
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
-- Normalize one raw object name into the name used as a lineage node.
--   * contains '__delta__', '__incr__' or '__DIRECTORY__EXTERNAL__' -> NULL
--     (delta/incr tables and volume-directory objects are dropped)
--   * '<name>_$kafka$_<suffix>'   -> 'KAFKA.<name>'   (kafka pipe)
--   * '<name>_t_<32 word chars>'  -> 'VOLUME.<name>'  (volume)
--   * anything else is returned unchanged
CREATE OR REPLACE FUNCTION public.__normalize_table(t STRING)
RETURNS STRING
RETURN
    CASE
        WHEN contains(t, '__delta__')
          OR contains(t, '__incr__')
          OR contains(t, '__DIRECTORY__EXTERNAL__')
            THEN NULL
        WHEN contains(t, '_$kafka$_')
            THEN regexp_replace(t, r'([\w\.\-]+)_\$kafka\$_\w+$', r'KAFKA.$1')
        WHEN t rlike r'_t_\w{32}$'
            THEN regexp_replace(t, r'([\w\.]+)_t_\w{32}$', r'VOLUME.$1')
        ELSE t
    END
;
|
|
10
|
-
|
|
11
|
-
-- Normalize every name in an array of raw object names.
-- NULL and empty-string elements are removed first, then each remaining
-- element is passed through public.__normalize_table.
-- Note: the filter runs BEFORE normalization, so elements that
-- __normalize_table maps to NULL remain in the result as NULLs.
CREATE OR REPLACE FUNCTION public.__normalize_objects(ts ARRAY<STRING>)
RETURNS ARRAY<STRING>
RETURN
    TRANSFORM(
        FILTER(ts, obj -> obj IS NOT NULL AND obj <> ''),
        obj -> public.__normalize_table(obj)
    )
;
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
-- Per-table production cost and query volume, derived from the jobs that ran
-- during the past {N} days. `{N}` is a placeholder that must be replaced with
-- a number of days before this statement is executed.
-- Depends on the UDF public.__normalize_objects.
with raw as (
    -- input_objects / output_objects are split on ',' into arrays of names
    select cru,
           split(input_objects, ',')  as input,
           split(output_objects, ',') as output
    from information_schema.job_history
    where start_time >= now() - interval {N} day
),
normalized as (
    select cru,
           public.__normalize_objects(input)  as input,
           public.__normalize_objects(output) as output
    from raw
),
-- Jobs that have at least one output are treated as producing (DML) jobs;
-- their CRU and job count are attributed to every output table.
as_output as (
    select table_name, sum(cru) as dml_cru, count(1) as dml_job_cnt
    from (
        select explode(output) as table_name, cru
        from normalized
        where output is not null and size(output) > 0
    )
    -- __normalize_objects leaves NULL elements for names that
    -- __normalize_table drops (delta/incr/directory objects); without this
    -- filter they would be aggregated into a spurious NULL-named table row.
    where table_name is not null
    group by table_name
),
-- Jobs without any output are treated as query jobs; their CRU and job count
-- are attributed to every input table.
as_input as (
    select table_name, sum(cru) as query_cru, count(1) as query_job_cnt
    from (
        select explode(input) as table_name, cru
        from normalized
        where output is null or size(output) == 0
    )
    where table_name is not null                  -- drop names removed by normalization
      and not contains(table_name, '__dql__')     -- skip queries such as show tables/pipes
      and not starts_with(table_name, 'system_meta_warehouse.information_schema.') -- skip information_schema queries
    group by table_name
)
select coalesce(a.table_name, b.table_name) as table_name,
       -- all metrics below are per-day averages over the {N}-day window
       round(dml_cru / {N}, 3)   as dml_cru,
       dml_job_cnt / {N}         as dml_job_cnt,
       round(query_cru / {N}, 3) as query_cru,
       query_job_cnt / {N}       as query_job_cnt
from as_output a
full join as_input b on a.table_name = b.table_name
;
|