@clickzetta/cz-cli-darwin-x64 0.3.92 → 0.3.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
  3. package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
  4. package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
  5. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
  6. package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
  7. package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
  8. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
  9. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
  10. package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
  11. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
  12. package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
  13. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
  14. package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
  15. package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
  16. package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
  17. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
  18. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
  19. package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
  20. package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
  21. package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
  22. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
  23. package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
  24. package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
  25. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
  26. package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
  27. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
  28. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
  29. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
  30. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
  31. package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
  32. package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
  33. package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
  34. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
  35. package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
  36. package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
  37. package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
  38. package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
  39. package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
  40. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
  41. package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
  42. package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
  43. package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
  44. package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
  45. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
  46. package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
  47. package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
  48. package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
  49. package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
  50. package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
  51. package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
  52. package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
  53. package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
  54. package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
  55. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
  56. package/package.json +1 -1
  57. package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
  58. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  59. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
  60. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
  61. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
  62. package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
  63. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  64. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  65. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  66. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  67. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  68. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  69. /package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
@@ -1,90 +1,92 @@
1
1
  ---
2
2
  name: clickzetta-table-lineage
3
3
  description: |
4
- 表血缘可视化工具。从 ClickZetta information_schema.job_history 获取表依赖关系和成本数据,
5
- 导出 CSV 后嵌入 HTML 模板生成交互式血缘图。
6
- 当用户说"表血缘"、"table lineage"、"依赖关系图"、"数据流向"、"上下游分析"、
7
- "血缘可视化""pipeline 可视化"时触发。
4
+ Table lineage visualization tool for Clickzetta lakehouse only.
5
+ Retrieves table dependency relationships and cost data by analyzing actual jobs in the lakehouse (information_schema.job_history),
6
+ exports CSV and embeds into an HTML template to generate an interactive lineage graph.
7
+ Trigger when user says "table lineage", "dependency graph", "data flow", "upstream/downstream analysis",
8
+ "lineage visualization", or "pipeline visualization".
8
9
  ---
9
10
 
10
- # 表血缘可视化工作流
11
+ # Table Lineage Visualization Workflow
11
12
 
12
- ## 参考文件
13
+ ## Reference Files
13
14
 
14
- | 文件 | 说明 |
15
- |------|------|
16
- | `references/normalize_func.sql` | 归一化 UDF 定义(`__normalize_table` `__normalize_objects`) |
17
- | `references/table_relation.sql` | 表关系查询 SQL(依赖 UDF,`{N}` 为天数占位符) |
18
- | `references/table_cost.sql` | 表成本查询 SQL(依赖 UDF,`{N}` 为天数占位符) |
19
- | `references/table_lineage_standalone.html` | 可视化 HTML 模板 |
15
+ | File | Description |
16
+ |------|-------------|
17
+ | `references/normalize_func.sql` | Normalization UDF definitions (`__normalize_table` and `__normalize_objects`) |
18
+ | `references/table_relation.sql` | Table relationship query SQL (depends on UDF, `{N}` is a day-count placeholder) |
19
+ | `references/table_cost.sql` | Table cost query SQL (depends on UDF, `{N}` is a day-count placeholder) |
20
+ | `references/table_lineage_standalone.html` | Visualization HTML template |
20
21
 
21
- ## 指令
22
+ ## Instructions
22
23
 
23
- ### 步骤 0:确定时间范围
24
+ ### Step 0: Determine Time Range
24
25
 
25
- 询问用户需要分析多长时间的血缘数据。默认 1 天。用户可指定天数如 1、7、30 等。
26
- SQL 中的 `{N}` 占位符将替换为用户指定的天数。
26
+ Ask the user how many days of lineage data to analyze. Default is 1 day. User can specify days such as 1, 7, 30, etc.
27
+ The `{N}` placeholder in SQL will be replaced with the user-specified number of days.
27
28
 
28
- ### 步骤 1:创建归一化 UDF
29
+ ### Step 1: Create and Validate Normalization UDFs
29
30
 
30
- 通过 cz-cli sql -f 执行 `references/normalize_func.sql`(已存在则跳过)。
31
+ Create the UDFs by executing `references/normalize_func.sql` (skip this if they already exist).
32
+ Validate the UDF by running `select public.__normalize_table('foo.bar.ods_rt_$kafka$_a9f5be53aeacae016431332a528d11bd')`; it should return 'KAFKA.foo.bar.ods_rt'.
31
33
 
32
- ### 步骤 2:导出表关系数据
34
+ ### Step 2: Export Table Relationship Data
33
35
 
34
- 读取 `references/table_relation.sql`,将 `{N}` 替换为用户指定的天数,通过 cz-cli sql --no-limit 执行,将结果保存为 `table_relation.csv`。
36
+ Read `references/table_relation.sql`, replace `{N}` with the user-specified number of days, execute via cz-cli sql --no-limit, and save the result as `table_relation.csv`.
35
37
 
36
- ### 步骤 3:导出表成本数据
38
+ ### Step 3: Export Table Cost Data
37
39
 
38
- 读取 `references/table_cost.sql`,将 `{N}` 替换为用户指定的天数,通过 cz-cli sql --no-limit 执行,将结果保存为 `table_cost.csv`。
40
+ Read `references/table_cost.sql`, replace `{N}` with the user-specified number of days, execute via cz-cli sql --no-limit, and save the result as `table_cost.csv`.
39
41
 
40
- ### 步骤 4:生成可视化页面
42
+ ### Step 4: Generate Visualization Page
41
43
 
42
- 1. 读取 `references/table_lineage_standalone.html` 作为模板
43
- 2. 找到注释 `<!-- Data injection point` 所在行,在其**后面**插入:
44
+ 1. Read `references/table_lineage_standalone.html` as the template
45
+ 2. Find the line containing the comment `<!-- Data injection point`, and insert **after** it:
44
46
 
45
47
  ```html
46
48
  <script>
47
49
  window.LINEAGE_DATA = {
48
- relation: `...table_relation.csv 原始文本...`,
49
- cost: `...table_cost.csv 原始文本...`
50
+ relation: `...table_relation.csv raw text...`,
51
+ cost: `...table_cost.csv raw text...`
50
52
  };
51
53
  </script>
52
54
  ```
53
55
 
54
- 3. 将结果写入目标文件(如 `table_lineage.html`),用浏览器打开。
56
+ 3. Write the result to the target file (e.g., `table_lineage.html`) and open it in a browser.
55
57
 
56
- 页面检测到 `window.LINEAGE_DATA` 后自动渲染,跳过文件选择。
58
+ The page detects `window.LINEAGE_DATA` and renders automatically, skipping the file picker.
57
59
 
58
- ### 步骤 5:引导用户使用可视化功能
60
+ ### Step 5: Guide User Through Visualization Features
59
61
 
60
- - **点击节点**:高亮上游(橙色)和下游(青色)完整依赖路径
61
- - **搜索**:顶部搜索框过滤表名(快捷键 `/` `Cmd+K`)
62
- - **缩放/平移**:鼠标滚轮缩放,拖拽平移,`F` 键适配屏幕
63
- - **右下角小地图**:点击或拖拽快速导航
64
- - **主题切换**:支持亮色/暗色主题
65
- - **悬停查看详情**:DML CRU/day、累计成本、查询成本等指标
62
+ - **Click a node**: Highlights the full upstream (orange) and downstream (cyan) dependency paths
63
+ - **Search**: Top search box filters table names (shortcut `/` or `Cmd+K`)
64
+ - **Zoom/Pan**: Mouse wheel to zoom, drag to pan, `F` key to fit screen
65
+ - **Minimap (bottom-right)**: Click or drag for quick navigation
66
+ - **Theme toggle**: Supports light/dark themes
67
+ - **Hover for details**: DML CRU/day, cumulative cost, query cost metrics
66
68
 
67
- ## 平台特有知识
69
+ ## Platform-Specific Knowledge
68
70
 
69
- - `information_schema.job_history` `input_objects` `output_objects` 是逗号分隔的表名列表
70
- - 归一化通过 UDF `public.__normalize_table` `public.__normalize_objects` 完成,首次使用需创建
71
- - Kafka 源表名格式:`xxx_$kafka$_yyy`,归一化为 `KAFKA.xxx`
72
- - Volume 源表名格式:`xxx_t_<32hash>`,归一化为 `VOLUME.xxx`
73
- - `__delta__`、`__incr__`、`__DIRECTORY__EXTERNAL__` 中间表/目录被过滤
74
- - `COMPACTION_JOB` 类型作业不参与血缘构建
75
- - 有 output 的作业视为产出作业(DML),无 output 的视为查询作业
76
- - 成本数据为日均值:总量除以查询天数
71
+ - The `input_objects` and `output_objects` columns of `information_schema.job_history` are comma-separated lists of table names
72
+ - Normalization is done via UDFs `public.__normalize_table` and `public.__normalize_objects`; must be created before first use
73
+ - Kafka source table name format: `xxx_$kafka$_yyy`, normalized to `KAFKA.xxx`
74
+ - Volume source table name format: `xxx_t_<32-char hash>`, normalized to `VOLUME.xxx`
75
+ - Intermediate tables/directories `__delta__`, `__incr__`, `__DIRECTORY__EXTERNAL__` are filtered out
76
+ - `COMPACTION_JOB` type jobs are excluded from lineage construction
77
+ - Jobs with output are treated as production jobs (DML); jobs without output are treated as query jobs
78
+ - Cost data is a daily average: total divided by the number of queried days
77
79
 
78
- ## 故障排除
80
+ ## Troubleshooting
79
81
 
80
- 可视化为空
81
- 原因:缺少作业运行历史
82
- 解决方案:首先确认表关系和表成本 sql 正确运行,若结果为空,是正常现象。
82
+ Visualization is empty
83
+ Cause: No job execution history available
84
+ Solution: First confirm that the table relationship and table cost SQL queries run correctly. If they run successfully but return no rows, an empty visualization is expected.
83
85
 
84
- 节点过多导致卡顿
85
- 原因:浏览器渲染大量 DOM 节点
86
- 解决方案:在 SQL 查询中添加 schema 过滤条件,缩小分析范围
86
+ Too many nodes causing lag
87
+ Cause: Browser rendering too many DOM nodes
88
+ Solution: Add schema filter conditions to the SQL queries to narrow the analysis scope
87
89
 
88
- 查询 job_history 超时
89
- 原因:数据量过大
90
- 解决方案:缩短时间窗口,如 `interval 30 day` 改为 `interval 1 day`
90
+ job_history query timeout
91
+ Cause: Data volume too large
92
+ Solution: Shorten the time window, e.g., change `interval 30 day` to `interval 1 day`
@@ -1 +1 @@
1
- {"case_id":"001","type":"should_call","user_input":"分析过去 7 天的表血缘关系,生成可视化页面","expected_skill":"clickzetta-table-lineage"}
1
+ {"case_id":"001","type":"should_call","user_input":"Analyze table lineage over the past 7 days and generate a visualization page","expected_skill":"clickzetta-table-lineage"}
@@ -1,10 +1,10 @@
1
1
  CREATE OR REPLACE FUNCTION public.__normalize_table(t STRING)
2
2
  RETURNS STRING
3
- RETURN case when contains(t, '__delta__') or contains(t, '__incr__') then NULL -- remove delta/incr tables
4
- when contains(t, '__DIRECTORY__EXTERNAL__') then NULL -- show volume directory
5
- when contains(t, '_$kafka$_') then regexp_replace(t, r'([\w\.\-]+)_\$kafka\$_\w+$', r'KAFKA.$1') -- kafka pipe
6
- when t rlike r'_t_\w{32}$' then regexp_replace(t, r'([\w\.]+)_t_\w{32}$', r'VOLUME.$1') -- volume
7
- else t -- as it is
3
+ RETURN case when contains(t, '__delta__') or contains(t, '__incr__') then NULL -- remove delta/incr intermediate tables
4
+ when contains(t, '__DIRECTORY__EXTERNAL__') then NULL -- volume directory listing
5
+ when contains(t, '_$kafka$_') then regexp_replace(t, r'([\w\.\-]+)_\$kafka\$_\w+$', r'KAFKA.$1') -- kafka pipe source
6
+ when t rlike r'_t_\w{32}$' then regexp_replace(t, r'([\w\.]+)_t_\w{32}$', r'VOLUME.$1') -- volume source
7
+ else t -- keep as is
8
8
  end
9
9
  ;
10
10
 
@@ -1,4 +1,4 @@
1
- -- 根据过去 {N} 天的作业运行情况,计算表的产出代价和查询量
1
+ -- Calculate table production cost and query volume based on job execution over the past {N} days
2
2
  with raw as (
3
3
  select cru, split(input_objects, ',') as input, split(output_objects, ',') as output
4
4
  from information_schema.job_history
@@ -15,7 +15,7 @@ as_output (
15
15
  from (
16
16
  select explode(output) as table_name, cru
17
17
  from normalized
18
- where output is not null and size(output) > 0 -- output 的作业认为是产出作业
18
+ where output is not null and size(output) > 0 -- jobs with output are considered production jobs
19
19
  )
20
20
  group by table_name
21
21
  ),
@@ -24,14 +24,14 @@ as_input (
24
24
  from (
25
25
  select explode(input) as table_name, cru
26
26
  from normalized
27
- where output is null or size(output) == 0 -- 没有 output 的作业认为是查询作业
27
+ where output is null or size(output) == 0 -- jobs without output are considered query jobs
28
28
  )
29
- where not contains(table_name, '__dql__') -- 过滤掉 show tables/pipes 之类查询
30
- and not starts_with(table_name, 'system_meta_warehouse.information_schema.') -- 过滤掉查询 information_schema
29
+ where not contains(table_name, '__dql__') -- filter out show tables/pipes queries
30
+ and not starts_with(table_name, 'system_meta_warehouse.information_schema.') -- filter out information_schema queries
31
31
  group by table_name
32
32
  )
33
33
  select coalesce(a.table_name, b.table_name) as table_name,
34
- -- per day
34
+ -- per day averages
35
35
  round(dml_cru / {N}, 3) as dml_cru, dml_job_cnt / {N} as dml_job_cnt,
36
36
  round(query_cru / {N}, 3) as query_cru, query_job_cnt / {N} as query_job_cnt
37
37
  from as_output a full join as_input b on a.table_name = b.table_name
@@ -1,10 +1,10 @@
1
- -- 根据过去 {N} 天的作业运行情况,构建作业涉及的表的产出血缘关系图
1
+ -- Build table production lineage graph based on job execution over the past {N} days
2
2
  with raw as (
3
3
  select split(input_objects, ',') as input, split(output_objects, ',') as output
4
4
  from information_schema.job_history
5
5
  where start_time>=now() - interval {N} day
6
6
  and output_objects is not null
7
- and job_type != 'COMPACTION_JOB' -- 去掉 compaction 作业,对构建血缘关系是干扰项
7
+ and job_type != 'COMPACTION_JOB' -- exclude compaction jobs as they add noise to lineage
8
8
  ),
9
9
  normalized as (
10
10
  select public.__normalize_objects(input) as input,