@clickzetta/cz-cli-darwin-x64 0.3.91 → 0.3.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
- package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
- package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
- package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
- package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
- package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
- package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
- package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
- package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
- package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
- package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
- package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
- package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
- package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
- package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
- package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
- package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
- package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
- package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
- package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
- package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
- package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
- package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
- package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
- package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
- package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
- package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
- package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
- package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
- package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
- package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
- package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
- package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
- package/package.json +1 -1
- package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
- package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
- /package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
|
@@ -1,90 +1,92 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: clickzetta-table-lineage
|
|
3
3
|
description: |
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
"
|
|
4
|
+
Table lineage visualization tool for Clickzetta lakehouse only.
|
|
5
|
+
Retrieves table dependency relationships and cost data by analyzing actual jobs in the lakehouse (information_schema.job_history),
|
|
6
|
+
exports CSV and embeds into an HTML template to generate an interactive lineage graph.
|
|
7
|
+
Trigger when user says "table lineage", "dependency graph", "data flow", "upstream/downstream analysis",
|
|
8
|
+
"lineage visualization", or "pipeline visualization".
|
|
8
9
|
---
|
|
9
10
|
|
|
10
|
-
#
|
|
11
|
+
# Table Lineage Visualization Workflow
|
|
11
12
|
|
|
12
|
-
##
|
|
13
|
+
## Reference Files
|
|
13
14
|
|
|
14
|
-
|
|
|
15
|
-
|
|
16
|
-
| `references/normalize_func.sql` |
|
|
17
|
-
| `references/table_relation.sql` |
|
|
18
|
-
| `references/table_cost.sql` |
|
|
19
|
-
| `references/table_lineage_standalone.html` |
|
|
15
|
+
| File | Description |
|
|
16
|
+
|------|-------------|
|
|
17
|
+
| `references/normalize_func.sql` | Normalization UDF definitions (`__normalize_table` and `__normalize_objects`) |
|
|
18
|
+
| `references/table_relation.sql` | Table relationship query SQL (depends on UDF, `{N}` is a day-count placeholder) |
|
|
19
|
+
| `references/table_cost.sql` | Table cost query SQL (depends on UDF, `{N}` is a day-count placeholder) |
|
|
20
|
+
| `references/table_lineage_standalone.html` | Visualization HTML template |
|
|
20
21
|
|
|
21
|
-
##
|
|
22
|
+
## Instructions
|
|
22
23
|
|
|
23
|
-
###
|
|
24
|
+
### Step 0: Determine Time Range
|
|
24
25
|
|
|
25
|
-
|
|
26
|
-
|
|
26
|
+
Ask the user how many days of lineage data to analyze. Default is 1 day. User can specify days such as 1, 7, 30, etc.
|
|
27
|
+
The `{N}` placeholder in SQL will be replaced with the user-specified number of days.
|
|
27
28
|
|
|
28
|
-
###
|
|
29
|
+
### Step 1: Create and Validate Normalization UDFs
|
|
29
30
|
|
|
30
|
-
|
|
31
|
+
Create UDFs using `references/normalize_func.sql` (skip if already exists).
|
|
32
|
+
Validate the UDF by running `select public.__normalize_table('foo.bar.ods_rt_$kafka$_a9f5be53aeacae016431332a528d11bd')`; it should return `'KAFKA.foo.bar.ods_rt'` (the `_$kafka$_<hash>` suffix is stripped and the `KAFKA.` prefix is added).
|
|
31
33
|
|
|
32
|
-
###
|
|
34
|
+
### Step 2: Export Table Relationship Data
|
|
33
35
|
|
|
34
|
-
|
|
36
|
+
Read `references/table_relation.sql`, replace `{N}` with the user-specified number of days, execute it via `cz-cli sql --no-limit`, and save the result as `table_relation.csv`.
|
|
35
37
|
|
|
36
|
-
###
|
|
38
|
+
### Step 3: Export Table Cost Data
|
|
37
39
|
|
|
38
|
-
|
|
40
|
+
Read `references/table_cost.sql`, replace `{N}` with the user-specified number of days, execute it via `cz-cli sql --no-limit`, and save the result as `table_cost.csv`.
|
|
39
41
|
|
|
40
|
-
###
|
|
42
|
+
### Step 4: Generate Visualization Page
|
|
41
43
|
|
|
42
|
-
1.
|
|
43
|
-
2.
|
|
44
|
+
1. Read `references/table_lineage_standalone.html` as the template
|
|
45
|
+
2. Find the line containing the comment `<!-- Data injection point`, and insert **after** it:
|
|
44
46
|
|
|
45
47
|
```html
|
|
46
48
|
<script>
|
|
47
49
|
window.LINEAGE_DATA = {
|
|
48
|
-
relation: `...table_relation.csv
|
|
49
|
-
cost: `...table_cost.csv
|
|
50
|
+
relation: `...table_relation.csv raw text...`,
|
|
51
|
+
cost: `...table_cost.csv raw text...`
|
|
50
52
|
};
|
|
51
53
|
</script>
|
|
52
54
|
```
|
|
53
55
|
|
|
54
|
-
3.
|
|
56
|
+
3. Write the result to the target file (e.g., `table_lineage.html`) and open it in a browser.
|
|
55
57
|
|
|
56
|
-
|
|
58
|
+
The page detects `window.LINEAGE_DATA` and renders automatically, skipping the file picker.
|
|
57
59
|
|
|
58
|
-
###
|
|
60
|
+
### Step 5: Guide User Through Visualization Features
|
|
59
61
|
|
|
60
|
-
-
|
|
61
|
-
-
|
|
62
|
-
-
|
|
63
|
-
-
|
|
64
|
-
-
|
|
65
|
-
-
|
|
62
|
+
- **Click a node**: Highlights the full upstream (orange) and downstream (cyan) dependency paths
|
|
63
|
+
- **Search**: Top search box filters table names (shortcut `/` or `Cmd+K`)
|
|
64
|
+
- **Zoom/Pan**: Mouse wheel to zoom, drag to pan, `F` key to fit screen
|
|
65
|
+
- **Minimap (bottom-right)**: Click or drag for quick navigation
|
|
66
|
+
- **Theme toggle**: Supports light/dark themes
|
|
67
|
+
- **Hover for details**: DML CRU/day, cumulative cost, query cost metrics
|
|
66
68
|
|
|
67
|
-
##
|
|
69
|
+
## Platform-Specific Knowledge
|
|
68
70
|
|
|
69
|
-
- `information_schema.job_history`
|
|
70
|
-
-
|
|
71
|
-
- Kafka
|
|
72
|
-
- Volume
|
|
73
|
-
- `__delta__
|
|
74
|
-
- `COMPACTION_JOB`
|
|
75
|
-
-
|
|
76
|
-
-
|
|
71
|
+
- `information_schema.job_history`'s `input_objects` and `output_objects` are comma-separated table name lists
|
|
72
|
+
- Normalization is done via UDFs `public.__normalize_table` and `public.__normalize_objects`; must be created before first use
|
|
73
|
+
- Kafka source table name format: `xxx_$kafka$_yyy`, normalized to `KAFKA.xxx`
|
|
74
|
+
- Volume source table name format: `xxx_t_<32-char hash>`, normalized to `VOLUME.xxx`
|
|
75
|
+
- Intermediate tables/directories `__delta__`, `__incr__`, `__DIRECTORY__EXTERNAL__` are filtered out
|
|
76
|
+
- `COMPACTION_JOB` type jobs are excluded from lineage construction
|
|
77
|
+
- Jobs with output are treated as production jobs (DML); jobs without output are treated as query jobs
|
|
78
|
+
- Cost data is a daily average: total divided by the number of queried days
|
|
77
79
|
|
|
78
|
-
##
|
|
80
|
+
## Troubleshooting
|
|
79
81
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
82
|
+
Visualization is empty
|
|
83
|
+
Cause: No job execution history available
|
|
84
|
+
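Solution: First confirm that the table relationship and table cost SQL queries run without errors. If the queries succeed but return no rows, there is no job history in the selected time range, and an empty visualization is the expected result.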
|
|
83
85
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
86
|
+
Too many nodes causing lag
|
|
87
|
+
Cause: Browser rendering too many DOM nodes
|
|
88
|
+
Solution: Add schema filter conditions to the SQL queries to narrow the analysis scope
|
|
87
89
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
90
|
+
job_history query timeout
|
|
91
|
+
Cause: Data volume too large
|
|
92
|
+
Solution: Shorten the time window by using a smaller value for `{N}`, e.g., change `interval 30 day` to `interval 1 day`
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"case_id":"001","type":"should_call","user_input":"
|
|
1
|
+
{"case_id":"001","type":"should_call","user_input":"Analyze table lineage over the past 7 days and generate a visualization page","expected_skill":"clickzetta-table-lineage"}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
CREATE OR REPLACE FUNCTION public.__normalize_table(t STRING)
|
|
2
2
|
RETURNS STRING
|
|
3
|
-
RETURN case when contains(t, '__delta__') or contains(t, '__incr__') then NULL -- remove delta/incr tables
|
|
4
|
-
when contains(t, '__DIRECTORY__EXTERNAL__') then NULL --
|
|
5
|
-
when contains(t, '_$kafka$_') then regexp_replace(t, r'([\w\.\-]+)_\$kafka\$_\w+$', r'KAFKA.$1') -- kafka pipe
|
|
6
|
-
when t rlike r'_t_\w{32}$' then regexp_replace(t, r'([\w\.]+)_t_\w{32}$', r'VOLUME.$1') -- volume
|
|
7
|
-
else t -- as
|
|
3
|
+
RETURN case when contains(t, '__delta__') or contains(t, '__incr__') then NULL -- remove delta/incr intermediate tables
|
|
4
|
+
when contains(t, '__DIRECTORY__EXTERNAL__') then NULL -- volume directory listing
|
|
5
|
+
when contains(t, '_$kafka$_') then regexp_replace(t, r'([\w\.\-]+)_\$kafka\$_\w+$', r'KAFKA.$1') -- kafka pipe source
|
|
6
|
+
when t rlike r'_t_\w{32}$' then regexp_replace(t, r'([\w\.]+)_t_\w{32}$', r'VOLUME.$1') -- volume source
|
|
7
|
+
else t -- keep as is
|
|
8
8
|
end
|
|
9
9
|
;
|
|
10
10
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
--
|
|
1
|
+
-- Calculate table production cost and query volume based on job execution over the past {N} days
|
|
2
2
|
with raw as (
|
|
3
3
|
select cru, split(input_objects, ',') as input, split(output_objects, ',') as output
|
|
4
4
|
from information_schema.job_history
|
|
@@ -15,7 +15,7 @@ as_output (
|
|
|
15
15
|
from (
|
|
16
16
|
select explode(output) as table_name, cru
|
|
17
17
|
from normalized
|
|
18
|
-
where output is not null and size(output) > 0 --
|
|
18
|
+
where output is not null and size(output) > 0 -- jobs with output are considered production jobs
|
|
19
19
|
)
|
|
20
20
|
group by table_name
|
|
21
21
|
),
|
|
@@ -24,14 +24,14 @@ as_input (
|
|
|
24
24
|
from (
|
|
25
25
|
select explode(input) as table_name, cru
|
|
26
26
|
from normalized
|
|
27
|
-
where output is null or size(output) == 0 --
|
|
27
|
+
where output is null or size(output) == 0 -- jobs without output are considered query jobs
|
|
28
28
|
)
|
|
29
|
-
where not contains(table_name, '__dql__') --
|
|
30
|
-
and not starts_with(table_name, 'system_meta_warehouse.information_schema.') --
|
|
29
|
+
where not contains(table_name, '__dql__') -- filter out show tables/pipes queries
|
|
30
|
+
and not starts_with(table_name, 'system_meta_warehouse.information_schema.') -- filter out information_schema queries
|
|
31
31
|
group by table_name
|
|
32
32
|
)
|
|
33
33
|
select coalesce(a.table_name, b.table_name) as table_name,
|
|
34
|
-
-- per day
|
|
34
|
+
-- per day averages
|
|
35
35
|
round(dml_cru / {N}, 3) as dml_cru, dml_job_cnt / {N} as dml_job_cnt,
|
|
36
36
|
round(query_cru / {N}, 3) as query_cru, query_job_cnt / {N} as query_job_cnt
|
|
37
37
|
from as_output a full join as_input b on a.table_name = b.table_name
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
--
|
|
1
|
+
-- Build table production lineage graph based on job execution over the past {N} days
|
|
2
2
|
with raw as (
|
|
3
3
|
select split(input_objects, ',') as input, split(output_objects, ',') as output
|
|
4
4
|
from information_schema.job_history
|
|
5
5
|
where start_time>=now() - interval {N} day
|
|
6
6
|
and output_objects is not null
|
|
7
|
-
and job_type != 'COMPACTION_JOB' --
|
|
7
|
+
and job_type != 'COMPACTION_JOB' -- exclude compaction jobs as they add noise to lineage
|
|
8
8
|
),
|
|
9
9
|
normalized as (
|
|
10
10
|
select public.__normalize_objects(input) as input,
|