@clickzetta/cz-cli-darwin-x64 0.3.17 → 0.3.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +386 -0
  3. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +548 -0
  4. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +220 -0
  5. package/bin/skills/clickzetta-data-ingest-pipeline/eval_cases.jsonl +5 -0
  6. package/bin/skills/clickzetta-dynamic-table/SKILL.md +112 -0
  7. package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +257 -0
  8. package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +124 -0
  9. package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +96 -0
  10. package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +109 -0
  11. package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +156 -0
  12. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +751 -0
  13. package/bin/skills/clickzetta-kafka-ingest-pipeline/eval_cases.jsonl +5 -0
  14. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +324 -0
  15. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +537 -0
  16. package/bin/skills/clickzetta-query-optimizer/SKILL.md +156 -0
  17. package/bin/skills/clickzetta-query-optimizer/references/explain.md +56 -0
  18. package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +78 -0
  19. package/bin/skills/clickzetta-query-optimizer/references/optimize.md +65 -0
  20. package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +49 -0
  21. package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +42 -0
  22. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +276 -0
  23. package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +379 -0
  24. package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +166 -0
  25. package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +185 -0
  26. package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +129 -0
  27. package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +222 -0
  28. package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +125 -0
  29. package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +206 -0
  30. package/bin/skills/clickzetta-vcluster-manager/SKILL.md +212 -0
  31. package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +54 -0
  32. package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +150 -0
  33. package/bin/skills/clickzetta-volume-manager/SKILL.md +292 -0
  34. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +199 -0
  35. package/package.json +1 -1
  36. /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/SKILL.md +0 -0
  37. /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/references/dt-declaration-strategy.md +0 -0
  38. /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/references/incremental-config-reference.md +0 -0
  39. /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/references/refresh-history-guide.md +0 -0
  40. /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/references/sql-limitations.md +0 -0
  41. /package/bin/skills/{dynamic-table-alter → clickzetta-dynamic-table/dynamic-table-alter}/SKILL.md +0 -0
@@ -0,0 +1,185 @@
1
+ # Dynamic Table(动态表)SQL 参考
2
+
3
+ > **⚠️ ClickZetta 特有语法**
4
+ > - 刷新调度写法:`REFRESH INTERVAL 5 MINUTE vcluster default`(不是 `TARGET_LAG`)
5
+ > - 修改调度周期或计算集群必须用 `CREATE OR REPLACE`,`ALTER` 不支持
6
+ > - `ALTER DYNAMIC TABLE` 只支持:SUSPEND / RESUME / SET COMMENT / RENAME COLUMN / CHANGE COLUMN COMMENT / SET/UNSET PROPERTIES
7
+ > - 删除用 `DROP DYNAMIC TABLE`(不是 `DROP TABLE`)
8
+ > - 恢复用 `UNDROP TABLE`(不是 `UNDROP DYNAMIC TABLE`)
9
+ > - DESC 用 `DESC TABLE name`(不支持 `DESC DYNAMIC TABLE name EXTENDED`)
10
+
11
+ 动态表是 ClickZetta Lakehouse 的核心增量计算对象。通过 SQL 查询定义,自动增量刷新,无需手动调度。
12
+
13
+ ## CREATE DYNAMIC TABLE
14
+
15
+ ```sql
16
+ CREATE [ OR REPLACE ] DYNAMIC TABLE <name>
17
+ [ (<column_list>) ]
18
+ [ PARTITIONED BY (<col_name>) ]
19
+ [ CLUSTERED BY (<col_name>) ]
20
+ [ COMMENT <comment> ]
21
+ [ PROPERTIES ( data_lifecycle = <day_num> ) ]
22
+ REFRESH [ START WITH TIMESTAMP '<timestamp>' ] INTERVAL <n> { SECOND | MINUTE | HOUR | DAY }
23
+ vcluster <vcluster_name>
24
+ AS
25
+ <query>;
26
+ ```
27
+
28
+ **关键参数:**
29
+ - `REFRESH INTERVAL <n> MINUTE`:刷新间隔,最小 1 分钟
30
+ - `vcluster`:运行刷新任务的计算集群名称(直接跟名称,不带等号和引号)
31
+ - `OR REPLACE`:若同名动态表已存在则替换(修改 SQL 逻辑或调度配置必须用此方式)
32
+ - 建议使用 GP 型集群(如 `default`),AP 型集群不支持小文件合并
33
+
34
+ **示例:**
35
+ ```sql
36
+ -- 基础示例:每 5 分钟刷新一次订单汇总
37
+ CREATE OR REPLACE DYNAMIC TABLE dw.order_summary
38
+ REFRESH INTERVAL 5 MINUTE vcluster default
39
+ AS
40
+ SELECT
41
+ date_trunc('hour', created_at) AS hour,
42
+ region,
43
+ COUNT(*) AS order_cnt,
44
+ SUM(amount) AS total_amount
45
+ FROM ods.orders
46
+ GROUP BY 1, 2;
47
+
48
+ -- 修改调度周期(必须用 CREATE OR REPLACE)
49
+ CREATE OR REPLACE DYNAMIC TABLE dw.order_summary
50
+ REFRESH INTERVAL 10 MINUTE vcluster default
51
+ AS
52
+ SELECT
53
+ date_trunc('hour', created_at) AS hour,
54
+ region,
55
+ COUNT(*) AS order_cnt,
56
+ SUM(amount) AS total_amount
57
+ FROM ods.orders
58
+ GROUP BY 1, 2;
59
+ ```
60
+
61
+ ## ALTER DYNAMIC TABLE
62
+
63
+ ```sql
64
+ -- 暂停刷新
65
+ ALTER DYNAMIC TABLE <name> SUSPEND;
66
+
67
+ -- 恢复刷新
68
+ ALTER DYNAMIC TABLE <name> RESUME;
69
+
70
+ -- 修改注释
71
+ ALTER DYNAMIC TABLE <name> SET COMMENT '<comment>';
72
+
73
+ -- 修改列名
74
+ ALTER DYNAMIC TABLE <name> RENAME COLUMN <old_col> TO <new_col>;
75
+
76
+ -- 修改列注释(注意用 CHANGE COLUMN)
77
+ ALTER DYNAMIC TABLE <name> CHANGE COLUMN <col_name> COMMENT '<comment>';
78
+
79
+ -- 修改属性
80
+ ALTER DYNAMIC TABLE <name> SET PROPERTIES ('key' = 'value');
81
+ ALTER DYNAMIC TABLE <name> UNSET PROPERTIES ('key');
82
+ ```
83
+
84
+ > 注意:修改调度周期、计算集群、SQL 查询逻辑,必须用 `CREATE OR REPLACE DYNAMIC TABLE`,ALTER 不支持这些操作。
85
+
86
+ ## REFRESH DYNAMIC TABLE(手动触发)
87
+
88
+ ```sql
89
+ -- 手动触发一次刷新
90
+ REFRESH DYNAMIC TABLE <name>;
91
+ ```
92
+
93
+ ## DROP DYNAMIC TABLE
94
+
95
+ ```sql
96
+ -- ⚠️ 必须用 DROP DYNAMIC TABLE,不能用 DROP TABLE
97
+ DROP DYNAMIC TABLE [ IF EXISTS ] <name>;
98
+
99
+ -- 恢复已删除的动态表(⚠️ 用 UNDROP TABLE,不是 UNDROP DYNAMIC TABLE)
100
+ UNDROP TABLE <name>;
101
+ ```
102
+
103
+ ## SHOW / DESC
104
+
105
+ ```sql
106
+ -- 列出当前 schema 下所有动态表
107
+ SHOW TABLES WHERE is_dynamic = true;
108
+
109
+ -- 列出指定 schema 下的动态表
110
+ SHOW TABLES IN <schema_name> WHERE is_dynamic = true;
111
+
112
+ -- 查看动态表结构
113
+ DESC TABLE <name>;
114
+
115
+ -- 查看完整建表语句
116
+ SHOW CREATE TABLE <name>;
117
+
118
+ -- 查看刷新历史(状态、耗时、触发方式、增量行数)
119
+ SHOW DYNAMIC TABLE REFRESH HISTORY WHERE name = '<dt_name>' LIMIT 20;
120
+ ```
121
+
122
+ > ⚠️ **DESC 注意**:动态表用 `DESC TABLE name`,不支持 `DESC DYNAMIC TABLE name EXTENDED`(EXTENDED 会报错)。
123
+
124
+ ## 注意事项
125
+
126
+ - 修改 SQL 逻辑、调度周期、计算集群 → 用 `CREATE OR REPLACE`,不能用 `ALTER`
127
+ - 刷新间隔最小 1 分钟
128
+ - 删除用 `DROP DYNAMIC TABLE`(不是 `DROP TABLE`)
129
+ - 恢复用 `UNDROP TABLE`(不是 `UNDROP DYNAMIC TABLE`)
130
+ - 刷新失败不影响表的可查询性(返回上次成功版本的数据)
131
+ - 非简单加列/减列的 `CREATE OR REPLACE` 会触发一次全量刷新
132
+ - 建议使用 GP 型集群(如 `default`),AP 型集群不支持小文件合并
133
+
134
+ ## 参数化动态表(SESSION_CONFIGS)
135
+
136
+ 通过 `SESSION_CONFIGS()` 函数定义参数化查询,在刷新时传入分区值控制刷新范围:
137
+
138
+ ```sql
139
+ -- 创建参数化动态表
140
+ CREATE OR REPLACE DYNAMIC TABLE dwd.orders_partitioned
141
+ REFRESH INTERVAL 30 MINUTE vcluster default
142
+ AS
143
+ SELECT order_id, user_id, amount, dt
144
+ FROM ods.orders
145
+ WHERE dt = SESSION_CONFIGS('target_date', CAST(CURRENT_DATE() AS STRING));
146
+
147
+ -- 手动触发刷新并传入参数
148
+ REFRESH DYNAMIC TABLE dwd.orders_partitioned
149
+ WITH PROPERTIES ('target_date' = '2024-06-15');
150
+ ```
151
+
152
+ 适用场景:传统按天全量 ETL 改造为增量任务,用 SESSION_CONFIGS 替换调度变量。
153
+
154
+ ## 动态表 DML 操作
155
+
156
+ 动态表默认不支持 DML,需先开启参数(每次 DML 前都需要 SET):
157
+
158
+ ```sql
159
+ -- ⚠️ 必须在同一会话/批次中先执行 SET,再执行 DML
160
+ SET cz.sql.dt.allow.dml = true;
161
+ INSERT INTO <name> VALUES (...);
162
+
163
+ -- 删除
164
+ SET cz.sql.dt.allow.dml = true;
165
+ DELETE FROM <name> WHERE ...;
166
+ ```
167
+
168
+ > ⚠️ **DML 注意事项**:
169
+ > - `SET cz.sql.dt.allow.dml = true` 必须与 DML 语句在同一执行批次中
170
+ > - 执行 DML 后,下一次自动刷新会触发**全量刷新**(而非增量),可能耗时较长
171
+ > - UPDATE 可能因内部隐藏列(`MV__KEY`)报错,建议改用 DELETE + INSERT
172
+ > - 仅在数据修正等特殊场景使用 DML
173
+
174
+ ## 参考文档
175
+
176
+ - [CREATE DYNAMIC TABLE](https://www.yunqi.tech/documents/create-dynamic-table)
177
+ - [ALTER DYNAMIC TABLE](https://www.yunqi.tech/documents/alter-dynamic-table)
178
+ - [DROP DYNAMIC TABLE](https://www.yunqi.tech/documents/drop-dynamic-table)
179
+ - [SHOW DYNAMIC TABLES](https://www.yunqi.tech/documents/show-dynamic-table)
180
+ - [SHOW DYNAMIC TABLE REFRESH HISTORY](https://www.yunqi.tech/documents/refresh-history)
181
+ - [动态表简介](https://www.yunqi.tech/documents/dynamic_table_summary)
182
+ - [查看动态表刷新模式](https://www.yunqi.tech/documents/dynamic-table-incre)
183
+ - [传统离线任务转增量实践](https://www.yunqi.tech/documents/transformt-dt)
184
+ - [动态表支持参数化定义](https://www.yunqi.tech/documents/dynamicTable-parmaters)
185
+ - [动态表支持DML语句修改](https://www.yunqi.tech/documents/dynamicTable-dml)
@@ -0,0 +1,129 @@
1
+ # Materialized View(物化视图)SQL 参考
2
+
3
+ > **⚠️ ClickZetta 特有语法**
4
+ > - 定时刷新:`REFRESH INTERVAL 10 MINUTE vcluster default`(与动态表语法相同)
5
+ > - 手动刷新:`REFRESH MATERIALIZED VIEW <name>;`
6
+ > - 修改注释用 `ALTER TABLE`,不是 `ALTER MATERIALIZED VIEW`
7
+
8
+ 物化视图将查询结果预计算并物理存储,适合固定维度的聚合加速场景。与动态表的区别:物化视图支持手动或定时刷新,不支持增量刷新。
9
+
10
+ ## CREATE MATERIALIZED VIEW
11
+
12
+ ```sql
13
+ CREATE [ OR REPLACE ] MATERIALIZED VIEW <name>
14
+ [ COMMENT = '<comment>' ]
15
+ [ BUILD DEFERRED ]
16
+ [ REFRESH INTERVAL <N> { SECOND | MINUTE | HOUR | DAY } vcluster <vcluster_name> ]
17
+ [ DISABLE QUERY REWRITE ]
18
+ AS
19
+ <query>;
20
+ ```
21
+
22
+ **关键参数:**
23
+ - `REFRESH INTERVAL 10 MINUTE vcluster default`:定时自动刷新(与动态表语法相同)
24
+ - 不写 REFRESH 子句:只能手动触发 `REFRESH MATERIALIZED VIEW <name>;`
25
+ - `BUILD DEFERRED`:延迟构建,创建时不立即计算结果
26
+ - `DISABLE QUERY REWRITE`:禁用查询改写(不自动用 MV 加速查询)
27
+
28
+ **示例:**
29
+ ```sql
30
+ -- 定时自动刷新的物化视图(每 10 分钟)
31
+ CREATE MATERIALIZED VIEW mv_dept_stats
32
+ REFRESH INTERVAL 10 MINUTE vcluster default
33
+ AS
34
+ SELECT
35
+ d.dept_id,
36
+ d.dept_name,
37
+ COUNT(e.emp_id) AS emp_count,
38
+ AVG(e.salary) AS avg_salary
39
+ FROM departments d
40
+ JOIN employees e ON d.dept_id = e.dept_id
41
+ GROUP BY d.dept_id, d.dept_name;
42
+
43
+ -- 修改刷新周期(需要 CREATE OR REPLACE)
44
+ CREATE OR REPLACE MATERIALIZED VIEW mv_dept_stats
45
+ BUILD DEFERRED
46
+ REFRESH INTERVAL 20 MINUTE vcluster default
47
+ DISABLE QUERY REWRITE
48
+ AS
49
+ SELECT
50
+ d.dept_id,
51
+ d.dept_name,
52
+ d.location,
53
+ ANY_VALUE(d.col1) AS col1,
54
+ COUNT(e.emp_id) AS emp_count,
55
+ AVG(e.salary) AS avg_salary
56
+ FROM departments d
57
+ JOIN employees e ON d.dept_id = e.dept_id
58
+ GROUP BY d.dept_id, d.dept_name, d.location;
59
+
60
+ -- 手动刷新
61
+ REFRESH MATERIALIZED VIEW mv_dept_stats;
62
+ ```
63
+
64
+ ## ALTER MATERIALIZED VIEW
65
+
66
+ ```sql
67
+ -- 暂停自动刷新
68
+ ALTER MATERIALIZED VIEW <name> SUSPEND;
69
+
70
+ -- 恢复自动刷新
71
+ ALTER MATERIALIZED VIEW <name> RESUME;
72
+
73
+ -- 修改注释
74
+ ALTER TABLE <mv_name> SET COMMENT '<comment>';
75
+
76
+ -- 修改列注释(物化视图用 ALTER TABLE 语法)
77
+ ALTER TABLE <mv_name> CHANGE COLUMN <col_name> COMMENT '<comment>';
78
+ ```
79
+
80
+ > 注意:物化视图的注释修改使用 `ALTER TABLE`,不是 `ALTER MATERIALIZED VIEW`。
81
+
82
+ ## REFRESH MATERIALIZED VIEW
83
+
84
+ ```sql
85
+ -- 手动触发全量刷新
86
+ REFRESH MATERIALIZED VIEW <name>;
87
+ ```
88
+
89
+ ## DROP MATERIALIZED VIEW
90
+
91
+ ```sql
92
+ DROP MATERIALIZED VIEW [ IF EXISTS ] <name>;
93
+ ```
94
+
95
+ ## SHOW / DESC
96
+
97
+ ```sql
98
+ -- 列出当前 schema 下所有物化视图
99
+ SHOW TABLES WHERE is_materialized_view = true;
100
+
101
+ -- 按名称过滤
102
+ SHOW TABLES LIKE 'mv_%' WHERE is_materialized_view = true;
103
+
104
+ -- 查看物化视图结构
105
+ DESC MATERIALIZED VIEW <name>;
106
+ DESCRIBE MATERIALIZED VIEW <name> EXTENDED;
107
+
108
+ -- 查看完整建表语句
109
+ SHOW CREATE TABLE <name>;
110
+ ```
111
+
112
+ ## 动态表 vs 物化视图 选择指南
113
+
114
+ | 场景 | 推荐 |
115
+ |---|---|
116
+ | 需要秒/分钟级自动增量刷新 | Dynamic Table |
117
+ | 固定聚合,手动或低频刷新 | Materialized View |
118
+ | 需要 CDC 变更感知 | Dynamic Table + Table Stream |
119
+ | 加速 BI 查询,数据不要求实时 | Materialized View |
120
+
121
+ ## 参考文档
122
+
123
+ - [CREATE MATERIALIZED VIEW](https://www.yunqi.tech/documents/CREATEMATERIALIZEDVIEW)
124
+ - [ALTER MATERIALIZED VIEW](https://www.yunqi.tech/documents/alter-materialzied-view)
125
+ - [REFRESH MATERIALIZED VIEW](https://www.yunqi.tech/documents/REFRESH)
126
+ - [DROP MATERIALIZED VIEW](https://www.yunqi.tech/documents/DROPMATERIALIZEDVIEW)
127
+ - [SHOW MATERIALIZED VIEWS](https://www.yunqi.tech/documents/show-materialized-view)
128
+ - [物化视图概念与场景](https://www.yunqi.tech/documents/MATERIALIZEDVIEW)
129
+ - [物化视图 DDL 汇总](https://www.yunqi.tech/documents/materialized_ddl)
@@ -0,0 +1,222 @@
1
+ # Pipe SQL 参考
2
+
3
+ > **⚠️ ClickZetta 特有语法**
4
+ > - Kafka 读取函数是 `read_kafka(...)`,使用**位置参数**(不是命名参数 `=>`)
5
+ > - JSON 字段提取用 `parse_json(value::string)['field']::TYPE` 语法
6
+ > - Pipe 创建后默认自动启动,无需手动 RESUME
7
+ > - OSS Pipe 的 `PURGE=true` 放在 `USING <format> [OPTIONS (...)]` 的末尾(无 OPTIONS 时为 `USING CSV PURGE=true`,有 OPTIONS 时为 `USING CSV OPTIONS (...) PURGE=true`)
8
+
9
+ Pipe 是 ClickZetta Lakehouse 的持续数据导入对象,通过 SQL 定义从 Kafka 或对象存储(OSS/S3/COS)自动、持续地将数据导入目标表,无需外部调度。
10
+
11
+ ## CREATE PIPE — 从 Kafka 导入
12
+
13
+ ```sql
14
+ CREATE [ OR REPLACE ] PIPE <pipe_name>
15
+ VIRTUAL_CLUSTER = '<vcluster_name>'
16
+ [ BATCH_INTERVAL_IN_SECONDS = '<seconds>' ]
17
+ [ BATCH_SIZE_PER_KAFKA_PARTITION = '<count>' ]
18
+ [ RESET_KAFKA_GROUP_OFFSETS = '<none|valid|earliest|latest|timestamp_ms>' ]
19
+ [ COPY_JOB_HINT = '<json>' ]
20
+ AS
21
+ COPY INTO <target_table> FROM (
22
+ SELECT <expr> [, ...]
23
+ FROM read_kafka(
24
+ '<bootstrap_servers>', -- 必填:Kafka 集群地址
25
+ '<topic>', -- 必填:Topic 名称
26
+ '', -- 保留(填空字符串)
27
+ '<group_id>', -- 必填:持久消费者组 ID
28
+ '', '', '', '', -- 位置参数留空,由 Pipe 自动管理
29
+ 'raw', -- key 格式(目前只支持 raw)
30
+ 'raw', -- value 格式(目前只支持 raw)
31
+ 0, -- max_errors
32
+ MAP(<kafka_config>) -- Kafka 配置参数
33
+ )
34
+ );
35
+ ```
36
+
37
+ **示例:**
38
+ ```sql
39
+ -- 从 Kafka 持续导入 JSON 数据
40
+ CREATE OR REPLACE PIPE kafka_orders_pipe
41
+ VIRTUAL_CLUSTER = 'default'
42
+ BATCH_INTERVAL_IN_SECONDS = '60'
43
+ AS
44
+ COPY INTO ods.orders FROM (
45
+ SELECT
46
+ j['order_id']::STRING AS order_id,
47
+ j['user_id']::STRING AS user_id,
48
+ j['amount']::DECIMAL(10,2) AS amount,
49
+ j['created_at']::TIMESTAMP AS created_at,
50
+ CAST(`timestamp` AS TIMESTAMP) AS kafka_ts
51
+ FROM (
52
+ SELECT `timestamp`, parse_json(value::string) AS j
53
+ FROM read_kafka(
54
+ 'kafka.example.com:9092',
55
+ 'orders',
56
+ '',
57
+ 'lakehouse_consumer',
58
+ '', '', '', '',
59
+ 'raw', 'raw', 0,
60
+ MAP('kafka.security.protocol', 'PLAINTEXT')
61
+ )
62
+ )
63
+ );
64
+
65
+ -- SASL 认证
66
+ CREATE PIPE kafka_secure_pipe
67
+ VIRTUAL_CLUSTER = 'pipe_vc'
68
+ BATCH_INTERVAL_IN_SECONDS = '60'
69
+ AS
70
+ COPY INTO ods.secure_events FROM (
71
+ SELECT parse_json(value::string)['id']::STRING AS id,
72
+ CAST(`timestamp` AS TIMESTAMP) AS kafka_ts
73
+ FROM read_kafka(
74
+ 'kafka.example.com:9092', 'secure_events', '', 'cz_secure',
75
+ '', '', '', '', 'raw', 'raw', 0,
76
+ MAP(
77
+ 'kafka.security.protocol', 'SASL_PLAINTEXT',
78
+ 'kafka.sasl.mechanism', 'PLAIN',
79
+ 'kafka.sasl.username', 'my_user',
80
+ 'kafka.sasl.password', 'my_password'
81
+ )
82
+ )
83
+ );
84
+ ```
85
+
86
+ ## 验证 Kafka 连接(创建 Pipe 前)
87
+
88
+ 独立使用 `read_kafka` 探查数据时,可以在 MAP 中设置 `kafka.auto.offset.reset`:
89
+
90
+ ```sql
91
+ -- 验证连接和数据格式
92
+ SELECT value::string
93
+ FROM read_kafka(
94
+ 'kafka.example.com:9092',
95
+ 'orders',
96
+ '',
97
+ 'test_explore',
98
+ '', '', '', '',
99
+ 'raw', 'raw', 0,
100
+ MAP('kafka.security.protocol', 'PLAINTEXT', 'kafka.auto.offset.reset', 'earliest')
101
+ )
102
+ LIMIT 10;
103
+ ```
104
+
105
+ > ⚠️ **独立探查 vs Pipe 中的区别**:
106
+ > - 独立探查:可在 MAP 中设置 `kafka.auto.offset.reset` 为 `earliest` 读取历史数据
107
+ > - Pipe 中:位置参数必须留空,消费位点由 Pipe 的 `RESET_KAFKA_GROUP_OFFSETS` 参数控制
108
+
109
+ ## CREATE PIPE — 从对象存储导入
110
+
111
+ ```sql
112
+ CREATE [ OR REPLACE ] PIPE [ IF NOT EXISTS ] <pipe_name>
113
+ VIRTUAL_CLUSTER = '<virtual_cluster_name>'
114
+ INGEST_MODE = 'LIST_PURGE' | 'EVENT_NOTIFICATION'
115
+ [ COMMENT '<comment>' ]
116
+ [ COPY_JOB_HINT = '<hint>' ]
117
+ AS
118
+ COPY INTO <target_table>
119
+ FROM VOLUME <volume_name>
120
+ USING <csv | parquet | orc | json> [OPTIONS ('<key>' = '<value>', ...)] PURGE=true;
121
+ ```
122
+
123
+ **关键参数:**
124
+ - `VIRTUAL_CLUSTER`:指定虚拟集群名称(OSS Pipe 必填)
125
+ - `INGEST_MODE = 'LIST_PURGE'`:通用模式,定期扫描文件列表,必须设置 `PURGE=true`
126
+ - `INGEST_MODE = 'EVENT_NOTIFICATION'`:事件通知模式,低延迟(仅阿里云 OSS + AWS S3),不需要 `PURGE=true`
127
+ - `COMMENT 'text'`:不带等号(`COMMENT = 'text'` 会报错)
128
+ - `PURGE=true`:放在最后,OPTIONS 在其之前:`USING CSV OPTIONS (...) PURGE=true`
129
+ - PIPE 中的 COPY 语句不支持 `files`、`regexp`、`subdirectory` 参数
130
+
131
+ **示例:**
132
+ ```sql
133
+ -- LIST_PURGE 模式(带 OPTIONS)
134
+ CREATE OR REPLACE PIPE oss_events_pipe
135
+ VIRTUAL_CLUSTER = 'default'
136
+ INGEST_MODE = 'LIST_PURGE'
137
+ COMMENT 'OSS events pipeline'
138
+ AS
139
+ COPY INTO ods.events
140
+ FROM VOLUME my_oss_volume
141
+ USING PARQUET PURGE=true;
142
+
143
+ -- CSV 格式带 OPTIONS(OPTIONS 在 PURGE 之前)
144
+ CREATE PIPE oss_csv_pipe
145
+ VIRTUAL_CLUSTER = 'default'
146
+ INGEST_MODE = 'LIST_PURGE'
147
+ AS
148
+ COPY INTO ods.csv_data
149
+ FROM VOLUME my_csv_volume
150
+ USING CSV OPTIONS ('header' = 'true', 'sep' = ',') PURGE=true;
151
+
152
+ -- EVENT_NOTIFICATION 模式(不需要 PURGE)
153
+ CREATE PIPE oss_event_pipe
154
+ VIRTUAL_CLUSTER = 'default'
155
+ INGEST_MODE = 'EVENT_NOTIFICATION'
156
+ ALICLOUD_MNS_QUEUE = 'my-mns-queue-name'
157
+ AS
158
+ COPY INTO ods.events
159
+ FROM VOLUME my_oss_event_volume
160
+ USING PARQUET;
161
+ ```
162
+
163
+ ## 启停 Pipe
164
+
165
+ ```sql
166
+ -- 暂停 Pipe
167
+ ALTER PIPE <pipe_name> SET PIPE_EXECUTION_PAUSED = true;
168
+
169
+ -- 恢复 Pipe
170
+ ALTER PIPE <pipe_name> SET PIPE_EXECUTION_PAUSED = false;
171
+ ```
172
+
173
+ ## 修改 Pipe 属性
174
+
175
+ ```sql
176
+ -- 每次只能修改一个属性
177
+ ALTER PIPE <pipe_name> SET VIRTUAL_CLUSTER = 'new_vc';
178
+ ALTER PIPE <pipe_name> SET COPY_JOB_HINT = '{"cz.sql.split.kafka.strategy":"size","cz.mapper.kafka.message.size":"200000"}';
179
+ ```
180
+
181
+ > ⚠️ **ALTER PIPE 支持的属性**:
182
+ > - ✅ `PIPE_EXECUTION_PAUSED`
183
+ > - ✅ `VIRTUAL_CLUSTER`
184
+ > - ✅ `COPY_JOB_HINT`
185
+ > - ❌ `BATCH_INTERVAL_IN_SECONDS`(不支持修改,需删除重建)
186
+ > - ❌ `BATCH_SIZE_PER_KAFKA_PARTITION`(不支持修改,需删除重建)
187
+ >
188
+ > 不支持修改 COPY/INSERT 语句逻辑,需删除 Pipe 后重建。
189
+ > `COPY_JOB_HINT` 修改会覆盖所有已有 hints,需一次性设置全部参数。
190
+
191
+ ## DROP PIPE
192
+
193
+ ```sql
194
+ DROP PIPE [ IF EXISTS ] <pipe_name>;
195
+ ```
196
+
197
+ ## SHOW PIPE
198
+
199
+ ```sql
200
+ -- 列出当前 schema 下所有 Pipe
201
+ SHOW PIPES;
202
+
203
+ -- 查看 Pipe 详情(状态、延迟、定义)
204
+ DESC PIPE <pipe_name>;
205
+ DESC PIPE EXTENDED <pipe_name>;
206
+ ```
207
+
208
+ ## 注意事项
209
+
210
+ - Pipe 创建后默认自动启动,无需手动 RESUME
211
+ - Kafka Pipe 使用 consumer group 管理 offset,重建 Pipe 时保持相同 group_id 可从上次位点继续
212
+ - 对象存储 Pipe 通过文件列表或事件通知检测新文件,`load_history` 去重记录保留 7 天
213
+ - Pipe 不支持修改 AS 子句,需要删除后重建(不是 `CREATE OR REPLACE`)
214
+ - Kafka Pipe 仅支持 PLAINTEXT 和 SASL_PLAINTEXT 安全协议,不支持 SSL
215
+
216
+ ## 参考文档
217
+
218
+ - [Pipe 简介](https://www.yunqi.tech/documents/pipe-summary)
219
+ - [借助 read_kafka 函数持续导入](https://www.yunqi.tech/documents/pipe-kafka)
220
+ - [借助 Kafka 外表 Table Stream 持续导入](https://www.yunqi.tech/documents/pipe-kafka-table-stream)
221
+ - [最佳实践:使用 Pipe 高效接入 Kafka 数据](https://www.yunqi.tech/documents/pipe-kafka-bestpractice-1)
222
+ - [使用 Pipe 持续导入对象存储数据](https://www.yunqi.tech/documents/pipe-storage-object)
@@ -0,0 +1,125 @@
1
+ # Table Stream(表流)SQL 参考
2
+
3
+ > **⚠️ ClickZetta 特有语法**
4
+ > - 创建语法是 `CREATE TABLE STREAM`,参数放在 `WITH PROPERTIES (...)` 里
5
+ > - 元数据字段是 `__change_type`(双下划线),值:`INSERT` / `UPDATE_BEFORE` / `UPDATE_AFTER` / `DELETE`
6
+ > - UPDATE 产生两条记录:`UPDATE_BEFORE`(更新前)和 `UPDATE_AFTER`(更新后)
7
+ > - 通常只需要 `UPDATE_AFTER` 和 `INSERT`,忽略 `UPDATE_BEFORE`
8
+
9
+ Table Stream 捕获源表的变更数据(INSERT / UPDATE / DELETE),是构建 CDC 管道的核心对象。通常与 Dynamic Table 或 SQL 任务配合消费变更数据。
10
+
11
+ ## CREATE TABLE STREAM
12
+
13
+ ```sql
14
+ CREATE [ OR REPLACE ] TABLE STREAM [ IF NOT EXISTS ] <stream_name>
15
+ ON TABLE <source_name>
16
+ [ TIMESTAMP AS OF <timestamp_expr> ]
17
+ [ COMMENT '<comment>' ]
18
+ WITH PROPERTIES (
19
+ 'TABLE_STREAM_MODE' = 'STANDARD | APPEND_ONLY',
20
+ 'SHOW_INITIAL_ROWS' = 'TRUE | FALSE'
21
+ );
22
+ ```
23
+
24
+ **关键参数:**
25
+ - `TABLE_STREAM_MODE = STANDARD`(默认):捕获 INSERT、UPDATE、DELETE 所有变更,每行附带 `__change_type` 字段(`INSERT` / `UPDATE_BEFORE` / `UPDATE_AFTER` / `DELETE`)
26
+ - `TABLE_STREAM_MODE = APPEND_ONLY`:只捕获 INSERT,性能更好,适合仅追加写入的源表
27
+ - `SHOW_INITIAL_ROWS = TRUE`:首次消费返回建 Stream 时表中已有行;`FALSE`(默认)仅返回建 Stream 后的新变更
28
+ - `TIMESTAMP AS OF`:指定 Stream 从哪个时间点开始捕获变更
29
+
30
+ **示例:**
31
+ ```sql
32
+ -- 在普通表上创建标准流(捕获所有变更,需先开启 change_tracking)
33
+ ALTER TABLE ods.orders SET PROPERTIES ('change_tracking' = 'true');
34
+
35
+ CREATE TABLE STREAM orders_stream
36
+ ON TABLE ods.orders
37
+ WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
38
+
39
+ -- 仅追加流
40
+ CREATE TABLE STREAM events_stream
41
+ ON TABLE dw.events
42
+ COMMENT '事件流,仅追加'
43
+ WITH PROPERTIES ('TABLE_STREAM_MODE' = 'APPEND_ONLY');
44
+
45
+ -- 从指定时间点开始捕获
46
+ CREATE TABLE STREAM orders_stream_from_ts
47
+ ON TABLE ods.orders
48
+ TIMESTAMP AS OF '2024-01-01 00:00:00'
49
+ WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD', 'SHOW_INITIAL_ROWS' = 'TRUE');
50
+ ```
51
+
52
+ ## 消费 Table Stream
53
+
54
+ Table Stream 的 offset 通过 DML 操作移动。**仅 SELECT 不会移动 offset**,可以反复查询预览。执行 DML(INSERT INTO / MERGE INTO / UPDATE / DELETE)消费数据后,offset 前进。
55
+
56
+ ```sql
57
+ -- 查看当前未消费的变更数据(不移动 offset)
58
+ SELECT * FROM orders_stream;
59
+
60
+ -- 变更数据包含的系统字段
61
+ -- __change_type: INSERT | UPDATE_BEFORE | UPDATE_AFTER | DELETE
62
+ -- __commit_version: 变更版本号
63
+ -- __commit_timestamp: 变更发生时间
64
+
65
+ -- 典型用法:将变更数据 MERGE 到目标表(过滤掉 UPDATE_BEFORE)
66
+ MERGE INTO dw.orders_dim AS target
67
+ USING (
68
+ SELECT * FROM orders_stream
69
+ WHERE __change_type != 'UPDATE_BEFORE'
70
+ ) AS src
71
+ ON target.order_id = src.order_id
72
+ WHEN MATCHED AND src.__change_type = 'UPDATE_AFTER' THEN UPDATE SET target.status = src.status, target.amount = src.amount
73
+ WHEN MATCHED AND src.__change_type = 'DELETE' THEN DELETE
74
+ WHEN NOT MATCHED AND src.__change_type IN ('INSERT', 'UPDATE_AFTER') THEN INSERT (order_id, status, amount) VALUES (src.order_id, src.status, src.amount);
75
+
76
+ -- 配合 Dynamic Table 自动消费(推荐)
77
+ CREATE OR REPLACE DYNAMIC TABLE dw.orders_processed
78
+ REFRESH INTERVAL 1 MINUTE vcluster default
79
+ AS
80
+ SELECT order_id, status, amount, __change_type, __commit_timestamp
81
+ FROM orders_stream
82
+ WHERE __change_type IN ('INSERT', 'UPDATE_AFTER');
83
+ ```
84
+
85
+ ## DROP TABLE STREAM
86
+
87
+ ```sql
88
+ DROP TABLE STREAM [ IF EXISTS ] <stream_name>;
89
+ ```
90
+
91
+ ## SHOW / DESC
92
+
93
+ ```sql
94
+ -- 列出当前 schema 下所有 Table Stream
95
+ SHOW TABLE STREAMS;
96
+
97
+ -- 列出指定 schema 下的 Table Stream
98
+ SHOW TABLE STREAMS IN <schema_name>;
99
+
100
+ -- 按名称过滤
101
+ SHOW TABLE STREAMS LIKE 'orders%';
102
+
103
+ -- 查看 Table Stream 详情(源表、模式、创建时间)
104
+ DESC TABLE STREAM <stream_name>;
105
+ ```
106
+
107
+ ## 注意事项
108
+
109
+ - 仅 SELECT 不会移动 offset,可反复查询预览
110
+ - DML 操作(INSERT INTO / MERGE INTO / UPDATE / DELETE)会移动 offset
111
+ - ⚠️ 即使 DML 带 WHERE 条件过滤了部分行,**所有行的 offset 都会移动**
112
+ - 若长时间不消费,超出源表的 `data_retention_days` 后数据会丢失
113
+ - `STANDARD` 模式下 UPDATE 会产生两条记录:`UPDATE_BEFORE`(更新前)和 `UPDATE_AFTER`(更新后)
114
+ - 消费时通常过滤 `__change_type != 'UPDATE_BEFORE'`,忽略旧值
115
+ - 源表需先开启 `change_tracking`:`ALTER TABLE name SET PROPERTIES ('change_tracking' = 'true')`
116
+
117
+ ## 参考文档
118
+
119
+ - [CREATE TABLE STREAM](https://www.yunqi.tech/documents/create-table-stream)
120
+ - [DESC TABLE STREAM](https://www.yunqi.tech/documents/desc-table-stream)
121
+ - [SHOW TABLE STREAMS](https://www.yunqi.tech/documents/show-table-streams)
122
+ - [DROP TABLE STREAM](https://www.yunqi.tech/documents/drop-table-stream)
123
+ - [TABLE STREAM 简介](https://www.yunqi.tech/documents/tablestream_summary)
124
+ - [Table Stream 变化数据捕获](https://www.yunqi.tech/documents/table_stream)
125
+ - [Table Stream 最佳实践](https://www.yunqi.tech/documents/lakehouse-table-stream-best-practices)