@clickzetta/cz-cli-darwin-x64 0.3.92 → 0.3.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
  3. package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
  4. package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
  5. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
  6. package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
  7. package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
  8. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
  9. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
  10. package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
  11. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
  12. package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
  13. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
  14. package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
  15. package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
  16. package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
  17. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
  18. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
  19. package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
  20. package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
  21. package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
  22. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
  23. package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
  24. package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
  25. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
  26. package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
  27. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
  28. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
  29. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
  30. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
  31. package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
  32. package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
  33. package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
  34. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
  35. package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
  36. package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
  37. package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
  38. package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
  39. package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
  40. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
  41. package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
  42. package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
  43. package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
  44. package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
  45. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
  46. package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
  47. package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
  48. package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
  49. package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
  50. package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
  51. package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
  52. package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
  53. package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
  54. package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
  55. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
  56. package/package.json +1 -1
  57. package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
  58. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  59. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
  60. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
  61. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
  62. package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
  63. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  64. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  65. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  66. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  67. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  68. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  69. /package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
@@ -1,220 +1,161 @@
1
- # Kafka Pipe SQL 语法参考
1
+ # Kafka Pipe SQL Syntax Reference
2
2
 
3
- > 来源:https://www.yunqi.tech/documents/pipe-kafka https://www.yunqi.tech/documents/pipe-kafka-bestpractice-1
3
+ > Canonical syntax reference for ClickZetta Kafka Pipe operations.
4
+ > For workflow guidance, see `SKILL.md`.
4
5
 
5
- > **⚠️ ClickZetta READ_KAFKA 使用位置参数(positional parameters)**
6
- > - ❌ 不支持 `=>` 命名参数语法(如 `KAFKA_BROKER => 'host:port'`)
7
- > - ❌ 不支持 `TABLE(READ_KAFKA(...))` 包装
8
- > - ✅ 正确:`FROM read_kafka('broker', 'topic', '', 'group', '', '', '', '', 'raw', 'raw', 0, MAP(...))`
6
+ ---
9
7
 
10
- ## CREATE PIPE(READ_KAFKA 方式)
8
+ ## READ_KAFKA Function Signature
11
9
 
12
10
  ```sql
13
- CREATE [ OR REPLACE ] PIPE <pipe_name>
11
+ read_kafka(
12
+ '<bootstrap_servers>', -- Pos 1: Kafka broker addresses (required)
13
+ '<topic_name>', -- Pos 2: Topic name (required)
14
+ '', -- Pos 3: Topic pattern (RESERVED — always empty string)
15
+ '<group_id>', -- Pos 4: Consumer group ID (required)
16
+ '<starting_offsets>', -- Pos 5: Starting offsets (empty in Pipe; 'earliest'/'latest' standalone)
17
+ '<ending_offsets>', -- Pos 6: Ending offsets (typically empty)
18
+ '<starting_timestamp>', -- Pos 7: Starting timestamp (typically empty)
19
+ '<ending_timestamp>', -- Pos 8: Ending timestamp (typically empty)
20
+ '<key_format>', -- Pos 9: Key format (only 'raw' supported)
21
+ '<value_format>', -- Pos 10: Value format (only 'raw' supported)
22
+ <max_errors>, -- Pos 11: Max errors (integer, typically 0)
23
+ MAP(<kafka_config>) -- Pos 12: Kafka configuration key-value pairs
24
+ )
25
+ ```
26
+
27
+ > ⚠️ **Positional parameters only.**
28
+ > - ❌ `=>` named parameters not supported
29
+ > - ❌ `TABLE(READ_KAFKA(...))` wrapper not supported
30
+ > - ✅ `FROM read_kafka('broker','topic','','group','','','','','raw','raw',0,MAP(...))`
31
+
32
+ ### Output Columns
33
+
34
+ | Column | Type | Description |
35
+ |--------|------|-------------|
36
+ | `key` | BINARY | Message key |
37
+ | `value` | BINARY | Message value (payload) |
38
+ | `topic` | STRING | Source topic name |
39
+ | `partition` | INT | Partition number |
40
+ | `offset` | BIGINT | Message offset |
41
+ | `timestamp` | TIMESTAMP | Message timestamp |
42
+ | `timestamp_type` | STRING | Timestamp type |
43
+
44
+ ### Behavior: Standalone vs. Inside Pipe
45
+
46
+ | Aspect | Standalone | Inside Pipe |
47
+ |--------|-----------|-------------|
48
+ | Consumer group | Temporary, destroyed after query | Persistent, offset committed |
49
+ | Offset management | Via MAP `kafka.auto.offset.reset` | Pipe manages; positions 5–8 **must be empty** |
50
+ | Execution | One-shot query | Continuously scheduled |
51
+ | Default start | latest (override in MAP) | latest (override via `RESET_KAFKA_GROUP_OFFSETS`) |
52
+
53
+ ---
54
+
55
+ ## MAP Configuration Parameters
56
+
57
+ | Key | Values | Description |
58
+ |-----|--------|-------------|
59
+ | `kafka.security.protocol` | `PLAINTEXT`, `SASL_PLAINTEXT` | Security protocol (SSL not supported) |
60
+ | `kafka.sasl.mechanism` | `PLAIN` | SASL mechanism |
61
+ | `kafka.sasl.username` | string | SASL username |
62
+ | `kafka.sasl.password` | string | SASL password |
63
+ | `kafka.auto.offset.reset` | `earliest`, `latest` | Standalone exploration only; ignored in Pipe |
64
+ | `cz.kafka.fetch.retry.enable` | `true`, `false` | Enable fetch retry |
65
+ | `cz.kafka.fetch.retry.times` | integer | Retry count |
66
+ | `cz.kafka.fetch.retry.intervalMs` | integer | Retry interval (ms) |
67
+
68
+ ---
69
+
70
+ ## CREATE PIPE (READ_KAFKA)
71
+
72
+ ```sql
73
+ CREATE PIPE <pipe_name>
14
74
  VIRTUAL_CLUSTER = '<vcluster_name>'
15
75
  [ BATCH_INTERVAL_IN_SECONDS = '<seconds>' ]
16
76
  [ BATCH_SIZE_PER_KAFKA_PARTITION = '<count>' ]
17
77
  [ MAX_SKIP_BATCH_COUNT_ON_ERROR = '<count>' ]
18
78
  [ INITIAL_DELAY_IN_SECONDS = '<seconds>' ]
19
79
  [ RESET_KAFKA_GROUP_OFFSETS = '<offset_value>' ]
20
- [ COPY_JOB_HINT = '<json>' ]
80
+ [ COPY_JOB_HINT = '<json_string>' ]
21
81
  AS
22
- COPY INTO <target_table> FROM (
23
- SELECT <expr> [, ...]
24
- FROM read_kafka(
25
- '<bootstrap_servers>', -- 位置 1:Kafka 集群地址(必填)
26
- '<topic_name>', -- 位置 2:Topic 名称(必填)
27
- '', -- 位置 3:Topic pattern(保留,填空字符串)
28
- '<group_id>', -- 位置 4:消费者组 ID(必填)
29
- '', -- 位置 5:starting_offsets(Pipe 中留空)
30
- '', -- 位置 6:ending_offsets(Pipe 中留空)
31
- '', -- 位置 7:starting_timestamp(Pipe 中留空)
32
- '', -- 位置 8:ending_timestamp(Pipe 中留空)
33
- 'raw', -- 位置 9:key 格式(目前只支持 raw)
34
- 'raw', -- 位置 10:value 格式(目前只支持 raw)
35
- 0, -- 位置 11:max_errors
36
- MAP(<kafka_config>) -- 位置 12:Kafka 配置参数
82
+ COPY INTO <schema>.<table> FROM (
83
+ SELECT <expressions>
84
+ FROM (
85
+ SELECT `timestamp`, parse_json(value::string) AS j
86
+ FROM read_kafka(...)
37
87
  )
38
88
  );
39
89
  ```
40
90
 
41
- ### Pipe 参数说明
42
-
43
- | 参数 | 必填 | 默认值 | 说明 |
44
- |------|------|--------|------|
45
- | `VIRTUAL_CLUSTER` | 是 | — | 执行 Pipe 任务的计算集群 |
46
- | `BATCH_INTERVAL_IN_SECONDS` | 否 | 60 | 批处理间隔(秒),即数据新鲜度 |
47
- | `BATCH_SIZE_PER_KAFKA_PARTITION` | 否 | 500000 | 每个 Kafka 分区每批最大消息数 |
48
- | `MAX_SKIP_BATCH_COUNT_ON_ERROR` | 否 | 30 | 出错时跳过批次的最大重试次数 |
49
- | `INITIAL_DELAY_IN_SECONDS` | 否 | 0 | 首个作业调度延迟 |
50
- | `RESET_KAFKA_GROUP_OFFSETS` | 否 | — | 启动时消费位点(仅创建时生效) |
51
- | `COPY_JOB_HINT` | 否 | — | JSON 格式的作业参数 |
52
-
53
- ### RESET_KAFKA_GROUP_OFFSETS 可选值
54
-
55
- | 值 | 说明 |
56
- |----|------|
57
- | `'none'` | 无操作,使用 Kafka `auto.offset.reset`(默认 latest) |
58
- | `'valid'` | 检查当前位点是否过期,将过期分区重置到 earliest |
59
- | `'earliest'` | 重置到最早位点 |
60
- | `'latest'` | 重置到最新位点 |
61
- | `'<毫秒时间戳>'` | 重置到指定时间戳对应位点(如 `'1737789688000'`) |
62
-
63
- ### READ_KAFKA 参数(在 Pipe 中 vs 独立使用)
64
-
65
- | 特性 | 独立使用 read_kafka | 在 Pipe 中使用 |
66
- |------|-------------------|---------------|
67
- | 消费者组 | 临时,执行完即销毁 | 持久,保持消费位置 |
68
- | 位置管理 | 在 MAP 中设置 `kafka.auto.offset.reset` | Pipe 自动管理,位置参数**必须留空** |
69
- | 执行方式 | 一次性查询 | 持续调度执行 |
70
- | 默认起始位置 | latest(可在 MAP 中改为 earliest) | latest(由 RESET_KAFKA_GROUP_OFFSETS 控制) |
71
-
72
- ### MAP 配置参数
73
-
74
- | 参数 | 说明 |
75
- |------|------|
76
- | `kafka.security.protocol` | 安全协议:`PLAINTEXT` 或 `SASL_PLAINTEXT` |
77
- | `kafka.sasl.mechanism` | SASL 机制:`PLAIN` |
78
- | `kafka.sasl.username` | SASL 用户名 |
79
- | `kafka.sasl.password` | SASL 密码 |
80
- | `kafka.auto.offset.reset` | 独立探查时的起始位点(`earliest` / `latest`) |
81
- | `cz.kafka.fetch.retry.enable` | 启用 fetch 重试(`true`/`false`) |
82
- | `cz.kafka.fetch.retry.times` | 重试次数 |
83
- | `cz.kafka.fetch.retry.intervalMs` | 重试间隔(毫秒) |
84
-
85
- ### JSON 字段提取语法
91
+ > `CREATE OR REPLACE PIPE` is **not supported**. Use `DROP PIPE` + `CREATE PIPE`.
86
92
 
87
- ```sql
88
- -- key 和 value 都是 binary 类型,需要先转换
89
- value::string -- 转为字符串
90
- parse_json(value::string) -- 解析为 JSON 对象
91
- parse_json(value::string)['field']::TYPE -- 提取顶层字段
92
- parse_json(value::string)['nested']['key']::TYPE -- 提取嵌套字段
93
-
94
- -- 推荐模式:在子查询中先 parse_json,外层直接用 j['field']
95
- SELECT j['order_id']::STRING, j['amount']::DECIMAL(10,2)
96
- FROM (
97
- SELECT parse_json(value::string) AS j
98
- FROM read_kafka(...)
99
- )
100
- ```
93
+ ### Pipe Parameters
101
94
 
102
- ### 完整示例
95
+ | Parameter | Required | Default | Description |
96
+ |-----------|----------|---------|-------------|
97
+ | `VIRTUAL_CLUSTER` | Yes | — | Compute cluster for Pipe execution |
98
+ | `BATCH_INTERVAL_IN_SECONDS` | No | `'60'` | Batch interval = data freshness (seconds) |
99
+ | `BATCH_SIZE_PER_KAFKA_PARTITION` | No | `'500000'` | Max messages per partition per batch |
100
+ | `MAX_SKIP_BATCH_COUNT_ON_ERROR` | No | `'30'` | Consecutive error batches before Pipe pauses |
101
+ | `INITIAL_DELAY_IN_SECONDS` | No | `'0'` | Delay before first scheduled job |
102
+ | `RESET_KAFKA_GROUP_OFFSETS` | No | — | Initial offset (creation-time only) |
103
+ | `COPY_JOB_HINT` | No | — | JSON job hints |
103
104
 
104
- ```sql
105
- -- 无认证 Kafka Pipe
106
- CREATE PIPE kafka_orders_pipe
107
- VIRTUAL_CLUSTER = 'default'
108
- BATCH_INTERVAL_IN_SECONDS = '60'
109
- AS
110
- COPY INTO ods.orders FROM (
111
- SELECT
112
- j['order_id']::STRING AS order_id,
113
- j['user_id']::STRING AS user_id,
114
- j['amount']::DECIMAL(10,2) AS amount,
115
- CAST(`timestamp` AS TIMESTAMP) AS kafka_ts
116
- FROM (
117
- SELECT `timestamp`, parse_json(value::string) AS j
118
- FROM read_kafka(
119
- 'kafka.example.com:9092',
120
- 'orders',
121
- '',
122
- 'lakehouse_orders',
123
- '', '', '', '',
124
- 'raw', 'raw', 0,
125
- MAP('kafka.security.protocol', 'PLAINTEXT')
126
- )
127
- )
128
- );
105
+ ### RESET_KAFKA_GROUP_OFFSETS Values
129
106
 
130
- -- SASL 认证 + 指定时间点消费
131
- CREATE PIPE kafka_secure_pipe
132
- VIRTUAL_CLUSTER = 'pipe_vc'
133
- BATCH_INTERVAL_IN_SECONDS = '60'
134
- RESET_KAFKA_GROUP_OFFSETS = '1737789688000'
135
- AS
136
- COPY INTO ods.secure_events FROM (
137
- SELECT
138
- j['id']::STRING AS event_id,
139
- j['payload']::STRING AS payload,
140
- CAST(`timestamp` AS TIMESTAMP) AS kafka_ts
141
- FROM (
142
- SELECT `timestamp`, parse_json(value::string) AS j
143
- FROM read_kafka(
144
- 'kafka.example.com:9092',
145
- 'secure_events',
146
- '',
147
- 'cz_secure',
148
- '', '', '', '',
149
- 'raw', 'raw', 0,
150
- MAP(
151
- 'kafka.security.protocol', 'SASL_PLAINTEXT',
152
- 'kafka.sasl.mechanism', 'PLAIN',
153
- 'kafka.sasl.username', 'my_user',
154
- 'kafka.sasl.password', 'my_password'
155
- )
156
- )
157
- )
158
- );
159
- ```
107
+ | Value | Effect |
108
+ |-------|--------|
109
+ | `'none'` | No reset; use Kafka default (`auto.offset.reset` = latest) |
110
+ | `'valid'` | Reset only expired partitions to earliest |
111
+ | `'earliest'` | Consume from beginning |
112
+ | `'latest'` | Consume only new messages |
113
+ | `'<epoch_millis>'` | Consume from specific timestamp (e.g., `'1737789688000'`) |
114
+
115
+ ### COPY_JOB_HINT Keys
116
+
117
+ | Key | Default | Description |
118
+ |-----|---------|-------------|
119
+ | `cz.sql.split.kafka.strategy` | `simple` | `simple` = 1 task/partition; `size` = split by message count |
120
+ | `cz.mapper.kafka.message.size` | `1000000` | Messages per task when strategy = `size` |
121
+
122
+ > The value must be a valid JSON string, e.g. `'{"key":"value","key2":"value2"}'`. Setting `COPY_JOB_HINT` overwrites all previously set hints, so supply every key in a single statement.
160
123
 
161
124
  ---
162
125
 
163
- ## 独立探查(验证连接和数据格式)
126
+ ## CREATE PIPE (Table Stream)
164
127
 
165
128
  ```sql
166
- -- 无认证
167
- SELECT value::string
168
- FROM read_kafka(
169
- 'kafka.example.com:9092',
170
- 'orders',
171
- '',
172
- 'test_explore',
173
- '', '', '', '',
174
- 'raw', 'raw', 0,
175
- MAP('kafka.security.protocol', 'PLAINTEXT', 'kafka.auto.offset.reset', 'earliest')
176
- )
177
- LIMIT 10;
178
-
179
- -- SASL 认证
180
- SELECT value::string
181
- FROM read_kafka(
182
- 'kafka.example.com:9092',
183
- 'orders',
184
- '',
185
- 'test_explore',
186
- '', '', '', '',
187
- 'raw', 'raw', 0,
188
- MAP(
189
- 'kafka.security.protocol', 'SASL_PLAINTEXT',
190
- 'kafka.sasl.mechanism', 'PLAIN',
191
- 'kafka.sasl.username', 'my_user',
192
- 'kafka.sasl.password', 'my_password',
193
- 'kafka.auto.offset.reset', 'earliest'
194
- )
195
- )
196
- LIMIT 10;
129
+ CREATE PIPE <pipe_name>
130
+ VIRTUAL_CLUSTER = '<vcluster_name>'
131
+ [ BATCH_INTERVAL_IN_SECONDS = '<seconds>' ]
132
+ AS
133
+ INSERT INTO <schema>.<table>
134
+ SELECT <expressions>
135
+ FROM <stream_name>;
197
136
  ```
198
137
 
199
- ---
138
+ > Table Stream Pipe uses `INSERT INTO ... SELECT`, **not** `COPY INTO`.
200
139
 
201
- ## CREATE PIPE(Kafka 外部表 + Table Stream 方式)
140
+ ---
202
141
 
203
- ### 步骤 1:创建 Kafka Storage Connection
142
+ ## CREATE STORAGE CONNECTION
204
143
 
205
144
  ```sql
206
- CREATE STORAGE CONNECTION IF NOT EXISTS <conn_name>
145
+ CREATE STORAGE CONNECTION [ IF NOT EXISTS ] <conn_name>
207
146
  TYPE KAFKA
208
147
  BOOTSTRAP_SERVERS = ['<host1>:<port1>', '<host2>:<port2>']
209
- SECURITY_PROTOCOL = 'PLAINTEXT';
148
+ SECURITY_PROTOCOL = '<PLAINTEXT | SASL_PLAINTEXT>';
210
149
  ```
211
150
 
212
- ### 步骤 2:创建 Kafka 外部表
151
+ Drop: `DROP CONNECTION [ IF EXISTS ] <conn_name>;`
152
+
153
+ ---
154
+
155
+ ## CREATE EXTERNAL TABLE (Kafka)
213
156
 
214
157
  ```sql
215
- -- ⚠️ 必须显式指定列定义(不能省略)
216
- -- ⚠️ offset 是保留字,必须用反引号转义
217
- CREATE EXTERNAL TABLE <ext_table_name> (
158
+ CREATE EXTERNAL TABLE <table_name> (
218
159
  topic STRING,
219
160
  partition INT,
220
161
  `offset` BIGINT,
@@ -233,92 +174,117 @@ OPTIONS (
233
174
  CONNECTION <conn_name>;
234
175
  ```
235
176
 
236
- > **注意**:
237
- > - 列定义是**必须的**,省略会报错 `failed to detect columns`
238
- > - `offset` `timestamp` 是保留字,需要反引号转义
239
- > - 删除外部表用 `DROP TABLE`(不是 `DROP EXTERNAL TABLE`)
177
+ > - Column definitions **required** (error: `failed to detect columns` if omitted)
178
+ > - `offset`, `timestamp` are reserved words — backtick-escape always
179
+ > - Drop with `DROP TABLE` (not `DROP EXTERNAL TABLE`)
240
180
 
241
- ### 步骤 3:创建 Table Stream
181
+ ---
182
+
183
+ ## CREATE TABLE STREAM
242
184
 
243
185
  ```sql
244
186
  CREATE TABLE STREAM <stream_name>
245
- ON TABLE <ext_table_name>
187
+ ON TABLE <source_table>
246
188
  WITH PROPERTIES ('TABLE_STREAM_MODE' = 'APPEND_ONLY');
247
189
  ```
248
190
 
249
- ### 步骤 4:创建 Pipe
191
+ ---
192
+
193
+ ## ALTER PIPE
250
194
 
251
195
  ```sql
252
- CREATE PIPE <pipe_name>
253
- VIRTUAL_CLUSTER = '<vcluster_name>'
254
- BATCH_INTERVAL_IN_SECONDS = '60'
255
- AS
256
- COPY INTO <target_table>
257
- SELECT <expr> [, ...]
258
- FROM <stream_name>;
196
+ ALTER PIPE <pipe_name> SET <property> = <value>;
259
197
  ```
260
198
 
199
+ Pipe properties (set one per ALTER statement; ✅ = alterable, ❌ = not alterable, drop and recreate the Pipe instead):
200
+
201
+ | Property | Alterable | Notes |
202
+ |----------|-----------|-------|
203
+ | `PIPE_EXECUTION_PAUSED` | ✅ | `true` / `false` |
204
+ | `VIRTUAL_CLUSTER` | ✅ | New VCluster name |
205
+ | `COPY_JOB_HINT` | ✅ | JSON string; overwrites all hints |
206
+ | `BATCH_INTERVAL_IN_SECONDS` | ❌ | Drop + recreate |
207
+ | `BATCH_SIZE_PER_KAFKA_PARTITION` | ❌ | Drop + recreate |
208
+ | SELECT logic | ❌ | Drop + recreate |
209
+
261
210
  ---
262
211
 
263
- ## ALTER PIPE
212
+ ## DROP PIPE
213
+
214
+ ```sql
215
+ DROP PIPE [ IF EXISTS ] <pipe_name>;
216
+ ```
217
+
218
+ ---
219
+
220
+ ## Monitoring Queries
264
221
 
265
222
  ```sql
266
- -- 暂停
267
- ALTER PIPE <pipe_name> SET PIPE_EXECUTION_PAUSED = true;
223
+ -- Pipe details (includes pipe_latency JSON)
224
+ DESC PIPE EXTENDED <pipe_name>;
268
225
 
269
- -- 恢复
270
- ALTER PIPE <pipe_name> SET PIPE_EXECUTION_PAUSED = false;
226
+ -- List all Pipes
227
+ SHOW PIPES;
271
228
 
272
- -- 修改 VCluster
273
- ALTER PIPE <pipe_name> SET VIRTUAL_CLUSTER = 'new_vc';
229
+ -- Load history (retained 7 days)
230
+ SELECT * FROM load_history('<schema>.<table>') ORDER BY last_load_time DESC LIMIT 20;
274
231
 
275
- -- 修改 COPY_JOB_HINT
276
- ALTER PIPE <pipe_name> SET COPY_JOB_HINT = '{"cz.sql.split.kafka.strategy":"size","cz.mapper.kafka.message.size":"200000"}';
232
+ -- Pipe jobs by query_tag
233
+ SHOW JOBS WHERE query_tag = 'pipe.<workspace>.<schema>.<pipe_name>';
277
234
  ```
278
235
 
279
- > ⚠️ **ALTER PIPE 支持的属性**:
280
- > - ✅ `PIPE_EXECUTION_PAUSED`
281
- > - `VIRTUAL_CLUSTER`
282
- > - ✅ `COPY_JOB_HINT`
283
- > - `BATCH_INTERVAL_IN_SECONDS`(不支持,需删除重建)
284
- > - `BATCH_SIZE_PER_KAFKA_PARTITION`(不支持,需删除重建)
285
- >
286
- > 不支持修改 COPY/INSERT 语句逻辑,需删除 Pipe 后重建。
287
- > 修改 `COPY_JOB_HINT` 会覆盖所有已有 hints,需一次性设置全部参数。
236
+ ### pipe_latency Fields
237
+
238
+ | Field | Description |
239
+ |-------|-------------|
240
+ | `lastConsumeTimestamp` | Timestamp of last consumed offset |
241
+ | `offsetLag` | Number of unconsumed messages |
242
+ | `timeLag` | Consumer lag in ms (-1 = abnormal) |
288
243
 
289
244
  ---
290
245
 
291
- ## 监控
246
+ ## JSON Field Extraction Patterns
292
247
 
293
248
  ```sql
294
- -- 查看 Pipe 详情(含延迟信息 pipe_latency)
295
- DESC PIPE EXTENDED <pipe_name>;
249
+ -- Binary → String
250
+ value::string
296
251
 
297
- -- 查看所有 Pipe
298
- SHOW PIPES;
252
+ -- String → JSON object
253
+ parse_json(value::string)
254
+
255
+ -- Extract top-level field
256
+ parse_json(value::string)['field']::TYPE
299
257
 
300
- -- 查看加载历史
301
- SELECT * FROM load_history('<schema>.<table>')
302
- ORDER BY last_load_time DESC LIMIT 20;
258
+ -- Extract nested field
259
+ parse_json(value::string)['parent']['child']::TYPE
303
260
 
304
- -- 通过 query_tag 查看 Pipe 作业
305
- -- 格式:pipe.<workspace_name>.<schema_name>.<pipe_name>
306
- SHOW JOBS WHERE query_tag = 'pipe.my_workspace.ods.kafka_orders_pipe';
261
+ -- Deeply nested (string-within-string)
262
+ parse_json(parse_json(value::string)['outer']::STRING)['inner']::TYPE
263
+
264
+ -- Recommended: parse once in subquery
265
+ SELECT j['id']::STRING, j['amount']::DECIMAL(10,2)
266
+ FROM (SELECT parse_json(value::string) AS j FROM read_kafka(...))
307
267
  ```
308
268
 
309
269
  ---
310
270
 
311
- ## DROP PIPE
271
+ ## CSV Field Extraction Pattern
312
272
 
313
273
  ```sql
314
- DROP PIPE [ IF EXISTS ] <pipe_name>;
274
+ split(value::string, ',')[0]::STRING -- first field
275
+ split(value::string, ',')[1]::STRING -- second field
276
+ CAST(split(value::string, ',')[2] AS DECIMAL(10,2)) -- third field, with type cast
315
277
  ```
316
278
 
317
- ## 参考文档
279
+ ---
280
+
281
+ ## Reference Links
318
282
 
319
- - [Pipe 简介](https://www.yunqi.tech/documents/pipe-summary)
320
- - [借助 read_kafka 函数持续导入](https://www.yunqi.tech/documents/pipe-kafka)
321
- - [借助 Kafka 外表 Table Stream 持续导入](https://www.yunqi.tech/documents/pipe-kafka-table-stream)
322
- - [最佳实践:使用 Pipe 高效接入 Kafka 数据](https://www.yunqi.tech/documents/pipe-kafka-bestpractice-1)
323
- - [Kafka 外部表](https://www.yunqi.tech/documents/kafka-external-table)
283
+ - [Pipe Overview](https://www.yunqi.tech/documents/pipe-summary)
284
+ - [read_kafka Continuous Import](https://www.yunqi.tech/documents/pipe-kafka)
285
+ - [Kafka External Table + Table Stream](https://www.yunqi.tech/documents/pipe-kafka-table-stream)
286
+ - [Kafka Pipe Best Practice](https://www.yunqi.tech/documents/pipe-kafka-bestpractice-1)
287
+ - [read_kafka Function](https://www.yunqi.tech/documents/read_kafka)
288
+ - [Kafka External Table](https://www.yunqi.tech/documents/kafka-external-table)
324
289
  - [Kafka Storage Connection](https://www.yunqi.tech/documents/Kafka_connection)
290
+ - [PIPE Syntax](https://www.yunqi.tech/documents/pipe-syntax)