@clickzetta/cz-cli-darwin-arm64 0.3.92 → 0.3.94
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
- package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
- package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
- package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
- package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
- package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
- package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
- package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
- package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
- package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
- package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
- package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
- package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
- package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
- package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
- package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
- package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
- package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
- package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
- package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
- package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
- package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
- package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
- package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
- package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
- package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
- package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
- package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
- package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
- package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
- package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
- package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
- package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
- package/package.json +1 -1
- package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
- package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
- package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
|
@@ -1,220 +1,161 @@
|
|
|
1
|
-
# Kafka Pipe SQL
|
|
1
|
+
# Kafka Pipe SQL Syntax Reference
|
|
2
2
|
|
|
3
|
-
>
|
|
3
|
+
> Canonical syntax reference for ClickZetta Kafka Pipe operations.
|
|
4
|
+
> For workflow guidance, see `SKILL.md`.
|
|
4
5
|
|
|
5
|
-
|
|
6
|
-
> - ❌ 不支持 `=>` 命名参数语法(如 `KAFKA_BROKER => 'host:port'`)
|
|
7
|
-
> - ❌ 不支持 `TABLE(READ_KAFKA(...))` 包装
|
|
8
|
-
> - ✅ 正确:`FROM read_kafka('broker', 'topic', '', 'group', '', '', '', '', 'raw', 'raw', 0, MAP(...))`
|
|
6
|
+
---
|
|
9
7
|
|
|
10
|
-
##
|
|
8
|
+
## READ_KAFKA Function Signature
|
|
11
9
|
|
|
12
10
|
```sql
|
|
13
|
-
|
|
11
|
+
read_kafka(
|
|
12
|
+
'<bootstrap_servers>', -- Pos 1: Kafka broker addresses (required)
|
|
13
|
+
'<topic_name>', -- Pos 2: Topic name (required)
|
|
14
|
+
'', -- Pos 3: Topic pattern (RESERVED — always empty string)
|
|
15
|
+
'<group_id>', -- Pos 4: Consumer group ID (required)
|
|
16
|
+
'<starting_offsets>', -- Pos 5: Starting offsets (empty in Pipe; 'earliest'/'latest' standalone)
|
|
17
|
+
'<ending_offsets>', -- Pos 6: Ending offsets (typically empty)
|
|
18
|
+
'<starting_timestamp>', -- Pos 7: Starting timestamp (typically empty)
|
|
19
|
+
'<ending_timestamp>', -- Pos 8: Ending timestamp (typically empty)
|
|
20
|
+
'<key_format>', -- Pos 9: Key format (only 'raw' supported)
|
|
21
|
+
'<value_format>', -- Pos 10: Value format (only 'raw' supported)
|
|
22
|
+
<max_errors>, -- Pos 11: Max errors (integer, typically 0)
|
|
23
|
+
MAP(<kafka_config>) -- Pos 12: Kafka configuration key-value pairs
|
|
24
|
+
)
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
> ⚠️ **Positional parameters only.**
|
|
28
|
+
> - ❌ `=>` named parameters not supported
|
|
29
|
+
> - ❌ `TABLE(READ_KAFKA(...))` wrapper not supported
|
|
30
|
+
> - ✅ `FROM read_kafka('broker','topic','','group','','','','','raw','raw',0,MAP(...))`
|
|
31
|
+
|
|
32
|
+
### Output Columns
|
|
33
|
+
|
|
34
|
+
| Column | Type | Description |
|
|
35
|
+
|--------|------|-------------|
|
|
36
|
+
| `key` | BINARY | Message key |
|
|
37
|
+
| `value` | BINARY | Message value (payload) |
|
|
38
|
+
| `topic` | STRING | Source topic name |
|
|
39
|
+
| `partition` | INT | Partition number |
|
|
40
|
+
| `offset` | BIGINT | Message offset |
|
|
41
|
+
| `timestamp` | TIMESTAMP | Message timestamp |
|
|
42
|
+
| `timestamp_type` | STRING | Timestamp type |
|
|
43
|
+
|
|
44
|
+
### Behavior: Standalone vs. Inside Pipe
|
|
45
|
+
|
|
46
|
+
| Aspect | Standalone | Inside Pipe |
|
|
47
|
+
|--------|-----------|-------------|
|
|
48
|
+
| Consumer group | Temporary, destroyed after query | Persistent, offset committed |
|
|
49
|
+
| Offset management | Via MAP `kafka.auto.offset.reset` | Pipe manages; positions 5–8 **must be empty** |
|
|
50
|
+
| Execution | One-shot query | Continuously scheduled |
|
|
51
|
+
| Default start | latest (override in MAP) | latest (override via `RESET_KAFKA_GROUP_OFFSETS`) |
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## MAP Configuration Parameters
|
|
56
|
+
|
|
57
|
+
| Key | Values | Description |
|
|
58
|
+
|-----|--------|-------------|
|
|
59
|
+
| `kafka.security.protocol` | `PLAINTEXT`, `SASL_PLAINTEXT` | Security protocol (SSL not supported) |
|
|
60
|
+
| `kafka.sasl.mechanism` | `PLAIN` | SASL mechanism |
|
|
61
|
+
| `kafka.sasl.username` | string | SASL username |
|
|
62
|
+
| `kafka.sasl.password` | string | SASL password |
|
|
63
|
+
| `kafka.auto.offset.reset` | `earliest`, `latest` | Standalone exploration only; ignored in Pipe |
|
|
64
|
+
| `cz.kafka.fetch.retry.enable` | `true`, `false` | Enable fetch retry |
|
|
65
|
+
| `cz.kafka.fetch.retry.times` | integer | Retry count |
|
|
66
|
+
| `cz.kafka.fetch.retry.intervalMs` | integer | Retry interval (ms) |
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## CREATE PIPE (READ_KAFKA)
|
|
71
|
+
|
|
72
|
+
```sql
|
|
73
|
+
CREATE PIPE <pipe_name>
|
|
14
74
|
VIRTUAL_CLUSTER = '<vcluster_name>'
|
|
15
75
|
[ BATCH_INTERVAL_IN_SECONDS = '<seconds>' ]
|
|
16
76
|
[ BATCH_SIZE_PER_KAFKA_PARTITION = '<count>' ]
|
|
17
77
|
[ MAX_SKIP_BATCH_COUNT_ON_ERROR = '<count>' ]
|
|
18
78
|
[ INITIAL_DELAY_IN_SECONDS = '<seconds>' ]
|
|
19
79
|
[ RESET_KAFKA_GROUP_OFFSETS = '<offset_value>' ]
|
|
20
|
-
[ COPY_JOB_HINT = '<
|
|
80
|
+
[ COPY_JOB_HINT = '<json_string>' ]
|
|
21
81
|
AS
|
|
22
|
-
COPY INTO <
|
|
23
|
-
SELECT <
|
|
24
|
-
FROM
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
'', -- 位置 3:Topic pattern(保留,填空字符串)
|
|
28
|
-
'<group_id>', -- 位置 4:消费者组 ID(必填)
|
|
29
|
-
'', -- 位置 5:starting_offsets(Pipe 中留空)
|
|
30
|
-
'', -- 位置 6:ending_offsets(Pipe 中留空)
|
|
31
|
-
'', -- 位置 7:starting_timestamp(Pipe 中留空)
|
|
32
|
-
'', -- 位置 8:ending_timestamp(Pipe 中留空)
|
|
33
|
-
'raw', -- 位置 9:key 格式(目前只支持 raw)
|
|
34
|
-
'raw', -- 位置 10:value 格式(目前只支持 raw)
|
|
35
|
-
0, -- 位置 11:max_errors
|
|
36
|
-
MAP(<kafka_config>) -- 位置 12:Kafka 配置参数
|
|
82
|
+
COPY INTO <schema>.<table> FROM (
|
|
83
|
+
SELECT <expressions>
|
|
84
|
+
FROM (
|
|
85
|
+
SELECT `timestamp`, parse_json(value::string) AS j
|
|
86
|
+
FROM read_kafka(...)
|
|
37
87
|
)
|
|
38
88
|
);
|
|
39
89
|
```
|
|
40
90
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
| 参数 | 必填 | 默认值 | 说明 |
|
|
44
|
-
|------|------|--------|------|
|
|
45
|
-
| `VIRTUAL_CLUSTER` | 是 | — | 执行 Pipe 任务的计算集群 |
|
|
46
|
-
| `BATCH_INTERVAL_IN_SECONDS` | 否 | 60 | 批处理间隔(秒),即数据新鲜度 |
|
|
47
|
-
| `BATCH_SIZE_PER_KAFKA_PARTITION` | 否 | 500000 | 每个 Kafka 分区每批最大消息数 |
|
|
48
|
-
| `MAX_SKIP_BATCH_COUNT_ON_ERROR` | 否 | 30 | 出错时跳过批次的最大重试次数 |
|
|
49
|
-
| `INITIAL_DELAY_IN_SECONDS` | 否 | 0 | 首个作业调度延迟 |
|
|
50
|
-
| `RESET_KAFKA_GROUP_OFFSETS` | 否 | — | 启动时消费位点(仅创建时生效) |
|
|
51
|
-
| `COPY_JOB_HINT` | 否 | — | JSON 格式的作业参数 |
|
|
52
|
-
|
|
53
|
-
### RESET_KAFKA_GROUP_OFFSETS 可选值
|
|
54
|
-
|
|
55
|
-
| 值 | 说明 |
|
|
56
|
-
|----|------|
|
|
57
|
-
| `'none'` | 无操作,使用 Kafka `auto.offset.reset`(默认 latest) |
|
|
58
|
-
| `'valid'` | 检查当前位点是否过期,将过期分区重置到 earliest |
|
|
59
|
-
| `'earliest'` | 重置到最早位点 |
|
|
60
|
-
| `'latest'` | 重置到最新位点 |
|
|
61
|
-
| `'<毫秒时间戳>'` | 重置到指定时间戳对应位点(如 `'1737789688000'`) |
|
|
62
|
-
|
|
63
|
-
### READ_KAFKA 参数(在 Pipe 中 vs 独立使用)
|
|
64
|
-
|
|
65
|
-
| 特性 | 独立使用 read_kafka | 在 Pipe 中使用 |
|
|
66
|
-
|------|-------------------|---------------|
|
|
67
|
-
| 消费者组 | 临时,执行完即销毁 | 持久,保持消费位置 |
|
|
68
|
-
| 位置管理 | 在 MAP 中设置 `kafka.auto.offset.reset` | Pipe 自动管理,位置参数**必须留空** |
|
|
69
|
-
| 执行方式 | 一次性查询 | 持续调度执行 |
|
|
70
|
-
| 默认起始位置 | latest(可在 MAP 中改为 earliest) | latest(由 RESET_KAFKA_GROUP_OFFSETS 控制) |
|
|
71
|
-
|
|
72
|
-
### MAP 配置参数
|
|
73
|
-
|
|
74
|
-
| 参数 | 说明 |
|
|
75
|
-
|------|------|
|
|
76
|
-
| `kafka.security.protocol` | 安全协议:`PLAINTEXT` 或 `SASL_PLAINTEXT` |
|
|
77
|
-
| `kafka.sasl.mechanism` | SASL 机制:`PLAIN` |
|
|
78
|
-
| `kafka.sasl.username` | SASL 用户名 |
|
|
79
|
-
| `kafka.sasl.password` | SASL 密码 |
|
|
80
|
-
| `kafka.auto.offset.reset` | 独立探查时的起始位点(`earliest` / `latest`) |
|
|
81
|
-
| `cz.kafka.fetch.retry.enable` | 启用 fetch 重试(`true`/`false`) |
|
|
82
|
-
| `cz.kafka.fetch.retry.times` | 重试次数 |
|
|
83
|
-
| `cz.kafka.fetch.retry.intervalMs` | 重试间隔(毫秒) |
|
|
84
|
-
|
|
85
|
-
### JSON 字段提取语法
|
|
91
|
+
> `CREATE OR REPLACE PIPE` is **not supported**. Use `DROP PIPE` + `CREATE PIPE`.
|
|
86
92
|
|
|
87
|
-
|
|
88
|
-
-- key 和 value 都是 binary 类型,需要先转换
|
|
89
|
-
value::string -- 转为字符串
|
|
90
|
-
parse_json(value::string) -- 解析为 JSON 对象
|
|
91
|
-
parse_json(value::string)['field']::TYPE -- 提取顶层字段
|
|
92
|
-
parse_json(value::string)['nested']['key']::TYPE -- 提取嵌套字段
|
|
93
|
-
|
|
94
|
-
-- 推荐模式:在子查询中先 parse_json,外层直接用 j['field']
|
|
95
|
-
SELECT j['order_id']::STRING, j['amount']::DECIMAL(10,2)
|
|
96
|
-
FROM (
|
|
97
|
-
SELECT parse_json(value::string) AS j
|
|
98
|
-
FROM read_kafka(...)
|
|
99
|
-
)
|
|
100
|
-
```
|
|
93
|
+
### Pipe Parameters
|
|
101
94
|
|
|
102
|
-
|
|
95
|
+
| Parameter | Required | Default | Description |
|
|
96
|
+
|-----------|----------|---------|-------------|
|
|
97
|
+
| `VIRTUAL_CLUSTER` | Yes | — | Compute cluster for Pipe execution |
|
|
98
|
+
| `BATCH_INTERVAL_IN_SECONDS` | No | `'60'` | Batch interval = data freshness (seconds) |
|
|
99
|
+
| `BATCH_SIZE_PER_KAFKA_PARTITION` | No | `'500000'` | Max messages per partition per batch |
|
|
100
|
+
| `MAX_SKIP_BATCH_COUNT_ON_ERROR` | No | `'30'` | Consecutive error batches before Pipe pauses |
|
|
101
|
+
| `INITIAL_DELAY_IN_SECONDS` | No | `'0'` | Delay before first scheduled job |
|
|
102
|
+
| `RESET_KAFKA_GROUP_OFFSETS` | No | — | Starting consumption offset (takes effect only when the Pipe is created) |
|
|
103
|
+
| `COPY_JOB_HINT` | No | — | JSON job hints |
|
|
103
104
|
|
|
104
|
-
|
|
105
|
-
-- 无认证 Kafka Pipe
|
|
106
|
-
CREATE PIPE kafka_orders_pipe
|
|
107
|
-
VIRTUAL_CLUSTER = 'default'
|
|
108
|
-
BATCH_INTERVAL_IN_SECONDS = '60'
|
|
109
|
-
AS
|
|
110
|
-
COPY INTO ods.orders FROM (
|
|
111
|
-
SELECT
|
|
112
|
-
j['order_id']::STRING AS order_id,
|
|
113
|
-
j['user_id']::STRING AS user_id,
|
|
114
|
-
j['amount']::DECIMAL(10,2) AS amount,
|
|
115
|
-
CAST(`timestamp` AS TIMESTAMP) AS kafka_ts
|
|
116
|
-
FROM (
|
|
117
|
-
SELECT `timestamp`, parse_json(value::string) AS j
|
|
118
|
-
FROM read_kafka(
|
|
119
|
-
'kafka.example.com:9092',
|
|
120
|
-
'orders',
|
|
121
|
-
'',
|
|
122
|
-
'lakehouse_orders',
|
|
123
|
-
'', '', '', '',
|
|
124
|
-
'raw', 'raw', 0,
|
|
125
|
-
MAP('kafka.security.protocol', 'PLAINTEXT')
|
|
126
|
-
)
|
|
127
|
-
)
|
|
128
|
-
);
|
|
105
|
+
### RESET_KAFKA_GROUP_OFFSETS Values
|
|
129
106
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
'',
|
|
147
|
-
'cz_secure',
|
|
148
|
-
'', '', '', '',
|
|
149
|
-
'raw', 'raw', 0,
|
|
150
|
-
MAP(
|
|
151
|
-
'kafka.security.protocol', 'SASL_PLAINTEXT',
|
|
152
|
-
'kafka.sasl.mechanism', 'PLAIN',
|
|
153
|
-
'kafka.sasl.username', 'my_user',
|
|
154
|
-
'kafka.sasl.password', 'my_password'
|
|
155
|
-
)
|
|
156
|
-
)
|
|
157
|
-
)
|
|
158
|
-
);
|
|
159
|
-
```
|
|
107
|
+
| Value | Effect |
|
|
108
|
+
|-------|--------|
|
|
109
|
+
| `'none'` | No reset; use Kafka default (`auto.offset.reset` = latest) |
|
|
110
|
+
| `'valid'` | Reset only expired partitions to earliest |
|
|
111
|
+
| `'earliest'` | Consume from beginning |
|
|
112
|
+
| `'latest'` | Consume only new messages |
|
|
113
|
+
| `'<epoch_millis>'` | Consume from specific timestamp (e.g., `'1737789688000'`) |
|
|
114
|
+
|
|
115
|
+
### COPY_JOB_HINT Keys
|
|
116
|
+
|
|
117
|
+
| Key | Default | Description |
|
|
118
|
+
|-----|---------|-------------|
|
|
119
|
+
| `cz.sql.split.kafka.strategy` | `simple` | `simple` = 1 task/partition; `size` = split by message count |
|
|
120
|
+
| `cz.mapper.kafka.message.size` | `1000000` | Messages per task when strategy = `size` |
|
|
121
|
+
|
|
122
|
+
> Must be valid JSON: `'{"key":"value","key2":"value2"}'`. Setting `COPY_JOB_HINT` overwrites all previously set hints, so specify every hint you need in a single value.
|
|
160
123
|
|
|
161
124
|
---
|
|
162
125
|
|
|
163
|
-
##
|
|
126
|
+
## CREATE PIPE (Table Stream)
|
|
164
127
|
|
|
165
128
|
```sql
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
'', '', '', '',
|
|
174
|
-
'raw', 'raw', 0,
|
|
175
|
-
MAP('kafka.security.protocol', 'PLAINTEXT', 'kafka.auto.offset.reset', 'earliest')
|
|
176
|
-
)
|
|
177
|
-
LIMIT 10;
|
|
178
|
-
|
|
179
|
-
-- SASL 认证
|
|
180
|
-
SELECT value::string
|
|
181
|
-
FROM read_kafka(
|
|
182
|
-
'kafka.example.com:9092',
|
|
183
|
-
'orders',
|
|
184
|
-
'',
|
|
185
|
-
'test_explore',
|
|
186
|
-
'', '', '', '',
|
|
187
|
-
'raw', 'raw', 0,
|
|
188
|
-
MAP(
|
|
189
|
-
'kafka.security.protocol', 'SASL_PLAINTEXT',
|
|
190
|
-
'kafka.sasl.mechanism', 'PLAIN',
|
|
191
|
-
'kafka.sasl.username', 'my_user',
|
|
192
|
-
'kafka.sasl.password', 'my_password',
|
|
193
|
-
'kafka.auto.offset.reset', 'earliest'
|
|
194
|
-
)
|
|
195
|
-
)
|
|
196
|
-
LIMIT 10;
|
|
129
|
+
CREATE PIPE <pipe_name>
|
|
130
|
+
VIRTUAL_CLUSTER = '<vcluster_name>'
|
|
131
|
+
[ BATCH_INTERVAL_IN_SECONDS = '<seconds>' ]
|
|
132
|
+
AS
|
|
133
|
+
INSERT INTO <schema>.<table>
|
|
134
|
+
SELECT <expressions>
|
|
135
|
+
FROM <stream_name>;
|
|
197
136
|
```
|
|
198
137
|
|
|
199
|
-
|
|
138
|
+
> Table Stream Pipe uses `INSERT INTO ... SELECT`, **not** `COPY INTO`.
|
|
200
139
|
|
|
201
|
-
|
|
140
|
+
---
|
|
202
141
|
|
|
203
|
-
|
|
142
|
+
## CREATE STORAGE CONNECTION
|
|
204
143
|
|
|
205
144
|
```sql
|
|
206
|
-
CREATE STORAGE CONNECTION IF NOT EXISTS <conn_name>
|
|
145
|
+
CREATE STORAGE CONNECTION [ IF NOT EXISTS ] <conn_name>
|
|
207
146
|
TYPE KAFKA
|
|
208
147
|
BOOTSTRAP_SERVERS = ['<host1>:<port1>', '<host2>:<port2>']
|
|
209
|
-
SECURITY_PROTOCOL = 'PLAINTEXT';
|
|
148
|
+
SECURITY_PROTOCOL = '<PLAINTEXT | SASL_PLAINTEXT>';
|
|
210
149
|
```
|
|
211
150
|
|
|
212
|
-
|
|
151
|
+
Drop: `DROP CONNECTION [ IF EXISTS ] <conn_name>;`
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## CREATE EXTERNAL TABLE (Kafka)
|
|
213
156
|
|
|
214
157
|
```sql
|
|
215
|
-
|
|
216
|
-
-- ⚠️ offset 是保留字,必须用反引号转义
|
|
217
|
-
CREATE EXTERNAL TABLE <ext_table_name> (
|
|
158
|
+
CREATE EXTERNAL TABLE <table_name> (
|
|
218
159
|
topic STRING,
|
|
219
160
|
partition INT,
|
|
220
161
|
`offset` BIGINT,
|
|
@@ -233,92 +174,117 @@ OPTIONS (
|
|
|
233
174
|
CONNECTION <conn_name>;
|
|
234
175
|
```
|
|
235
176
|
|
|
236
|
-
>
|
|
237
|
-
> -
|
|
238
|
-
> - `
|
|
239
|
-
> - 删除外部表用 `DROP TABLE`(不是 `DROP EXTERNAL TABLE`)
|
|
177
|
+
> - Column definitions **required** (error: `failed to detect columns` if omitted)
|
|
178
|
+
> - `offset` and `timestamp` are reserved words — always escape them with backticks
|
|
179
|
+
> - Drop with `DROP TABLE` (not `DROP EXTERNAL TABLE`)
|
|
240
180
|
|
|
241
|
-
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## CREATE TABLE STREAM
|
|
242
184
|
|
|
243
185
|
```sql
|
|
244
186
|
CREATE TABLE STREAM <stream_name>
|
|
245
|
-
ON TABLE <
|
|
187
|
+
ON TABLE <source_table>
|
|
246
188
|
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'APPEND_ONLY');
|
|
247
189
|
```
|
|
248
190
|
|
|
249
|
-
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## ALTER PIPE
|
|
250
194
|
|
|
251
195
|
```sql
|
|
252
|
-
|
|
253
|
-
VIRTUAL_CLUSTER = '<vcluster_name>'
|
|
254
|
-
BATCH_INTERVAL_IN_SECONDS = '60'
|
|
255
|
-
AS
|
|
256
|
-
COPY INTO <target_table>
|
|
257
|
-
SELECT <expr> [, ...]
|
|
258
|
-
FROM <stream_name>;
|
|
196
|
+
ALTER PIPE <pipe_name> SET <property> = <value>;
|
|
259
197
|
```
|
|
260
198
|
|
|
199
|
+
Property support (only one property can be set per `ALTER PIPE` statement):
|
|
200
|
+
|
|
201
|
+
| Property | Alterable | Notes |
|
|
202
|
+
|----------|-----------|-------|
|
|
203
|
+
| `PIPE_EXECUTION_PAUSED` | ✅ | `true` / `false` |
|
|
204
|
+
| `VIRTUAL_CLUSTER` | ✅ | New VCluster name |
|
|
205
|
+
| `COPY_JOB_HINT` | ✅ | JSON string; overwrites all hints |
|
|
206
|
+
| `BATCH_INTERVAL_IN_SECONDS` | ❌ | Drop + recreate |
|
|
207
|
+
| `BATCH_SIZE_PER_KAFKA_PARTITION` | ❌ | Drop + recreate |
|
|
208
|
+
| SELECT logic | ❌ | Drop + recreate |
|
|
209
|
+
|
|
261
210
|
---
|
|
262
211
|
|
|
263
|
-
##
|
|
212
|
+
## DROP PIPE
|
|
213
|
+
|
|
214
|
+
```sql
|
|
215
|
+
DROP PIPE [ IF EXISTS ] <pipe_name>;
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## Monitoring Queries
|
|
264
221
|
|
|
265
222
|
```sql
|
|
266
|
-
--
|
|
267
|
-
|
|
223
|
+
-- Pipe details (includes pipe_latency JSON)
|
|
224
|
+
DESC PIPE EXTENDED <pipe_name>;
|
|
268
225
|
|
|
269
|
-
--
|
|
270
|
-
|
|
226
|
+
-- List all Pipes
|
|
227
|
+
SHOW PIPES;
|
|
271
228
|
|
|
272
|
-
--
|
|
273
|
-
|
|
229
|
+
-- Load history (retained 7 days)
|
|
230
|
+
SELECT * FROM load_history('<schema>.<table>') ORDER BY last_load_time DESC LIMIT 20;
|
|
274
231
|
|
|
275
|
-
--
|
|
276
|
-
|
|
232
|
+
-- Pipe jobs by query_tag
|
|
233
|
+
SHOW JOBS WHERE query_tag = 'pipe.<workspace>.<schema>.<pipe_name>';
|
|
277
234
|
```
|
|
278
235
|
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
> 不支持修改 COPY/INSERT 语句逻辑,需删除 Pipe 后重建。
|
|
287
|
-
> 修改 `COPY_JOB_HINT` 会覆盖所有已有 hints,需一次性设置全部参数。
|
|
236
|
+
### pipe_latency Fields
|
|
237
|
+
|
|
238
|
+
| Field | Description |
|
|
239
|
+
|-------|-------------|
|
|
240
|
+
| `lastConsumeTimestamp` | Timestamp of last consumed offset |
|
|
241
|
+
| `offsetLag` | Number of unconsumed messages |
|
|
242
|
+
| `timeLag` | Consumer lag in ms (-1 = abnormal) |
|
|
288
243
|
|
|
289
244
|
---
|
|
290
245
|
|
|
291
|
-
##
|
|
246
|
+
## JSON Field Extraction Patterns
|
|
292
247
|
|
|
293
248
|
```sql
|
|
294
|
-
--
|
|
295
|
-
|
|
249
|
+
-- Binary → String
|
|
250
|
+
value::string
|
|
296
251
|
|
|
297
|
-
--
|
|
298
|
-
|
|
252
|
+
-- String → JSON object
|
|
253
|
+
parse_json(value::string)
|
|
254
|
+
|
|
255
|
+
-- Extract top-level field
|
|
256
|
+
parse_json(value::string)['field']::TYPE
|
|
299
257
|
|
|
300
|
-
--
|
|
301
|
-
|
|
302
|
-
ORDER BY last_load_time DESC LIMIT 20;
|
|
258
|
+
-- Extract nested field
|
|
259
|
+
parse_json(value::string)['parent']['child']::TYPE
|
|
303
260
|
|
|
304
|
-
--
|
|
305
|
-
|
|
306
|
-
|
|
261
|
+
-- Deeply nested (string-within-string)
|
|
262
|
+
parse_json(parse_json(value::string)['outer']::STRING)['inner']::TYPE
|
|
263
|
+
|
|
264
|
+
-- Recommended: parse once in subquery
|
|
265
|
+
SELECT j['id']::STRING, j['amount']::DECIMAL(10,2)
|
|
266
|
+
FROM (SELECT parse_json(value::string) AS j FROM read_kafka(...))
|
|
307
267
|
```
|
|
308
268
|
|
|
309
269
|
---
|
|
310
270
|
|
|
311
|
-
##
|
|
271
|
+
## CSV Field Extraction Pattern
|
|
312
272
|
|
|
313
273
|
```sql
|
|
314
|
-
|
|
274
|
+
split(value::string, ',')[0]::STRING -- first field
|
|
275
|
+
split(value::string, ',')[1]::STRING -- second field
|
|
276
|
+
CAST(split(value::string, ',')[2] AS DECIMAL(10,2)) -- with type cast
|
|
315
277
|
```
|
|
316
278
|
|
|
317
|
-
|
|
279
|
+
---
|
|
280
|
+
|
|
281
|
+
## Reference Links
|
|
318
282
|
|
|
319
|
-
- [Pipe
|
|
320
|
-
- [
|
|
321
|
-
- [
|
|
322
|
-
- [
|
|
323
|
-
- [
|
|
283
|
+
- [Pipe Overview](https://www.yunqi.tech/documents/pipe-summary)
|
|
284
|
+
- [read_kafka Continuous Import](https://www.yunqi.tech/documents/pipe-kafka)
|
|
285
|
+
- [Kafka External Table + Table Stream](https://www.yunqi.tech/documents/pipe-kafka-table-stream)
|
|
286
|
+
- [Kafka Pipe Best Practice](https://www.yunqi.tech/documents/pipe-kafka-bestpractice-1)
|
|
287
|
+
- [read_kafka Function](https://www.yunqi.tech/documents/read_kafka)
|
|
288
|
+
- [Kafka External Table](https://www.yunqi.tech/documents/kafka-external-table)
|
|
324
289
|
- [Kafka Storage Connection](https://www.yunqi.tech/documents/Kafka_connection)
|
|
290
|
+
- [PIPE Syntax](https://www.yunqi.tech/documents/pipe-syntax)
|