@clickzetta/cz-cli-linux-x64 0.3.2 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/package.json +1 -1
- package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
- package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
- package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
- package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
- package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
- package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
- package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
- package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
- package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
- package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
- package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -293
- package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -450
- package/bin/skills/clickzetta-concepts/SKILL.md +0 -282
- package/bin/skills/clickzetta-concepts/references/brands-and-endpoints.md +0 -79
- package/bin/skills/clickzetta-concepts/references/object-model.md +0 -311
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -165
- package/bin/skills/clickzetta-data-lifecycle/SKILL.md +0 -211
- package/bin/skills/clickzetta-data-lifecycle/references/lifecycle-reference.md +0 -175
- package/bin/skills/clickzetta-data-recovery/SKILL.md +0 -215
- package/bin/skills/clickzetta-data-recovery/evals/evals.json +0 -35
- package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
- package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
- package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
- package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
- package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -540
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -259
- package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -86
- package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -257
- package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
- package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
- package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
- package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -429
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -268
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
- package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -120
- package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
- package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
- package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -117
- package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
- package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
- package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
- package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
- package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
- package/bin/skills/clickzetta-information-schema/SKILL.md +0 -367
- package/bin/skills/clickzetta-information-schema/references/instance-views-reference.md +0 -276
- package/bin/skills/clickzetta-information-schema/references/metering-views-reference.md +0 -137
- package/bin/skills/clickzetta-information-schema/references/views-reference.md +0 -271
- package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -531
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -186
- package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
- package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
- package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
- package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
- package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
- package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
- package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
- package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
- package/bin/skills/clickzetta-metadata-query/SKILL.md +0 -298
- package/bin/skills/clickzetta-metadata-query/references/show-desc-reference.md +0 -326
- package/bin/skills/clickzetta-monitoring/SKILL.md +0 -199
- package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
- package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -402
- package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
- package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
- package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
- package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
- package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
- package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -197
- package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
- package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
- package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
- package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -353
- package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
- package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -173
- package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
- package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -160
- package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -123
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -172
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
- package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
- package/bin/skills/clickzetta-studio-overview/SKILL.md +0 -170
- package/bin/skills/clickzetta-studio-overview/references/studio-modules.md +0 -173
- package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -155
- package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
- package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
- package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
- package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -249
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -194
- package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
- package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
|
@@ -1,163 +0,0 @@
|
|
|
1
|
-
# BulkloadStream 详细参考
|
|
2
|
-
|
|
3
|
-
> 适合:定时 ETL、本地文件导入、数据库迁移
|
|
4
|
-
> 不适合:主键表、5 分钟以内高频写入
|
|
5
|
-
|
|
6
|
-
## Maven 依赖
|
|
7
|
-
|
|
8
|
-
```xml
|
|
9
|
-
<!-- 最新版本见 https://central.sonatype.com/artifact/com.clickzetta/clickzetta-java -->
|
|
10
|
-
<dependency>
|
|
11
|
-
<groupId>com.clickzetta</groupId>
|
|
12
|
-
<artifactId>clickzetta-java</artifactId>
|
|
13
|
-
<version>2.0.0</version>
|
|
14
|
-
</dependency>
|
|
15
|
-
```
|
|
16
|
-
|
|
17
|
-
最新版本见 [Maven Central](https://central.sonatype.com/artifact/com.clickzetta/clickzetta-java)
|
|
18
|
-
|
|
19
|
-
## 使用限制
|
|
20
|
-
|
|
21
|
-
- **不支持主键(pk)表写入**
|
|
22
|
-
- **不适合时间间隔小于 5 分钟的高频写入**
|
|
23
|
-
- 写入完成 `close()` 后数据才可见
|
|
24
|
-
|
|
25
|
-
## 完整示例:读取本地 CSV 写入 Lakehouse
|
|
26
|
-
|
|
27
|
-
### 建表
|
|
28
|
-
|
|
29
|
-
```sql
|
|
30
|
-
CREATE TABLE bulk_order_items (
|
|
31
|
-
order_id STRING,
|
|
32
|
-
order_item_id INT,
|
|
33
|
-
product_id STRING,
|
|
34
|
-
seller_id STRING,
|
|
35
|
-
shipping_limit_date STRING,
|
|
36
|
-
price DOUBLE,
|
|
37
|
-
freight_value DOUBLE
|
|
38
|
-
);
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
### Java 代码(BulkloadFile 类)
|
|
42
|
-
|
|
43
|
-
```java
|
|
44
|
-
import com.clickzetta.client.BulkloadStream;
|
|
45
|
-
import com.clickzetta.client.ClickZettaClient;
|
|
46
|
-
import com.clickzetta.client.RowStream;
|
|
47
|
-
import com.clickzetta.client.StreamState;
|
|
48
|
-
import com.clickzetta.platform.client.api.Row;
|
|
49
|
-
|
|
50
|
-
import java.io.BufferedReader;
|
|
51
|
-
import java.io.File;
|
|
52
|
-
import java.io.FileReader;
|
|
53
|
-
import java.text.MessageFormat;
|
|
54
|
-
|
|
55
|
-
public class BulkloadFile {
|
|
56
|
-
private static ClickZettaClient client;
|
|
57
|
-
private static final String password = "";
|
|
58
|
-
private static final String table = "bulk_order_items";
|
|
59
|
-
private static final String workspace = "";
|
|
60
|
-
private static final String schema = "public";
|
|
61
|
-
private static final String vc = "default";
|
|
62
|
-
private static final String user = "";
|
|
63
|
-
static BulkloadStream bulkloadStream;
|
|
64
|
-
|
|
65
|
-
public static void main(String[] args) throws Exception {
|
|
66
|
-
initialize();
|
|
67
|
-
File csvFile = new File("olist_order_items_dataset.csv");
|
|
68
|
-
BufferedReader reader = new BufferedReader(new FileReader(csvFile));
|
|
69
|
-
reader.readLine(); // 跳过 header 行
|
|
70
|
-
|
|
71
|
-
String line;
|
|
72
|
-
while ((line = reader.readLine()) != null) {
|
|
73
|
-
String[] values = line.split(",");
|
|
74
|
-
// 类型转换必须与建表 DDL 一致
|
|
75
|
-
String orderId = values[0];
|
|
76
|
-
int orderItemId = Integer.parseInt(values[1]);
|
|
77
|
-
String productId = values[2];
|
|
78
|
-
String sellerId = values[3];
|
|
79
|
-
String shippingLimitDate = values[4];
|
|
80
|
-
double price = Double.parseDouble(values[5]);
|
|
81
|
-
double freightValue = Double.parseDouble(values[6]);
|
|
82
|
-
|
|
83
|
-
Row row = bulkloadStream.createRow();
|
|
84
|
-
// ⚠️ BulkloadStream 用列索引(从 0 开始),顺序与建表 DDL 一致
|
|
85
|
-
row.setValue(0, orderId);
|
|
86
|
-
row.setValue(1, orderItemId);
|
|
87
|
-
row.setValue(2, productId);
|
|
88
|
-
row.setValue(3, sellerId);
|
|
89
|
-
row.setValue(4, shippingLimitDate);
|
|
90
|
-
row.setValue(5, price);
|
|
91
|
-
row.setValue(6, freightValue);
|
|
92
|
-
// ⚠️ 必须调用 apply(),否则数据不发送到服务端
|
|
93
|
-
bulkloadStream.apply(row);
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
reader.close();
|
|
97
|
-
bulkloadStream.close();
|
|
98
|
-
waitForBulkloadCompletion();
|
|
99
|
-
client.close();
|
|
100
|
-
System.out.println("Data inserted successfully!");
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
private static void initialize() throws Exception {
|
|
104
|
-
// 推荐:显式参数方式(2.0.0+ 支持)
|
|
105
|
-
client = ClickZettaClient.newBuilder()
|
|
106
|
-
.service("cn-shanghai-alicloud.api.clickzetta.com")
|
|
107
|
-
.instance("your_instance")
|
|
108
|
-
.workspace(workspace)
|
|
109
|
-
.schema(schema)
|
|
110
|
-
.username(user)
|
|
111
|
-
.password(password)
|
|
112
|
-
.vcluster(vc)
|
|
113
|
-
.build();
|
|
114
|
-
bulkloadStream = client.newBulkloadStreamBuilder()
|
|
115
|
-
.schema(schema)
|
|
116
|
-
.table(table)
|
|
117
|
-
.operate(RowStream.BulkLoadOperate.APPEND)
|
|
118
|
-
.build();
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
private static void waitForBulkloadCompletion() throws InterruptedException {
|
|
122
|
-
while (bulkloadStream.getState() == StreamState.RUNNING) {
|
|
123
|
-
Thread.sleep(1000);
|
|
124
|
-
}
|
|
125
|
-
if (bulkloadStream.getState() == StreamState.FAILED) {
|
|
126
|
-
throw new RuntimeException(bulkloadStream.getErrorMessage());
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
```
|
|
131
|
-
|
|
132
|
-
## 关键 API
|
|
133
|
-
|
|
134
|
-
| API | 说明 |
|
|
135
|
-
|---|---|
|
|
136
|
-
| `bulkloadStream.createRow()` | 创建行对象(无参数) |
|
|
137
|
-
| `row.setValue(int index, Object value)` | 按列索引设值(从 0 开始) |
|
|
138
|
-
| `bulkloadStream.apply(row)` | 发送行到服务端(必须调用) |
|
|
139
|
-
| `bulkloadStream.close()` | 关闭并触发提交 |
|
|
140
|
-
| `bulkloadStream.getState()` | 获取状态:RUNNING / SUCCEEDED / FAILED |
|
|
141
|
-
| `bulkloadStream.getErrorMessage()` | 获取失败原因 |
|
|
142
|
-
|
|
143
|
-
## 类型映射
|
|
144
|
-
|
|
145
|
-
| Java 类型 | Lakehouse 类型 |
|
|
146
|
-
|---|---|
|
|
147
|
-
| `Long` / `long` | BIGINT |
|
|
148
|
-
| `Integer` / `int` | INT |
|
|
149
|
-
| `Double` / `double` | DOUBLE |
|
|
150
|
-
| `String` | STRING / VARCHAR |
|
|
151
|
-
| `Boolean` | BOOLEAN |
|
|
152
|
-
| `java.sql.Timestamp` | TIMESTAMP |
|
|
153
|
-
| `java.sql.Date` | DATE |
|
|
154
|
-
| `BigDecimal` | DECIMAL |
|
|
155
|
-
|
|
156
|
-
## 常见问题
|
|
157
|
-
|
|
158
|
-
| 问题 | 原因 | 解决方案 |
|
|
159
|
-
|---|---|---|
|
|
160
|
-
| 数据写入后查不到 | 未调用 `apply()`、未调用 `close()` 或未等待 RUNNING 结束 | 确认每行都调用 `apply()`,写完后调用 `close()` 触发提交,并等待状态变为 SUCCEEDED |
|
|
161
|
-
| 主键表写入报错 | BulkloadStream 不支持主键表 | 改用 JDBC + MERGE 或 Flink igs-dynamic-table |
|
|
162
|
-
| 列值类型不匹配 | Java 类型与建表 DDL 不一致 | 写入前做类型转换(parseInt、parseDouble 等) |
|
|
163
|
-
| 连接失败 | URL 参数名错误 | BulkloadStream 用 `virtualcluster=`,不是 `vcluster=` |
|
|
@@ -1,212 +0,0 @@
|
|
|
1
|
-
# RealtimeStream 实时写入参考
|
|
2
|
-
|
|
3
|
-
> 适合:Kafka 消费写入、高频实时数据接入(秒级可查)、主键表 CDC 写入
|
|
4
|
-
|
|
5
|
-
## Maven 依赖
|
|
6
|
-
|
|
7
|
-
```xml
|
|
8
|
-
<!-- 最新版本见 https://central.sonatype.com/artifact/com.clickzetta/clickzetta-java -->
|
|
9
|
-
<dependency>
|
|
10
|
-
<groupId>com.clickzetta</groupId>
|
|
11
|
-
<artifactId>clickzetta-java</artifactId>
|
|
12
|
-
<version>2.0.0</version>
|
|
13
|
-
</dependency>
|
|
14
|
-
<dependency>
|
|
15
|
-
<groupId>org.apache.kafka</groupId>
|
|
16
|
-
<artifactId>kafka-clients</artifactId>
|
|
17
|
-
<version>3.2.0</version>
|
|
18
|
-
</dependency>
|
|
19
|
-
```
|
|
20
|
-
|
|
21
|
-
## 使用限制
|
|
22
|
-
|
|
23
|
-
- 实时写入的数据可以秒级查询
|
|
24
|
-
- table stream、dynamic table 需等待约 **1 分钟**才能看到写入数据
|
|
25
|
-
- 表结构变更时,需先停止写入任务;变更完成后等待约 **90 分钟**再重新启动
|
|
26
|
-
|
|
27
|
-
## 操作模式
|
|
28
|
-
|
|
29
|
-
| 模式 | 适用表 | 可用 Operator |
|
|
30
|
-
|---|---|---|
|
|
31
|
-
| `RealTimeOperate.APPEND_ONLY` | 普通表 | `Stream.Operator.INSERT` |
|
|
32
|
-
| `RealTimeOperate.CDC` | 主键表 | `Stream.Operator.UPSERT`、`Stream.Operator.DELETE_IGNORE` |
|
|
33
|
-
|
|
34
|
-
## 普通表写入(APPEND_ONLY)
|
|
35
|
-
|
|
36
|
-
```java
|
|
37
|
-
// 推荐:显式参数方式(2.0.0+ 支持,不依赖 URL 解析)
|
|
38
|
-
ClickZettaClient client = ClickZettaClient.newBuilder()
|
|
39
|
-
.service("cn-shanghai-alicloud.api.clickzetta.com")
|
|
40
|
-
.instance("your_instance")
|
|
41
|
-
.workspace(workspace)
|
|
42
|
-
.schema(schema)
|
|
43
|
-
.username(user)
|
|
44
|
-
.password(password)
|
|
45
|
-
.vcluster(vc)
|
|
46
|
-
.build();
|
|
47
|
-
Options options = Options.builder().withMutationBufferLinesNum(10).build();
|
|
48
|
-
|
|
49
|
-
RealtimeStream stream = client.newRealtimeStreamBuilder()
|
|
50
|
-
.operate(RowStream.RealTimeOperate.APPEND_ONLY)
|
|
51
|
-
.options(options)
|
|
52
|
-
.schema(schema)
|
|
53
|
-
.table("events")
|
|
54
|
-
.build();
|
|
55
|
-
|
|
56
|
-
// ⚠️ RealtimeStream 用列名(不是索引)
|
|
57
|
-
Row row = stream.createRow(Stream.Operator.INSERT);
|
|
58
|
-
row.setValue("id", 1);
|
|
59
|
-
row.setValue("event", "{\"type\":\"click\"}");
|
|
60
|
-
stream.apply(row);
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
## 主键表写入(CDC 模式)
|
|
64
|
-
|
|
65
|
-
```java
|
|
66
|
-
// 建表(主键表)
|
|
67
|
-
// CREATE TABLE orders (`txid` STRING PRIMARY KEY, `amount` DOUBLE, `status` STRING);
|
|
68
|
-
|
|
69
|
-
RealtimeStream stream = client.newRealtimeStreamBuilder()
|
|
70
|
-
.operate(RowStream.RealTimeOperate.CDC)
|
|
71
|
-
.options(options)
|
|
72
|
-
.schema(schema)
|
|
73
|
-
.table("orders")
|
|
74
|
-
.build();
|
|
75
|
-
|
|
76
|
-
// UPSERT:存在则更新,不存在则插入
|
|
77
|
-
Row row = stream.createRow(Stream.Operator.UPSERT);
|
|
78
|
-
row.setValue("txid", "order-001");
|
|
79
|
-
row.setValue("amount", 299.99);
|
|
80
|
-
row.setValue("status", "paid");
|
|
81
|
-
stream.apply(row);
|
|
82
|
-
|
|
83
|
-
// DELETE_IGNORE:删除,目标行不存在时自动忽略
|
|
84
|
-
Row delRow = stream.createRow(Stream.Operator.DELETE_IGNORE);
|
|
85
|
-
delRow.setValue("txid", "order-001");
|
|
86
|
-
stream.apply(delRow);
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
## 完整示例:Kafka → Lakehouse
|
|
90
|
-
|
|
91
|
-
### KafkaReader 类
|
|
92
|
-
|
|
93
|
-
```java
|
|
94
|
-
import org.apache.kafka.clients.consumer.ConsumerConfig;
|
|
95
|
-
import org.apache.kafka.clients.consumer.KafkaConsumer;
|
|
96
|
-
import java.util.Collections;
|
|
97
|
-
import java.util.Properties;
|
|
98
|
-
|
|
99
|
-
public class KafkaReader {
|
|
100
|
-
private KafkaConsumer<String, String> consumer;
|
|
101
|
-
|
|
102
|
-
public KafkaReader() {
|
|
103
|
-
Properties props = new Properties();
|
|
104
|
-
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
|
|
105
|
-
props.put(ConsumerConfig.GROUP_ID_CONFIG, "test-group");
|
|
106
|
-
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
|
|
107
|
-
"org.apache.kafka.common.serialization.StringDeserializer");
|
|
108
|
-
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
|
|
109
|
-
"org.apache.kafka.common.serialization.StringDeserializer");
|
|
110
|
-
props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
|
|
111
|
-
props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
|
|
112
|
-
consumer = new KafkaConsumer<>(props);
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
public KafkaConsumer<String, String> readFromTopic(String topic) {
|
|
116
|
-
consumer.subscribe(Collections.singleton(topic));
|
|
117
|
-
return consumer;
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
```
|
|
121
|
-
|
|
122
|
-
### Kafka2Lakehouse 主类
|
|
123
|
-
|
|
124
|
-
```java
|
|
125
|
-
import com.clickzetta.client.ClickZettaClient;
|
|
126
|
-
import com.clickzetta.client.RealtimeStream;
|
|
127
|
-
import com.clickzetta.client.RowStream;
|
|
128
|
-
import com.clickzetta.platform.client.api.Options;
|
|
129
|
-
import com.clickzetta.platform.client.api.Row;
|
|
130
|
-
import com.clickzetta.platform.client.api.Stream;
|
|
131
|
-
import org.apache.kafka.clients.consumer.ConsumerRecord;
|
|
132
|
-
import org.apache.kafka.clients.consumer.ConsumerRecords;
|
|
133
|
-
import org.apache.kafka.clients.consumer.KafkaConsumer;
|
|
134
|
-
import java.time.Duration;
|
|
135
|
-
|
|
136
|
-
public class Kafka2Lakehouse {
|
|
137
|
-
private static ClickZettaClient client;
|
|
138
|
-
private static final String password = "";
|
|
139
|
-
private static final String table = "realtime_stream";
|
|
140
|
-
private static final String workspace = "";
|
|
141
|
-
private static final String schema = "public";
|
|
142
|
-
private static final String user = "";
|
|
143
|
-
private static final String vc = "default";
|
|
144
|
-
static RealtimeStream realtimeStream;
|
|
145
|
-
|
|
146
|
-
public static void main(String[] args) throws Exception {
|
|
147
|
-
initialize();
|
|
148
|
-
KafkaReader kafkaReader = new KafkaReader();
|
|
149
|
-
final KafkaConsumer<String, String> consumer = kafkaReader.readFromTopic("lakehouse-stream");
|
|
150
|
-
int i = 1;
|
|
151
|
-
while (true) {
|
|
152
|
-
ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
|
|
153
|
-
for (ConsumerRecord<String, String> record : records) {
|
|
154
|
-
Row row = realtimeStream.createRow(Stream.Operator.INSERT);
|
|
155
|
-
row.setValue("id", i++);
|
|
156
|
-
row.setValue("event", record.value());
|
|
157
|
-
realtimeStream.apply(row);
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
private static void initialize() throws Exception {
|
|
163
|
-
Options options = Options.builder().withMutationBufferLinesNum(10).build();
|
|
164
|
-
client = ClickZettaClient.newBuilder()
|
|
165
|
-
.service("cn-shanghai-alicloud.api.clickzetta.com")
|
|
166
|
-
.instance("your_instance")
|
|
167
|
-
.workspace(workspace)
|
|
168
|
-
.schema(schema)
|
|
169
|
-
.username(user)
|
|
170
|
-
.password(password)
|
|
171
|
-
.vcluster(vc)
|
|
172
|
-
.build();
|
|
173
|
-
realtimeStream = client.newRealtimeStreamBuilder()
|
|
174
|
-
.operate(RowStream.RealTimeOperate.APPEND_ONLY)
|
|
175
|
-
.options(options)
|
|
176
|
-
.schema(schema)
|
|
177
|
-
.table(table)
|
|
178
|
-
.build();
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
```
|
|
182
|
-
|
|
183
|
-
## 关键 API
|
|
184
|
-
|
|
185
|
-
| API | 说明 |
|
|
186
|
-
|---|---|
|
|
187
|
-
| `realtimeStream.createRow(Stream.Operator.INSERT)` | 普通表插入行 |
|
|
188
|
-
| `realtimeStream.createRow(Stream.Operator.UPSERT)` | 主键表 upsert 行 |
|
|
189
|
-
| `realtimeStream.createRow(Stream.Operator.DELETE_IGNORE)` | 主键表删除行 |
|
|
190
|
-
| `row.setValue(String columnName, Object value)` | 按列名设值(不是索引) |
|
|
191
|
-
| `realtimeStream.apply(row)` | 发送行到服务端 |
|
|
192
|
-
| `Options.builder().withMutationBufferLinesNum(n)` | 设置缓冲行数(默认 10) |
|
|
193
|
-
|
|
194
|
-
## BulkloadStream vs RealtimeStream 对比
|
|
195
|
-
|
|
196
|
-
| 维度 | BulkloadStream | RealtimeStream |
|
|
197
|
-
|---|---|---|
|
|
198
|
-
| 列设值方式 | `setValue(int index, value)` | `setValue(String name, value)` |
|
|
199
|
-
| URL 参数 | `virtualcluster=` | `vcluster=` |
|
|
200
|
-
| createRow 参数 | 无参数 | `Stream.Operator.INSERT/UPSERT/DELETE_IGNORE` |
|
|
201
|
-
| 适用频率 | 低频(≥5 分钟/批) | 高频(秒级) |
|
|
202
|
-
| 数据可见延迟 | close() 后可见 | 秒级可查(table stream、dynamic table 约 1 分钟后可见) |
|
|
203
|
-
| 主键表 | ❌ | ✅ CDC 模式 |
|
|
204
|
-
|
|
205
|
-
## 常见问题
|
|
206
|
-
|
|
207
|
-
| 问题 | 原因 | 解决方案 |
|
|
208
|
-
|---|---|---|
|
|
209
|
-
| 连接失败 | URL 参数名错误 | RealtimeStream 用 `vcluster=`,不是 `virtualcluster=` |
|
|
210
|
-
| 列名找不到 | 列名拼写错误 | 列名区分大小写,与建表 DDL 保持一致 |
|
|
211
|
-
| 表结构变更后写入失败 | 旧 Stream 实例缓存了旧 schema | 停止任务,变更后等约 90 分钟再重启 |
|
|
212
|
-
| dynamic table 看不到数据 | 实时写入有 ~1 分钟确认延迟 | 等待 1 分钟后再查询 |
|