@clickzetta/cz-cli-darwin-x64 0.3.92 → 0.3.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
- package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
- package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
- package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
- package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
- package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
- package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
- package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
- package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
- package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
- package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
- package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
- package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
- package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
- package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
- package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
- package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
- package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
- package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
- package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
- package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
- package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
- package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
- package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
- package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
- package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
- package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
- package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
- package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
- package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
- package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
- package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
- package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
- package/package.json +1 -1
- package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
- package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
- /package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
|
@@ -1,18 +1,20 @@
|
|
|
1
|
-
# External Function DDL
|
|
1
|
+
# External Function DDL Reference
|
|
2
2
|
|
|
3
|
-
>
|
|
3
|
+
> Source: https://www.yunqi.tech/documents/CREATE_EXTERNATL_FUNCTION
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## Concepts
|
|
6
6
|
|
|
7
|
-
External Function
|
|
8
|
-
-
|
|
9
|
-
-
|
|
7
|
+
An External Function is a user-defined function (UDF) written in Python or Java and executed on a cloud function service (Alibaba Cloud FC / Tencent Cloud SCF / AWS Lambda). It can call:
|
|
8
|
+
- **Online services**: image recognition APIs, custom REST services, etc.
|
|
9
|
+
- **Offline models**: Hugging Face models packaged and uploaded
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
Supported function types: UDF (scalar), UDAF (aggregate, Java only), UDTF (table function, Java only)
|
|
12
|
+
|
|
13
|
+
> For built-in LLM functions (AI_COMPLETE, AI_EMBEDDING), see the `clickzetta-ai-function` skill.
|
|
12
14
|
|
|
13
15
|
---
|
|
14
16
|
|
|
15
|
-
## CREATE API CONNECTION
|
|
17
|
+
## CREATE API CONNECTION (Cloud Function)
|
|
16
18
|
|
|
17
19
|
```sql
|
|
18
20
|
CREATE API CONNECTION IF NOT EXISTS my_fc_conn
|
|
@@ -20,17 +22,17 @@ CREATE API CONNECTION IF NOT EXISTS my_fc_conn
|
|
|
20
22
|
PROVIDER = 'aliyun' -- 'aliyun' | 'tencent' | 'aws'
|
|
21
23
|
REGION = 'cn-shanghai'
|
|
22
24
|
ROLE_ARN = 'acs:ram::1234567890:role/CzUDFRole'
|
|
23
|
-
NAMESPACE = 'default' --
|
|
25
|
+
NAMESPACE = 'default' -- Required for Tencent Cloud; use 'default' for others
|
|
24
26
|
CODE_BUCKET = 'my-oss-bucket';
|
|
25
27
|
```
|
|
26
28
|
|
|
27
|
-
|
|
|
29
|
+
| Parameter | Description |
|
|
28
30
|
|---|---|
|
|
29
31
|
| PROVIDER | `'aliyun'` / `'tencent'` / `'aws'` |
|
|
30
|
-
| REGION |
|
|
31
|
-
| ROLE_ARN |
|
|
32
|
-
| NAMESPACE |
|
|
33
|
-
| CODE_BUCKET |
|
|
32
|
+
| REGION | Alibaba Cloud: `cn-shanghai`; Tencent Cloud: `ap-beijing`; AWS: `cn-northwest-1` |
|
|
33
|
+
| ROLE_ARN | RAM role ARN granted to Lakehouse |
|
|
34
|
+
| NAMESPACE | Tencent Cloud namespace (required); use `'default'` for others |
|
|
35
|
+
| CODE_BUCKET | OSS/COS/S3 bucket name where the function code package is stored |
|
|
34
36
|
|
|
35
37
|
---
|
|
36
38
|
|
|
@@ -44,10 +46,10 @@ CREATE EXTERNAL FUNCTION IF NOT EXISTS my_schema.my_udf
|
|
|
44
46
|
WITH PROPERTIES (
|
|
45
47
|
'remote.udf.api' = 'python3.mc.v0' -- Python: python3.mc.v0 | Java: java8.hive2.v0
|
|
46
48
|
)
|
|
47
|
-
COMMENT '
|
|
49
|
+
COMMENT 'Custom function description';
|
|
48
50
|
```
|
|
49
51
|
|
|
50
|
-
###
|
|
52
|
+
### Resource File Path Formats
|
|
51
53
|
|
|
52
54
|
```
|
|
53
55
|
-- OSS/COS/S3
|
|
@@ -55,24 +57,24 @@ oss://bucket-name/path/to/code.zip
|
|
|
55
57
|
cos://bucket-name/path/to/code.zip
|
|
56
58
|
s3://bucket-name/path/to/code.zip
|
|
57
59
|
|
|
58
|
-
-- User Volume
|
|
60
|
+
-- User Volume (no object storage required)
|
|
59
61
|
volume:user://~/code.zip
|
|
60
62
|
|
|
61
63
|
-- External Volume
|
|
62
64
|
volume://workspace.schema.volume_name/code.zip
|
|
63
65
|
```
|
|
64
66
|
|
|
65
|
-
### WITH PROPERTIES
|
|
67
|
+
### WITH PROPERTIES Parameters
|
|
66
68
|
|
|
67
|
-
|
|
|
69
|
+
| Parameter | Value | Description |
|
|
68
70
|
|---|---|---|
|
|
69
|
-
| `remote.udf.api` | `python3.mc.v0` | Python 3.10
|
|
70
|
-
| `remote.udf.api` | `java8.hive2.v0` | Java 8 Hive
|
|
71
|
-
| `remote.udf.protocol` | `http.arrow.v0` |
|
|
71
|
+
| `remote.udf.api` | `python3.mc.v0` | Python 3.10 runtime |
|
|
72
|
+
| `remote.udf.api` | `java8.hive2.v0` | Java 8 Hive-style UDF |
|
|
73
|
+
| `remote.udf.protocol` | `http.arrow.v0` | Default protocol for accessing the cloud function |
|
|
72
74
|
|
|
73
75
|
---
|
|
74
76
|
|
|
75
|
-
## Python UDF
|
|
77
|
+
## Python UDF Code Structure
|
|
76
78
|
|
|
77
79
|
```python
|
|
78
80
|
#!/usr/bin/env python
|
|
@@ -81,7 +83,7 @@ try:
|
|
|
81
83
|
except ImportError:
|
|
82
84
|
annotate = lambda _: lambda _: _
|
|
83
85
|
|
|
84
|
-
@annotate("string->string") #
|
|
86
|
+
@annotate("string->string") # Function signature: input_type->return_type
|
|
85
87
|
class Upper(object):
|
|
86
88
|
def evaluate(self, arg):
|
|
87
89
|
if arg is None:
|
|
@@ -89,83 +91,43 @@ class Upper(object):
|
|
|
89
91
|
return arg.upper()
|
|
90
92
|
```
|
|
91
93
|
|
|
92
|
-
###
|
|
94
|
+
### Function Signature Format
|
|
93
95
|
|
|
94
96
|
```
|
|
95
97
|
"input_type1,input_type2->return_type"
|
|
96
98
|
|
|
97
|
-
#
|
|
98
|
-
"string->string" #
|
|
99
|
-
"string,int->double" #
|
|
100
|
-
"string->array<string>" #
|
|
99
|
+
# Examples
|
|
100
|
+
"string->string" # String in, string out
|
|
101
|
+
"string,int->double" # Two inputs, returns double
|
|
102
|
+
"string->array<string>" # Returns an array
|
|
101
103
|
```
|
|
102
104
|
|
|
103
|
-
|
|
105
|
+
Supported types: `string`, `int`, `bigint`, `double`, `float`, `boolean`, `array<T>`, `map<K,V>`
|
|
104
106
|
|
|
105
|
-
###
|
|
107
|
+
### Packaging and Upload
|
|
106
108
|
|
|
107
109
|
```bash
|
|
108
|
-
#
|
|
110
|
+
# Install dependencies into the current directory
|
|
109
111
|
pip3 install httpx pydantic -t .
|
|
110
112
|
|
|
111
|
-
#
|
|
113
|
+
# Package (must be < 500 MB)
|
|
112
114
|
zip -rq code.zip ./*
|
|
113
115
|
```
|
|
114
116
|
|
|
115
117
|
```sql
|
|
116
|
-
--
|
|
118
|
+
-- Upload to User Volume (run in ClickZetta Studio or CLI; source_path must be an absolute path)
|
|
117
119
|
PUT '/path/to/code.zip' TO USER VOLUME;
|
|
118
120
|
```
|
|
119
121
|
|
|
120
122
|
---
|
|
121
123
|
|
|
122
|
-
##
|
|
124
|
+
## Management
|
|
123
125
|
|
|
124
126
|
```sql
|
|
125
|
-
--
|
|
127
|
+
-- List external functions
|
|
126
128
|
SHOW EXTERNAL FUNCTIONS;
|
|
127
129
|
SHOW EXTERNAL FUNCTIONS LIKE 'my_%';
|
|
128
130
|
|
|
129
|
-
--
|
|
131
|
+
-- Drop an external function
|
|
130
132
|
DROP FUNCTION IF EXISTS my_schema.my_udf;
|
|
131
133
|
```
|
|
132
|
-
|
|
133
|
-
---
|
|
134
|
-
|
|
135
|
-
## 内置 AI 函数(无需部署云函数)
|
|
136
|
-
|
|
137
|
-
### AI_COMPLETE(调用 LLM)
|
|
138
|
-
|
|
139
|
-
```sql
|
|
140
|
-
-- 通过 API Connection 调用(需先创建连接)
|
|
141
|
-
CREATE API CONNECTION conn_bailian
|
|
142
|
-
TYPE ai_function
|
|
143
|
-
PROVIDER = 'bailian'
|
|
144
|
-
BASE_URL = 'https://dashscope.aliyuncs.com/api/v1'
|
|
145
|
-
API_KEY = '<key>';
|
|
146
|
-
|
|
147
|
-
-- 调用 LLM 生成文本
|
|
148
|
-
SELECT AI_COMPLETE('connection:conn_bailian', '请用一句话总结:' || content) AS summary
|
|
149
|
-
FROM articles
|
|
150
|
-
LIMIT 10;
|
|
151
|
-
|
|
152
|
-
-- 通过平台 Endpoint 调用(管理员预配置)
|
|
153
|
-
SELECT AI_COMPLETE('endpoint:my_llm_endpoint', prompt_col) AS result
|
|
154
|
-
FROM my_table;
|
|
155
|
-
```
|
|
156
|
-
|
|
157
|
-
### AI_EMBEDDING(文本向量化)
|
|
158
|
-
|
|
159
|
-
```sql
|
|
160
|
-
-- 将文本转为向量(用于语义搜索)
|
|
161
|
-
SELECT id, content,
|
|
162
|
-
AI_EMBEDDING('connection:conn_bailian', content) AS embedding
|
|
163
|
-
FROM documents;
|
|
164
|
-
|
|
165
|
-
-- 结合向量索引做语义搜索
|
|
166
|
-
SELECT id, content,
|
|
167
|
-
cosine_distance(embedding, AI_EMBEDDING('connection:conn_bailian', '查询文本')) AS dist
|
|
168
|
-
FROM doc_embeddings
|
|
169
|
-
ORDER BY dist
|
|
170
|
-
LIMIT 10;
|
|
171
|
-
```
|
|
@@ -1,30 +1,31 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: clickzetta-java-sdk
|
|
3
3
|
description: |
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
"
|
|
10
|
-
"clickzetta-java"
|
|
4
|
+
Use the ClickZetta Java SDK to write data to Lakehouse tables in batch or in real time.
|
|
5
|
+
Covers complete usage patterns for BulkloadStream (local file/database batch uploads)
|
|
6
|
+
and RealtimeStream (Kafka real-time consumption and writes), including Maven dependencies,
|
|
7
|
+
connection URL formats, row write APIs, status monitoring, Options tuning, and common error handling.
|
|
8
|
+
Trigger when users say "Java SDK", "BulkloadStream", "RealtimeStream",
|
|
9
|
+
"write to Lakehouse with Java", "Java batch upload", "Kafka Java write",
|
|
10
|
+
"clickzetta-java", "Maven dependency", "Java data import",
|
|
11
|
+
"Java 写入 Lakehouse", "Java 批量上传", or "Kafka Java 写入".
|
|
11
12
|
Keywords: Java SDK, BulkloadStream, RealtimeStream, Kafka consumer, batch write, real-time write
|
|
12
13
|
---
|
|
13
14
|
|
|
14
15
|
# ClickZetta Java SDK
|
|
15
16
|
|
|
16
|
-
Java SDK
|
|
17
|
-
- **BulkloadStream**
|
|
18
|
-
- **RealtimeStream**
|
|
17
|
+
The Java SDK provides two write interfaces:
|
|
18
|
+
- **BulkloadStream** - batch writes for scheduled ETL and local file imports. It does not support primary-key tables and is not recommended for high-frequency writes at intervals shorter than 5 minutes.
|
|
19
|
+
- **RealtimeStream** - real-time writes for Kafka consumption and streaming ingestion. Data can be queried within seconds.
|
|
19
20
|
|
|
20
|
-
|
|
21
|
+
Read [references/bulkload.md](references/bulkload.md) for batch writes and [references/realtime.md](references/realtime.md) for real-time writes.
|
|
21
22
|
|
|
22
23
|
---
|
|
23
24
|
|
|
24
|
-
## Maven
|
|
25
|
+
## Maven Dependency
|
|
25
26
|
|
|
26
27
|
```xml
|
|
27
|
-
<!--
|
|
28
|
+
<!-- See https://central.sonatype.com/artifact/com.clickzetta/clickzetta-java for the latest clickzetta-java version. -->
|
|
28
29
|
<dependency>
|
|
29
30
|
<groupId>com.clickzetta</groupId>
|
|
30
31
|
<artifactId>clickzetta-java</artifactId>
|
|
@@ -32,7 +33,7 @@ Java SDK 提供两种写入接口:
|
|
|
32
33
|
</dependency>
|
|
33
34
|
```
|
|
34
35
|
|
|
35
|
-
RealtimeStream
|
|
36
|
+
RealtimeStream with Kafka also requires:
|
|
36
37
|
|
|
37
38
|
```xml
|
|
38
39
|
<dependency>
|
|
@@ -44,10 +45,10 @@ RealtimeStream + Kafka 还需要:
|
|
|
44
45
|
|
|
45
46
|
---
|
|
46
47
|
|
|
47
|
-
##
|
|
48
|
+
## Connection URL Format
|
|
48
49
|
|
|
49
50
|
```java
|
|
50
|
-
//
|
|
51
|
+
// Recommended: explicit parameters. Supported in 2.0.0+ and does not depend on URL parsing.
|
|
51
52
|
ClickZettaClient client = ClickZettaClient.newBuilder()
|
|
52
53
|
.service("cn-shanghai-alicloud.api.clickzetta.com")
|
|
53
54
|
.instance("your_instance")
|
|
@@ -58,7 +59,7 @@ ClickZettaClient client = ClickZettaClient.newBuilder()
|
|
|
58
59
|
.vcluster("default")
|
|
59
60
|
.build();
|
|
60
61
|
|
|
61
|
-
//
|
|
62
|
+
// Backward-compatible URL-based mode. BulkloadStream uses virtualcluster=, while RealtimeStream uses vcluster=.
|
|
62
63
|
String bulkUrl = MessageFormat.format(
|
|
63
64
|
"jdbc:clickzetta://{0}.{1}/{2}?schema={3}&username={4}&password={5}&virtualcluster={6}",
|
|
64
65
|
instance, region_endpoint, workspace, schema, username, password, vcluster
|
|
@@ -70,35 +71,35 @@ String rtUrl = MessageFormat.format(
|
|
|
70
71
|
ClickZettaClient client = ClickZettaClient.newBuilder().url(url).build();
|
|
71
72
|
```
|
|
72
73
|
|
|
73
|
-
JDBC
|
|
74
|
+
JDBC connection for DDL and queries:
|
|
74
75
|
|
|
75
76
|
```java
|
|
76
|
-
// 2.0.0
|
|
77
|
-
// 1.x
|
|
77
|
+
// Driver class for 2.0.0+: com.clickzetta.client.jdbc.ClickZettaDriver
|
|
78
|
+
// Driver class for 1.x: com.clickzetta.jdbc.ClickZettaDriver
|
|
78
79
|
Class.forName("com.clickzetta.client.jdbc.ClickZettaDriver");
|
|
79
80
|
Connection conn = DriverManager.getConnection(jdbcUrl);
|
|
80
81
|
```
|
|
81
82
|
|
|
82
83
|
---
|
|
83
84
|
|
|
84
|
-
## BulkloadStream
|
|
85
|
+
## BulkloadStream Quick Example
|
|
85
86
|
|
|
86
87
|
```java
|
|
87
|
-
//
|
|
88
|
+
// Create a BulkloadStream.
|
|
88
89
|
BulkloadStream stream = client.newBulkloadStreamBuilder()
|
|
89
90
|
.schema("public")
|
|
90
91
|
.table("orders")
|
|
91
92
|
.operate(RowStream.BulkLoadOperate.APPEND)
|
|
92
93
|
.build();
|
|
93
94
|
|
|
94
|
-
//
|
|
95
|
+
// Write data. Column indexes start at 0 and must match the table DDL order.
|
|
95
96
|
Row row = stream.createRow();
|
|
96
97
|
row.setValue(0, "order-001"); // STRING
|
|
97
98
|
row.setValue(1, 1); // INT
|
|
98
99
|
row.setValue(2, 299.99); // DOUBLE
|
|
99
|
-
stream.apply(row); //
|
|
100
|
+
stream.apply(row); // Required. Otherwise the row is not sent to the server.
|
|
100
101
|
|
|
101
|
-
//
|
|
102
|
+
// Close and wait for completion.
|
|
102
103
|
stream.close();
|
|
103
104
|
while (stream.getState() == StreamState.RUNNING) {
|
|
104
105
|
Thread.sleep(1000);
|
|
@@ -111,15 +112,15 @@ client.close();
|
|
|
111
112
|
|
|
112
113
|
---
|
|
113
114
|
|
|
114
|
-
## RealtimeStream
|
|
115
|
+
## RealtimeStream Quick Example
|
|
115
116
|
|
|
116
117
|
```java
|
|
117
|
-
// Options
|
|
118
|
+
// Options tuning.
|
|
118
119
|
Options options = Options.builder()
|
|
119
|
-
.withMutationBufferLinesNum(10) //
|
|
120
|
+
.withMutationBufferLinesNum(10) // Number of buffered rows.
|
|
120
121
|
.build();
|
|
121
122
|
|
|
122
|
-
//
|
|
123
|
+
// Create a RealtimeStream for a regular table in APPEND_ONLY mode.
|
|
123
124
|
RealtimeStream stream = client.newRealtimeStreamBuilder()
|
|
124
125
|
.operate(RowStream.RealTimeOperate.APPEND_ONLY)
|
|
125
126
|
.options(options)
|
|
@@ -127,7 +128,7 @@ RealtimeStream stream = client.newRealtimeStreamBuilder()
|
|
|
127
128
|
.table("events")
|
|
128
129
|
.build();
|
|
129
130
|
|
|
130
|
-
//
|
|
131
|
+
// Write data by column name, not by index.
|
|
131
132
|
Row row = stream.createRow(Stream.Operator.INSERT);
|
|
132
133
|
row.setValue("id", 1);
|
|
133
134
|
row.setValue("event", "{\"type\":\"click\"}");
|
|
@@ -135,26 +136,26 @@ stream.apply(row);
|
|
|
135
136
|
stream.close();
|
|
136
137
|
```
|
|
137
138
|
|
|
138
|
-
## RealtimeStream CDC
|
|
139
|
+
## RealtimeStream CDC Example for Primary-Key Tables
|
|
139
140
|
|
|
140
141
|
```java
|
|
141
|
-
//
|
|
142
|
+
// Table DDL: CREATE TABLE orders (txid STRING NOT NULL PRIMARY KEY, amount DOUBLE, status STRING);
|
|
142
143
|
|
|
143
144
|
RealtimeStream stream = client.newRealtimeStreamBuilder()
|
|
144
|
-
.operate(RowStream.RealTimeOperate.CDC) //
|
|
145
|
+
.operate(RowStream.RealTimeOperate.CDC) // Primary-key tables must use CDC.
|
|
145
146
|
.options(options)
|
|
146
147
|
.schema("public")
|
|
147
148
|
.table("orders")
|
|
148
149
|
.build();
|
|
149
150
|
|
|
150
|
-
// UPSERT
|
|
151
|
+
// UPSERT: update an existing row or insert a new row.
|
|
151
152
|
Row row = stream.createRow(Stream.Operator.UPSERT);
|
|
152
153
|
row.setValue("txid", "order-001");
|
|
153
154
|
row.setValue("amount", 299.99);
|
|
154
155
|
row.setValue("status", "paid");
|
|
155
156
|
stream.apply(row);
|
|
156
157
|
|
|
157
|
-
// DELETE_IGNORE
|
|
158
|
+
// DELETE_IGNORE: delete the row and ignore the operation if the target row does not exist.
|
|
158
159
|
Row del = stream.createRow(Stream.Operator.DELETE_IGNORE);
|
|
159
160
|
del.setValue("txid", "order-001");
|
|
160
161
|
stream.apply(del);
|
|
@@ -164,23 +165,23 @@ stream.close();
|
|
|
164
165
|
|
|
165
166
|
---
|
|
166
167
|
|
|
167
|
-
##
|
|
168
|
+
## Selection Guide
|
|
168
169
|
|
|
169
|
-
|
|
|
170
|
+
| Scenario | Recommended interface |
|
|
170
171
|
|---|---|
|
|
171
|
-
|
|
|
172
|
-
| Kafka
|
|
173
|
-
| 5
|
|
174
|
-
|
|
|
172
|
+
| Scheduled batch ETL, hourly or daily | BulkloadStream |
|
|
173
|
+
| Kafka real-time consumption | RealtimeStream |
|
|
174
|
+
| High-frequency writes at intervals shorter than 5 minutes | RealtimeStream |
|
|
175
|
+
| Primary-key table writes with UPSERT or DELETE | RealtimeStream CDC mode |
|
|
175
176
|
|
|
176
177
|
---
|
|
177
178
|
|
|
178
|
-
##
|
|
179
|
+
## Usage Limits
|
|
179
180
|
|
|
180
|
-
|
|
|
181
|
+
| Limit | BulkloadStream | RealtimeStream |
|
|
181
182
|
|---|---|---|
|
|
182
|
-
|
|
|
183
|
-
|
|
|
184
|
-
|
|
|
185
|
-
| Table Stream/Dynamic Table
|
|
186
|
-
|
|
|
183
|
+
| Primary-key tables | Not supported | Supported in CDC mode |
|
|
184
|
+
| High-frequency writes at intervals shorter than 5 minutes | Not recommended | Supported |
|
|
185
|
+
| Data visibility latency | Visible after `close()` | Visible after about 1 minute |
|
|
186
|
+
| Table Stream/Dynamic Table visibility | After `close()` | After about 1 minute |
|
|
187
|
+
| Schema changes | Recreate the stream | Stop the task and restart about 90 minutes after the schema change |
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
{"case_id":"001","type":"should_call","user_input":"
|
|
2
|
-
{"case_id":"002","type":"should_call","user_input":"Java
|
|
3
|
-
{"case_id":"003","type":"should_call","user_input":"
|
|
4
|
-
{"case_id":"004","type":"should_call","user_input":"BulkloadStream
|
|
5
|
-
{"case_id":"005","type":"should_call","user_input":"Java SDK
|
|
6
|
-
{"case_id":"006","type":"should_call","user_input":"
|
|
7
|
-
{"case_id":"007","type":"should_call","user_input":"
|
|
8
|
-
{"case_id":"008","type":"should_not_call","user_input":"
|
|
9
|
-
{"case_id":"009","type":"should_not_call","user_input":"
|
|
10
|
-
{"case_id":"010","type":"should_not_call","user_input":"Flink
|
|
11
|
-
{"case_id":"011","type":"should_not_call","user_input":"
|
|
12
|
-
{"case_id":"012","type":"should_not_call","user_input":"MySQL JDBC
|
|
1
|
+
{"case_id":"001","type":"should_call","user_input":"Use Java SDK BulkloadStream to batch write data to Lakehouse","expected_skill":"clickzetta-java-sdk","expected_output_contains":["BulkloadStream"]}
|
|
2
|
+
{"case_id":"002","type":"should_call","user_input":"How can Java consume Kafka and write to Lakehouse in real time","expected_skill":"clickzetta-java-sdk","expected_output_contains":["RealtimeStream"]}
|
|
3
|
+
{"case_id":"003","type":"should_call","user_input":"How do I configure the Maven dependency for clickzetta-java","expected_skill":"clickzetta-java-sdk","expected_output_contains":["groupId","clickzetta-java"]}
|
|
4
|
+
{"case_id":"004","type":"should_call","user_input":"What is the difference between BulkloadStream and RealtimeStream","expected_skill":"clickzetta-java-sdk","expected_output_contains":["BulkloadStream","RealtimeStream"]}
|
|
5
|
+
{"case_id":"005","type":"should_call","user_input":"What is the Java SDK connection URL format","expected_skill":"clickzetta-java-sdk","expected_output_contains":["URL"]}
|
|
6
|
+
{"case_id":"006","type":"should_call","user_input":"Batch upload a local file to Lakehouse with Java","expected_skill":"clickzetta-java-sdk","expected_output_contains":["BulkloadStream"]}
|
|
7
|
+
{"case_id":"007","type":"should_call","user_input":"How do I use setValue with RealtimeStream","expected_skill":"clickzetta-java-sdk","expected_output_contains":["setValue"]}
|
|
8
|
+
{"case_id":"008","type":"should_not_call","user_input":"How do I connect to Lakehouse with the Python SDK","forbidden_skill":"clickzetta-java-sdk"}
|
|
9
|
+
{"case_id":"009","type":"should_not_call","user_input":"Help me write a Spring Boot application","forbidden_skill":"clickzetta-java-sdk"}
|
|
10
|
+
{"case_id":"010","type":"should_not_call","user_input":"How does Flink write to Lakehouse","forbidden_skill":"clickzetta-java-sdk"}
|
|
11
|
+
{"case_id":"011","type":"should_not_call","user_input":"How do I create a VCluster","forbidden_skill":"clickzetta-java-sdk"}
|
|
12
|
+
{"case_id":"012","type":"should_not_call","user_input":"How do I configure a MySQL JDBC connection","forbidden_skill":"clickzetta-java-sdk"}
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
# BulkloadStream
|
|
1
|
+
# BulkloadStream Detailed Reference
|
|
2
2
|
|
|
3
|
-
>
|
|
4
|
-
>
|
|
3
|
+
> Best for: scheduled ETL, local file imports, and database migration.
|
|
4
|
+
> Not for: primary-key tables or high-frequency writes at intervals shorter than 5 minutes.
|
|
5
5
|
|
|
6
|
-
## Maven
|
|
6
|
+
## Maven Dependency
|
|
7
7
|
|
|
8
8
|
```xml
|
|
9
|
-
<!--
|
|
9
|
+
<!-- See https://central.sonatype.com/artifact/com.clickzetta/clickzetta-java for the latest version. -->
|
|
10
10
|
<dependency>
|
|
11
11
|
<groupId>com.clickzetta</groupId>
|
|
12
12
|
<artifactId>clickzetta-java</artifactId>
|
|
@@ -14,17 +14,17 @@
|
|
|
14
14
|
</dependency>
|
|
15
15
|
```
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
See [Maven Central](https://central.sonatype.com/artifact/com.clickzetta/clickzetta-java) for the latest version.
|
|
18
18
|
|
|
19
|
-
##
|
|
19
|
+
## Usage Limits
|
|
20
20
|
|
|
21
|
-
-
|
|
22
|
-
-
|
|
23
|
-
-
|
|
21
|
+
- **Primary-key table writes are not supported.**
|
|
22
|
+
- **High-frequency writes at intervals shorter than 5 minutes are not recommended.**
|
|
23
|
+
- Data becomes visible only after writing is complete and `close()` has been called.
|
|
24
24
|
|
|
25
|
-
##
|
|
25
|
+
## Complete Example: Read a Local CSV and Write to Lakehouse
|
|
26
26
|
|
|
27
|
-
###
|
|
27
|
+
### Create the Table
|
|
28
28
|
|
|
29
29
|
```sql
|
|
30
30
|
CREATE TABLE bulk_order_items (
|
|
@@ -38,7 +38,7 @@ CREATE TABLE bulk_order_items (
|
|
|
38
38
|
);
|
|
39
39
|
```
|
|
40
40
|
|
|
41
|
-
### Java
|
|
41
|
+
### Java Code: BulkloadFile Class
|
|
42
42
|
|
|
43
43
|
```java
|
|
44
44
|
import com.clickzetta.client.BulkloadStream;
|
|
@@ -66,12 +66,12 @@ public class BulkloadFile {
|
|
|
66
66
|
initialize();
|
|
67
67
|
File csvFile = new File("olist_order_items_dataset.csv");
|
|
68
68
|
BufferedReader reader = new BufferedReader(new FileReader(csvFile));
|
|
69
|
-
reader.readLine(); //
|
|
69
|
+
reader.readLine(); // Skip the header row.
|
|
70
70
|
|
|
71
71
|
String line;
|
|
72
72
|
while ((line = reader.readLine()) != null) {
|
|
73
73
|
String[] values = line.split(",");
|
|
74
|
-
//
|
|
74
|
+
// Type conversion must match the table DDL.
|
|
75
75
|
String orderId = values[0];
|
|
76
76
|
int orderItemId = Integer.parseInt(values[1]);
|
|
77
77
|
String productId = values[2];
|
|
@@ -81,7 +81,7 @@ public class BulkloadFile {
|
|
|
81
81
|
double freightValue = Double.parseDouble(values[6]);
|
|
82
82
|
|
|
83
83
|
Row row = bulkloadStream.createRow();
|
|
84
|
-
//
|
|
84
|
+
// BulkloadStream uses column indexes starting at 0. The order must match the table DDL.
|
|
85
85
|
row.setValue(0, orderId);
|
|
86
86
|
row.setValue(1, orderItemId);
|
|
87
87
|
row.setValue(2, productId);
|
|
@@ -89,7 +89,7 @@ public class BulkloadFile {
|
|
|
89
89
|
row.setValue(4, shippingLimitDate);
|
|
90
90
|
row.setValue(5, price);
|
|
91
91
|
row.setValue(6, freightValue);
|
|
92
|
-
//
|
|
92
|
+
// apply() is required. Otherwise the row is not sent to the server.
|
|
93
93
|
bulkloadStream.apply(row);
|
|
94
94
|
}
|
|
95
95
|
|
|
@@ -101,7 +101,7 @@ public class BulkloadFile {
|
|
|
101
101
|
}
|
|
102
102
|
|
|
103
103
|
private static void initialize() throws Exception {
|
|
104
|
-
//
|
|
104
|
+
// Recommended: explicit parameters. Supported in 2.0.0+.
|
|
105
105
|
client = ClickZettaClient.newBuilder()
|
|
106
106
|
.service("cn-shanghai-alicloud.api.clickzetta.com")
|
|
107
107
|
.instance("your_instance")
|
|
@@ -129,20 +129,20 @@ public class BulkloadFile {
|
|
|
129
129
|
}
|
|
130
130
|
```
|
|
131
131
|
|
|
132
|
-
##
|
|
132
|
+
## Key APIs
|
|
133
133
|
|
|
134
|
-
| API |
|
|
134
|
+
| API | Description |
|
|
135
135
|
|---|---|
|
|
136
|
-
| `bulkloadStream.createRow()` |
|
|
137
|
-
| `row.setValue(int index, Object value)` |
|
|
138
|
-
| `bulkloadStream.apply(row)` |
|
|
139
|
-
| `bulkloadStream.close()` |
|
|
140
|
-
| `bulkloadStream.getState()` |
|
|
141
|
-
| `bulkloadStream.getErrorMessage()` |
|
|
136
|
+
| `bulkloadStream.createRow()` | Create a row object without arguments. |
|
|
137
|
+
| `row.setValue(int index, Object value)` | Set a value by column index, starting at 0. |
|
|
138
|
+
| `bulkloadStream.apply(row)` | Send the row to the server. This call is required. |
|
|
139
|
+
| `bulkloadStream.close()` | Close the stream and trigger the commit. |
|
|
140
|
+
| `bulkloadStream.getState()` | Get the state: RUNNING, SUCCEEDED, or FAILED. |
|
|
141
|
+
| `bulkloadStream.getErrorMessage()` | Get the failure reason. |
|
|
142
142
|
|
|
143
|
-
##
|
|
143
|
+
## Type Mapping
|
|
144
144
|
|
|
145
|
-
| Java
|
|
145
|
+
| Java type | Lakehouse type |
|
|
146
146
|
|---|---|
|
|
147
147
|
| `Long` / `long` | BIGINT |
|
|
148
148
|
| `Integer` / `int` | INT |
|
|
@@ -153,11 +153,11 @@ public class BulkloadFile {
|
|
|
153
153
|
| `java.sql.Date` | DATE |
|
|
154
154
|
| `BigDecimal` | DECIMAL |
|
|
155
155
|
|
|
156
|
-
##
|
|
156
|
+
## FAQ
|
|
157
157
|
|
|
158
|
-
|
|
|
158
|
+
| Issue | Cause | Solution |
|
|
159
159
|
|---|---|---|
|
|
160
|
-
|
|
|
161
|
-
|
|
|
162
|
-
|
|
|
163
|
-
|
|
|
160
|
+
| Data cannot be queried after writing | `apply()` was not called, `close()` was not called, or the stream is still in the RUNNING state | Call `apply()` for every row, then call `close()` and wait until the state becomes SUCCEEDED. |
|
|
161
|
+
| Primary-key table write fails | BulkloadStream does not support primary-key tables | Use RealtimeStream in CDC mode, JDBC with MERGE, or Flink `igs-dynamic-table` instead. |
|
|
162
|
+
| Column value type mismatch | Java types do not match the table DDL | Convert values before writing, for example with `parseInt` or `parseDouble`. |
|
|
163
|
+
| Connection fails | Wrong URL parameter name | BulkloadStream uses `virtualcluster=`, not `vcluster=`. |
|