@clickzetta/cz-cli-linux-x64 0.3.2 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/package.json +1 -1
- package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
- package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
- package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
- package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
- package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
- package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
- package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
- package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
- package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
- package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
- package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -293
- package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -450
- package/bin/skills/clickzetta-concepts/SKILL.md +0 -282
- package/bin/skills/clickzetta-concepts/references/brands-and-endpoints.md +0 -79
- package/bin/skills/clickzetta-concepts/references/object-model.md +0 -311
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -165
- package/bin/skills/clickzetta-data-lifecycle/SKILL.md +0 -211
- package/bin/skills/clickzetta-data-lifecycle/references/lifecycle-reference.md +0 -175
- package/bin/skills/clickzetta-data-recovery/SKILL.md +0 -215
- package/bin/skills/clickzetta-data-recovery/evals/evals.json +0 -35
- package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
- package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
- package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
- package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
- package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -540
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -259
- package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -86
- package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -257
- package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
- package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
- package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
- package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -429
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -268
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
- package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -120
- package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
- package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
- package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -117
- package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
- package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
- package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
- package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
- package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
- package/bin/skills/clickzetta-information-schema/SKILL.md +0 -367
- package/bin/skills/clickzetta-information-schema/references/instance-views-reference.md +0 -276
- package/bin/skills/clickzetta-information-schema/references/metering-views-reference.md +0 -137
- package/bin/skills/clickzetta-information-schema/references/views-reference.md +0 -271
- package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -531
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -186
- package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
- package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
- package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
- package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
- package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
- package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
- package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
- package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
- package/bin/skills/clickzetta-metadata-query/SKILL.md +0 -298
- package/bin/skills/clickzetta-metadata-query/references/show-desc-reference.md +0 -326
- package/bin/skills/clickzetta-monitoring/SKILL.md +0 -199
- package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
- package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -402
- package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
- package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
- package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
- package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
- package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
- package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -197
- package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
- package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
- package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
- package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -353
- package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
- package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -173
- package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
- package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -160
- package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -123
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -172
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
- package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
- package/bin/skills/clickzetta-studio-overview/SKILL.md +0 -170
- package/bin/skills/clickzetta-studio-overview/references/studio-modules.md +0 -173
- package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -155
- package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
- package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
- package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
- package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -249
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -194
- package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
- package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
# 语义视图完整语法参考
|
|
2
|
-
|
|
3
|
-
> 来源:https://www.yunqi.tech/documents/semantic_view
|
|
4
|
-
> 功能状态:邀测(1.3 版本起)
|
|
5
|
-
|
|
6
|
-
---
|
|
7
|
-
|
|
8
|
-
## CREATE SEMANTIC VIEW 完整语法
|
|
9
|
-
|
|
10
|
-
```sql
|
|
11
|
-
CREATE SEMANTIC VIEW <视图名称>
|
|
12
|
-
TABLES (
|
|
13
|
-
<逻辑表定义> [ , ... ]
|
|
14
|
-
)
|
|
15
|
-
[ FILTERS (
|
|
16
|
-
<过滤器定义> [ , ... ]
|
|
17
|
-
) ]
|
|
18
|
-
DIMENSIONS (
|
|
19
|
-
<维度定义> [ , ... ]
|
|
20
|
-
)
|
|
21
|
-
METRICS (
|
|
22
|
-
<指标定义> [ , ... ]
|
|
23
|
-
)
|
|
24
|
-
[ COMMENT = '<视图说明>' ];
|
|
25
|
-
```
|
|
26
|
-
|
|
27
|
-
**约束**:`DIMENSIONS` 和 `METRICS` 两个子句可以省略其中之一,但至少需要包含一个(上方语法展示的是两者同时出现时的写法)。
|
|
28
|
-
|
|
29
|
-
---
|
|
30
|
-
|
|
31
|
-
## 逻辑表定义语法
|
|
32
|
-
|
|
33
|
-
```sql
|
|
34
|
-
<表别名> AS <schema>.<物理表名>
|
|
35
|
-
PRIMARY KEY ( <列名> [ , ... ] )
|
|
36
|
-
[ FOREIGN KEY ( <列名> ) REFERENCES <其他逻辑表别名> ]
|
|
37
|
-
[ WITH SYNONYMS ( '<同义词>' [ , ... ] ) ]
|
|
38
|
-
[ COMMENT = '<说明>' ]
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
| 参数 | 说明 |
|
|
42
|
-
|---|---|
|
|
43
|
-
| `<表别名> AS <schema>.<物理表>` | 为物理表指定逻辑别名,后续维度/指标/外键均用此别名引用 |
|
|
44
|
-
| `PRIMARY KEY` | 主键列,用于确定表间关系类型(一对多/一对一) |
|
|
45
|
-
| `FOREIGN KEY ... REFERENCES` | 外键关系,引擎据此自动处理 JOIN;引用目标必须是逻辑表别名 |
|
|
46
|
-
| `WITH SYNONYMS` | 逻辑表同义词,增强可发现性 |
|
|
47
|
-
|
|
48
|
-
**注意**:被外键引用的表必须在 TABLES 子句中先定义。
|
|
49
|
-
|
|
50
|
-
---
|
|
51
|
-
|
|
52
|
-
## 过滤器定义语法
|
|
53
|
-
|
|
54
|
-
```sql
|
|
55
|
-
<逻辑表别名>.<过滤器名> AS <布尔表达式>
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
示例:
|
|
59
|
-
```sql
|
|
60
|
-
FILTERS (
|
|
61
|
-
customers.is_building AS customers.c_mktsegment = 'BUILDING',
|
|
62
|
-
orders.is_open AS orders.o_orderstatus = 'O'
|
|
63
|
-
)
|
|
64
|
-
```
|
|
65
|
-
|
|
66
|
-
**重要**:FILTERS 是面向 AI/元数据层的语义注解,**不能**作为 `semantic_view()` 函数参数直接传入。若要在查询中过滤,需将对应列定义为 DIMENSION,再用外层 WHERE 子句。
|
|
67
|
-
|
|
68
|
-
---
|
|
69
|
-
|
|
70
|
-
## 维度定义语法
|
|
71
|
-
|
|
72
|
-
```sql
|
|
73
|
-
{ <逻辑表别名>.<维度名> | <维度名> } AS <表达式>
|
|
74
|
-
[ WITH SYNONYMS = ( '<同义词>' [ , ... ] ) ]
|
|
75
|
-
[ is_unique = { true | false } ]
|
|
76
|
-
[ is_time = { true | false } ]
|
|
77
|
-
[ enum_values = [ <值1>, <值2>, ... ] ]
|
|
78
|
-
[ COMMENT = '<说明>' ]
|
|
79
|
-
```
|
|
80
|
-
|
|
81
|
-
| 参数 | 说明 |
|
|
82
|
-
|---|---|
|
|
83
|
-
| `AS <表达式>` | 可以是列名,也可以是计算表达式(如 `YEAR(o_orderdate)`) |
|
|
84
|
-
| `WITH SYNONYMS` | 维度同义词,用户可用不同业务术语引用同一维度 |
|
|
85
|
-
| `is_unique = true` | 标识该维度值唯一(如客户 ID、订单号等主键类字段),帮助引擎优化 |
|
|
86
|
-
| `is_time = true` | 标识为时间类型维度(如订单日期) |
|
|
87
|
-
| `enum_values` | 限定允许的枚举值,提升查询准确性 |
|
|
88
|
-
|
|
89
|
-
---
|
|
90
|
-
|
|
91
|
-
## 指标定义语法
|
|
92
|
-
|
|
93
|
-
```sql
|
|
94
|
-
<逻辑表别名>.<指标名> AS <聚合表达式>
|
|
95
|
-
[ COMMENT = '<说明>' ]
|
|
96
|
-
```
|
|
97
|
-
|
|
98
|
-
支持的聚合函数:`COUNT`、`AVG`、`SUM`、`MIN`、`MAX`
|
|
99
|
-
|
|
100
|
-
示例:
|
|
101
|
-
```sql
|
|
102
|
-
METRICS (
|
|
103
|
-
orders.total_revenue AS SUM(o_totalprice)
|
|
104
|
-
COMMENT = '总收入',
|
|
105
|
-
orders.avg_order_value AS AVG(o_totalprice)
|
|
106
|
-
COMMENT = '平均订单金额',
|
|
107
|
-
customers.customer_count AS COUNT(c_custkey)
|
|
108
|
-
COMMENT = '客户总数'
|
|
109
|
-
)
|
|
110
|
-
```
|
|
111
|
-
|
|
112
|
-
---
|
|
113
|
-
|
|
114
|
-
## semantic_view() 查询函数语法
|
|
115
|
-
|
|
116
|
-
```sql
|
|
117
|
-
SELECT *
|
|
118
|
-
FROM semantic_view(
|
|
119
|
-
<视图名称>,
|
|
120
|
-
DIMENSIONS <维度名> [ , DIMENSIONS <维度名> ... ],
|
|
121
|
-
METRICS <指标名> [ , METRICS <指标名> ... ]
|
|
122
|
-
)
|
|
123
|
-
[ WHERE <过滤条件> ];
|
|
124
|
-
```
|
|
125
|
-
|
|
126
|
-
- 维度名可用限定名(`表别名.维度名`)或短名(名称唯一时)
|
|
127
|
-
- 结果自动按指定维度分组,无需写 GROUP BY
|
|
128
|
-
- WHERE 子句作用于 `semantic_view()` 返回的结果列,条件中的列名使用维度/指标的短名(不含表别名前缀)
|
|
129
|
-
|
|
130
|
-
---
|
|
131
|
-
|
|
132
|
-
## 管理命令
|
|
133
|
-
|
|
134
|
-
| 命令 | 说明 |
|
|
135
|
-
|---|---|
|
|
136
|
-
| `CREATE SEMANTIC VIEW` | 创建语义视图 |
|
|
137
|
-
| `DROP SEMANTIC VIEW IF EXISTS <名称>` | 删除语义视图 |
|
|
138
|
-
| `SHOW SEMANTIC VIEWS` | 列出当前 Schema 所有语义视图 |
|
|
139
|
-
| `SHOW SEMANTIC VIEWS IN <schema>` | 列出指定 Schema 的语义视图 |
|
|
140
|
-
| `DESC EXTENDED <名称>` | 查看详细定义(逻辑表/维度/指标/外键/索引) |
|
|
141
|
-
|
|
142
|
-
---
|
|
143
|
-
|
|
144
|
-
## 最佳实践
|
|
145
|
-
|
|
146
|
-
```sql
|
|
147
|
-
-- 1. 幂等创建(始终先删再建)
|
|
148
|
-
DROP SEMANTIC VIEW IF EXISTS my_view;
|
|
149
|
-
CREATE SEMANTIC VIEW my_view ...;
|
|
150
|
-
|
|
151
|
-
-- 2. 使用有意义的业务术语命名
|
|
152
|
-
-- 好:customer_name, total_revenue, order_date
|
|
153
|
-
-- 差:c_name, sum_totalprice, o_orderdate
|
|
154
|
-
|
|
155
|
-
-- 3. 合理设置维度元数据
|
|
156
|
-
-- is_time=true 用于日期/时间维度
|
|
157
|
-
-- is_unique=true 用于主键类维度(如客户ID、订单号)
|
|
158
|
-
-- enum_values 用于状态类维度(如订单状态)
|
|
159
|
-
|
|
160
|
-
-- 4. 计算维度示例
|
|
161
|
-
DIMENSIONS (
|
|
162
|
-
orders.order_year AS YEAR(o_orderdate) -- 从日期提取年份
|
|
163
|
-
COMMENT = '下单年份',
|
|
164
|
-
orders.order_month AS MONTH(o_orderdate) -- 从日期提取月份
|
|
165
|
-
COMMENT = '下单月份'
|
|
166
|
-
)
|
|
167
|
-
```
|
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: clickzetta-spark-flink-connector
|
|
3
|
-
description: |
|
|
4
|
-
使用 Spark Connector 或 Flink Write Connector 将数据写入 ClickZetta Lakehouse。
|
|
5
|
-
覆盖 Spark DataFrame 读写配置(Maven 依赖、连接参数、read/write 代码)、
|
|
6
|
-
Flink Table API 写入(CDC 模式 igs-dynamic-table、仅追加模式 igs-dynamic-table-append-only)、
|
|
7
|
-
checkpoint 配置、buffer/flush 调优,以及主键表限制等关键约束。
|
|
8
|
-
当用户说"Spark Connector"、"Flink Connector"、"Spark 写入 Lakehouse"、
|
|
9
|
-
"Flink 写入 Lakehouse"、"spark-clickzetta"、"igs-flink-connector"、
|
|
10
|
-
"Spark DataFrame 写入"、"Flink CDC 写入"、"Flink sink"、
|
|
11
|
-
"spark.read.format clickzetta"时触发。
|
|
12
|
-
Keywords: Spark, Flink, DataFrame, connector, read, write, CDC, igs-dynamic-table
|
|
13
|
-
---
|
|
14
|
-
|
|
15
|
-
# ClickZetta Spark & Flink Connector
|
|
16
|
-
|
|
17
|
-
阅读 [references/spark.md](references/spark.md) 了解 Spark Connector,[references/flink.md](references/flink.md) 了解 Flink Write Connector。
|
|
18
|
-
|
|
19
|
-
---
|
|
20
|
-
|
|
21
|
-
## 关键约束(必读)
|
|
22
|
-
|
|
23
|
-
| 约束 | Spark Connector | Flink Connector |
|
|
24
|
-
|---|---|---|
|
|
25
|
-
| 主键表写入 | ❌ 不支持 | ✅ 支持(igs-dynamic-table 模式) |
|
|
26
|
-
| 部分字段写入 | ❌ 必须写全部字段 | ✅ 支持 |
|
|
27
|
-
| CDC(UPDATE/DELETE) | ❌ 仅 append | ✅ igs-dynamic-table 模式支持 |
|
|
28
|
-
| Spark 版本 | 3.4.0+ | — |
|
|
29
|
-
| Flink 版本 | — | 1.15.2+ |
|
|
30
|
-
|
|
31
|
-
---
|
|
32
|
-
|
|
33
|
-
## Spark Connector 快速示例
|
|
34
|
-
|
|
35
|
-
```scala
|
|
36
|
-
// 写入
|
|
37
|
-
df.write.format("clickzetta")
|
|
38
|
-
.option("endpoint", "your_instance.cn-shanghai-alicloud.api.clickzetta.com")
|
|
39
|
-
.option("username", sys.env("CZ_USERNAME"))
|
|
40
|
-
.option("password", sys.env("CZ_PASSWORD"))
|
|
41
|
-
.option("workspace", "your_workspace")
|
|
42
|
-
.option("virtualCluster", "default_ap")
|
|
43
|
-
.option("schema", "public")
|
|
44
|
-
.option("table", "orders")
|
|
45
|
-
.mode("append")
|
|
46
|
-
.save()
|
|
47
|
-
|
|
48
|
-
// 读取
|
|
49
|
-
val df = spark.read.format("clickzetta")
|
|
50
|
-
.option("endpoint", "your_instance.cn-shanghai-alicloud.api.clickzetta.com")
|
|
51
|
-
.option("username", sys.env("CZ_USERNAME"))
|
|
52
|
-
.option("password", sys.env("CZ_PASSWORD"))
|
|
53
|
-
.option("workspace", "your_workspace")
|
|
54
|
-
.option("virtualCluster", "default_ap")
|
|
55
|
-
.option("schema", "public")
|
|
56
|
-
.option("table", "orders")
|
|
57
|
-
.load()
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
---
|
|
61
|
-
|
|
62
|
-
## Flink Connector 快速示例
|
|
63
|
-
|
|
64
|
-
```sql
|
|
65
|
-
-- CDC 模式(支持 INSERT/UPDATE/DELETE,目标表需有主键)
|
|
66
|
-
CREATE TABLE lakehouse_sink (
|
|
67
|
-
order_id INT,
|
|
68
|
-
status STRING,
|
|
69
|
-
amount DOUBLE,
|
|
70
|
-
PRIMARY KEY (order_id) NOT ENFORCED
|
|
71
|
-
) WITH (
|
|
72
|
-
'connector' = 'igs-dynamic-table',
|
|
73
|
-
'curl' = 'jdbc:clickzetta://your_instance.cn-shanghai-alicloud.api.clickzetta.com/default?username=user&password=***&schema=public',
|
|
74
|
-
'schema-name' = 'public',
|
|
75
|
-
'table-name' = 'orders',
|
|
76
|
-
'sink.parallelism' = '1',
|
|
77
|
-
'properties' = 'authentication:true'
|
|
78
|
-
);
|
|
79
|
-
|
|
80
|
-
INSERT INTO lakehouse_sink SELECT order_id, status, amount FROM source_table;
|
|
81
|
-
```
|
|
82
|
-
|
|
83
|
-
---
|
|
84
|
-
|
|
85
|
-
## 选择指南
|
|
86
|
-
|
|
87
|
-
| 场景 | 推荐方案 |
|
|
88
|
-
|---|---|
|
|
89
|
-
| Spark ETL 批量写入(无主键表) | Spark Connector |
|
|
90
|
-
| Flink 实时流写入(无主键表) | Flink igs-dynamic-table-append-only |
|
|
91
|
-
| Flink CDC 同步(有主键表,含 UPDATE/DELETE) | Flink igs-dynamic-table |
|
|
92
|
-
| 高频实时写入(Java 应用) | Java SDK RealtimeStream |
|
|
@@ -1,147 +0,0 @@
|
|
|
1
|
-
# Flink Write Connector 详细参考
|
|
2
|
-
|
|
3
|
-
## Maven 依赖
|
|
4
|
-
|
|
5
|
-
```xml
|
|
6
|
-
<dependency>
|
|
7
|
-
<groupId>com.clickzetta</groupId>
|
|
8
|
-
<artifactId>igs-flink-connector-1.15</artifactId> <!-- 按 Flink 版本替换 -->
|
|
9
|
-
<version>联系 ClickZetta 支持获取版本号</version>
|
|
10
|
-
</dependency>
|
|
11
|
-
<!-- Flink 核心(provided) -->
|
|
12
|
-
<dependency>
|
|
13
|
-
<groupId>org.apache.flink</groupId>
|
|
14
|
-
<artifactId>flink-streaming-java</artifactId>
|
|
15
|
-
<version>1.15.2</version>
|
|
16
|
-
<scope>provided</scope>
|
|
17
|
-
</dependency>
|
|
18
|
-
<dependency>
|
|
19
|
-
<groupId>org.apache.flink</groupId>
|
|
20
|
-
<artifactId>flink-table-api-java-bridge</artifactId>
|
|
21
|
-
<version>1.15.2</version>
|
|
22
|
-
<scope>provided</scope>
|
|
23
|
-
</dependency>
|
|
24
|
-
```
|
|
25
|
-
|
|
26
|
-
## 两种写入模式
|
|
27
|
-
|
|
28
|
-
### 模式 1:igs-dynamic-table(CDC,支持主键表)
|
|
29
|
-
|
|
30
|
-
```sql
|
|
31
|
-
-- 目标表必须有主键
|
|
32
|
-
CREATE TABLE lakehouse_orders_sink (
|
|
33
|
-
order_id INT,
|
|
34
|
-
customer STRING,
|
|
35
|
-
amount DOUBLE,
|
|
36
|
-
status STRING,
|
|
37
|
-
updated_at TIMESTAMP(3),
|
|
38
|
-
PRIMARY KEY (order_id) NOT ENFORCED
|
|
39
|
-
) WITH (
|
|
40
|
-
'connector' = 'igs-dynamic-table',
|
|
41
|
-
'curl' = 'jdbc:clickzetta://your_instance.cn-shanghai-alicloud.api.clickzetta.com/default?username=user&password=***&schema=public&virtualcluster=default_ap',
|
|
42
|
-
'schema-name' = 'public',
|
|
43
|
-
'table-name' = 'orders',
|
|
44
|
-
'sink.parallelism' = '1', -- 主键表必须为 1
|
|
45
|
-
'properties' = 'authentication:true'
|
|
46
|
-
);
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
### 模式 2:igs-dynamic-table-append-only(仅追加,无主键表)
|
|
50
|
-
|
|
51
|
-
```sql
|
|
52
|
-
CREATE TABLE lakehouse_events_sink (
|
|
53
|
-
event_id BIGINT,
|
|
54
|
-
user_id BIGINT,
|
|
55
|
-
event_type STRING,
|
|
56
|
-
event_time TIMESTAMP(3)
|
|
57
|
-
) WITH (
|
|
58
|
-
'connector' = 'igs-dynamic-table-append-only',
|
|
59
|
-
'curl' = 'jdbc:clickzetta://your_instance.cn-shanghai-alicloud.api.clickzetta.com/default?username=user&password=***&schema=public&virtualcluster=default_ap',
|
|
60
|
-
'schema-name' = 'public',
|
|
61
|
-
'table-name' = 'events',
|
|
62
|
-
'sink.parallelism' = '4', -- 无主键表可提高并行度
|
|
63
|
-
'properties' = 'authentication:true'
|
|
64
|
-
);
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
## 完整 CDC 同步示例(MySQL → Lakehouse)
|
|
68
|
-
|
|
69
|
-
```sql
|
|
70
|
-
-- 1. MySQL CDC 源表
|
|
71
|
-
CREATE TABLE mysql_orders_source (
|
|
72
|
-
order_id INT,
|
|
73
|
-
customer STRING,
|
|
74
|
-
amount DOUBLE,
|
|
75
|
-
status STRING,
|
|
76
|
-
updated_at TIMESTAMP(3),
|
|
77
|
-
PRIMARY KEY (order_id) NOT ENFORCED
|
|
78
|
-
) WITH (
|
|
79
|
-
'connector' = 'mysql-cdc',
|
|
80
|
-
'hostname' = 'mysql-host',
|
|
81
|
-
'port' = '3306',
|
|
82
|
-
'username' = 'cdc_user',
|
|
83
|
-
'password' = 'cdc_password',
|
|
84
|
-
'database-name' = 'orders_db',
|
|
85
|
-
'table-name' = 'orders'
|
|
86
|
-
);
|
|
87
|
-
|
|
88
|
-
-- 2. Lakehouse Sink(CDC 模式)
|
|
89
|
-
CREATE TABLE lakehouse_orders_sink (
|
|
90
|
-
order_id INT,
|
|
91
|
-
customer STRING,
|
|
92
|
-
amount DOUBLE,
|
|
93
|
-
status STRING,
|
|
94
|
-
updated_at TIMESTAMP(3),
|
|
95
|
-
PRIMARY KEY (order_id) NOT ENFORCED
|
|
96
|
-
) WITH (
|
|
97
|
-
'connector' = 'igs-dynamic-table',
|
|
98
|
-
'curl' = 'jdbc:clickzetta://...',
|
|
99
|
-
'schema-name' = 'public',
|
|
100
|
-
'table-name' = 'orders',
|
|
101
|
-
'sink.parallelism' = '1',
|
|
102
|
-
'properties' = 'authentication:true'
|
|
103
|
-
);
|
|
104
|
-
|
|
105
|
-
-- 3. 同步
|
|
106
|
-
INSERT INTO lakehouse_orders_sink SELECT * FROM mysql_orders_source;
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
## Buffer 与 Flush 调优
|
|
110
|
-
|
|
111
|
-
```sql
|
|
112
|
-
-- 在 WITH 子句中添加调优参数(写入 WITH 子句时,各参数之间需用逗号分隔)
|
|
113
|
-
'mutation.buffer.lines.num' = '500' -- 每批缓冲行数(默认 100)
|
|
114
|
-
'mutation.buffer.space' = '10MB' -- 缓冲区大小(默认 5MB)
|
|
115
|
-
'mutation.buffer.max.num' = '8' -- 并发缓冲区数(默认 5)
|
|
116
|
-
'mutation.flush.interval' = '5000' -- flush 间隔毫秒(默认 10000)
|
|
117
|
-
'flush.mode' = 'AUTO_FLUSH_BACKGROUND' -- 异步 flush(默认)
|
|
118
|
-
```
|
|
119
|
-
|
|
120
|
-
## Checkpoint 配置(Java)
|
|
121
|
-
|
|
122
|
-
```java
|
|
123
|
-
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
|
124
|
-
|
|
125
|
-
// 生产环境必须开启 checkpoint
|
|
126
|
-
env.enableCheckpointing(60000); // 每 60 秒一次
|
|
127
|
-
env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
|
|
128
|
-
env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
|
|
129
|
-
env.getCheckpointConfig().setMinPauseBetweenCheckpoints(30000);
|
|
130
|
-
env.getCheckpointConfig().setCheckpointTimeout(120000);
|
|
131
|
-
```
|
|
132
|
-
|
|
133
|
-
## 私有网络访问
|
|
134
|
-
|
|
135
|
-
```sql
|
|
136
|
-
-- 内网访问(VPC 内部)
|
|
137
|
-
'properties' = 'authentication:true,isInternal:true,isDirect:false'
|
|
138
|
-
```
|
|
139
|
-
|
|
140
|
-
## 常见问题
|
|
141
|
-
|
|
142
|
-
| 问题 | 原因 | 解决方案 |
|
|
143
|
-
|---|---|---|
|
|
144
|
-
| 写入主键表数据不更新 | 使用了 append-only 模式 | 改用 `igs-dynamic-table` 模式 |
|
|
145
|
-
| 并行度 > 1 时数据乱序 | 主键表要求顺序写入 | 主键表 `sink.parallelism` 必须设为 `1` |
|
|
146
|
-
| checkpoint 失败 | checkpoint 超时,或作业未开启 checkpoint | 确认已调用 `enableCheckpointing` 开启 checkpoint,增大 `setCheckpointTimeout`,检查网络 |
|
|
147
|
-
| 连接超时 | 网络不通或认证失败 | 检查 `curl` 中的 username/password,确认 VPC 配置 |
|
|
@@ -1,132 +0,0 @@
|
|
|
1
|
-
# Spark Connector 详细参考
|
|
2
|
-
|
|
3
|
-
## Maven 依赖
|
|
4
|
-
|
|
5
|
-
```xml
|
|
6
|
-
<dependencies>
|
|
7
|
-
<dependency>
|
|
8
|
-
<groupId>org.apache.spark</groupId>
|
|
9
|
-
<artifactId>spark-sql_2.12</artifactId>
|
|
10
|
-
<version>3.4.0</version>
|
|
11
|
-
<scope>provided</scope>
|
|
12
|
-
</dependency>
|
|
13
|
-
<dependency>
|
|
14
|
-
<groupId>com.clickzetta</groupId>
|
|
15
|
-
<artifactId>spark-clickzetta</artifactId>
|
|
16
|
-
<version>1.0.0</version>
|
|
17
|
-
</dependency>
|
|
18
|
-
</dependencies>
|
|
19
|
-
```
|
|
20
|
-
|
|
21
|
-
> ⚠️ `spark-clickzetta` JAR 需从 ClickZetta 官方下载,不在 Maven Central。联系 ClickZetta 支持获取。
|
|
22
|
-
|
|
23
|
-
## 连接参数
|
|
24
|
-
|
|
25
|
-
| 参数 | 必填 | 说明 |
|
|
26
|
-
|---|---|---|
|
|
27
|
-
| `endpoint` | ✅ | 如 `your_instance.cn-shanghai-alicloud.api.clickzetta.com` |
|
|
28
|
-
| `username` | ✅ | 用户名 |
|
|
29
|
-
| `password` | ✅ | 密码 |
|
|
30
|
-
| `workspace` | ✅ | 工作空间 |
|
|
31
|
-
| `virtualCluster` | ✅ | 虚拟集群名称,如 `default_ap` |
|
|
32
|
-
| `schema` | ✅ | Schema 名称 |
|
|
33
|
-
| `table` | ✅ | 表名(读取时为源表,写入时为目标表) |
|
|
34
|
-
|
|
35
|
-
## 完整 Scala 示例
|
|
36
|
-
|
|
37
|
-
```scala
|
|
38
|
-
import org.apache.spark.sql.SparkSession
|
|
39
|
-
|
|
40
|
-
object SparkToLakehouse {
|
|
41
|
-
def main(args: Array[String]): Unit = {
|
|
42
|
-
val spark = SparkSession.builder()
|
|
43
|
-
.appName("SparkToLakehouse")
|
|
44
|
-
.getOrCreate()
|
|
45
|
-
|
|
46
|
-
val endpoint = sys.env("CZ_ENDPOINT")
|
|
47
|
-
val username = sys.env("CZ_USERNAME")
|
|
48
|
-
val password = sys.env("CZ_PASSWORD")
|
|
49
|
-
val workspace = sys.env("CZ_WORKSPACE")
|
|
50
|
-
|
|
51
|
-
// 读取
|
|
52
|
-
val df = spark.read.format("clickzetta")
|
|
53
|
-
.option("endpoint", endpoint)
|
|
54
|
-
.option("username", username)
|
|
55
|
-
.option("password", password)
|
|
56
|
-
.option("workspace", workspace)
|
|
57
|
-
.option("virtualCluster", "default_ap")
|
|
58
|
-
.option("schema", "silver")
|
|
59
|
-
.option("table", "orders_cleaned")
|
|
60
|
-
.load()
|
|
61
|
-
|
|
62
|
-
// 转换
|
|
63
|
-
import org.apache.spark.sql.functions._
|
|
64
|
-
val result = df
|
|
65
|
-
.filter(col("amount") > 0)
|
|
66
|
-
.groupBy("region")
|
|
67
|
-
.agg(sum("amount").as("total_revenue"), count("*").as("order_count"))
|
|
68
|
-
|
|
69
|
-
// 写入(必须写全部字段,不支持主键表)
|
|
70
|
-
result.write.format("clickzetta")
|
|
71
|
-
.option("endpoint", endpoint)
|
|
72
|
-
.option("username", username)
|
|
73
|
-
.option("password", password)
|
|
74
|
-
.option("workspace", workspace)
|
|
75
|
-
.option("virtualCluster", "default_ap")
|
|
76
|
-
.option("schema", "gold")
|
|
77
|
-
.option("table", "region_summary")
|
|
78
|
-
.mode("append")
|
|
79
|
-
.save()
|
|
80
|
-
|
|
81
|
-
spark.stop()
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
```
|
|
85
|
-
|
|
86
|
-
## Python(PySpark)示例
|
|
87
|
-
|
|
88
|
-
```python
|
|
89
|
-
from pyspark.sql import SparkSession
|
|
90
|
-
import os
|
|
91
|
-
|
|
92
|
-
spark = SparkSession.builder.appName("PySparkToLakehouse").getOrCreate()
|
|
93
|
-
|
|
94
|
-
options = {
|
|
95
|
-
"endpoint": os.environ["CZ_ENDPOINT"],
|
|
96
|
-
"username": os.environ["CZ_USERNAME"],
|
|
97
|
-
"password": os.environ["CZ_PASSWORD"],
|
|
98
|
-
"workspace": os.environ["CZ_WORKSPACE"],
|
|
99
|
-
"virtualCluster": "default_ap",
|
|
100
|
-
"schema": "public",
|
|
101
|
-
"table": "orders",
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
# 读取
|
|
105
|
-
df = spark.read.format("clickzetta").options(**options).load()
|
|
106
|
-
df.show(5)
|
|
107
|
-
|
|
108
|
-
# 写入
|
|
109
|
-
df.write.format("clickzetta").options(**options).mode("append").save()
|
|
110
|
-
```
|
|
111
|
-
|
|
112
|
-
## 类型映射
|
|
113
|
-
|
|
114
|
-
| Spark 类型 | Lakehouse 类型 |
|
|
115
|
-
|---|---|
|
|
116
|
-
| BooleanType | BOOLEAN |
|
|
117
|
-
| IntegerType | INT32 |
|
|
118
|
-
| LongType | INT64 |
|
|
119
|
-
| FloatType | FLOAT32 |
|
|
120
|
-
| DoubleType | FLOAT64 |
|
|
121
|
-
| StringType | STRING |
|
|
122
|
-
| TimestampType | TIMESTAMP |
|
|
123
|
-
| DateType | DATE |
|
|
124
|
-
| ArrayType | ARRAY |
|
|
125
|
-
| MapType | MAP |
|
|
126
|
-
| StructType | STRUCT |
|
|
127
|
-
|
|
128
|
-
## 限制
|
|
129
|
-
|
|
130
|
-
- **不支持主键表写入**:目标表不能有主键,否则报错
|
|
131
|
-
- **必须写全部字段**:DataFrame schema 必须与目标表完全匹配,不支持部分字段写入
|
|
132
|
-
- **仅支持 append 模式**:不支持 overwrite(会报错)
|