@clickzetta/cz-cli-linux-x64 0.3.2 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/bin/cz-cli +0 -0
  2. package/package.json +1 -1
  3. package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
  4. package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
  5. package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
  6. package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
  7. package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
  8. package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
  9. package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
  10. package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
  11. package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
  12. package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
  13. package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
  14. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -293
  15. package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
  16. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
  17. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -450
  18. package/bin/skills/clickzetta-concepts/SKILL.md +0 -282
  19. package/bin/skills/clickzetta-concepts/references/brands-and-endpoints.md +0 -79
  20. package/bin/skills/clickzetta-concepts/references/object-model.md +0 -311
  21. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -165
  22. package/bin/skills/clickzetta-data-lifecycle/SKILL.md +0 -211
  23. package/bin/skills/clickzetta-data-lifecycle/references/lifecycle-reference.md +0 -175
  24. package/bin/skills/clickzetta-data-recovery/SKILL.md +0 -215
  25. package/bin/skills/clickzetta-data-recovery/evals/evals.json +0 -35
  26. package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
  27. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
  28. package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
  29. package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
  30. package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
  31. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
  32. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
  33. package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
  34. package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
  35. package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -540
  36. package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -259
  37. package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
  38. package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -86
  39. package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -257
  40. package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
  41. package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
  42. package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
  43. package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
  44. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  45. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -429
  46. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -268
  47. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
  48. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
  49. package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -120
  50. package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
  51. package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
  52. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
  53. package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -117
  54. package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
  55. package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
  56. package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
  57. package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
  58. package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
  59. package/bin/skills/clickzetta-information-schema/SKILL.md +0 -367
  60. package/bin/skills/clickzetta-information-schema/references/instance-views-reference.md +0 -276
  61. package/bin/skills/clickzetta-information-schema/references/metering-views-reference.md +0 -137
  62. package/bin/skills/clickzetta-information-schema/references/views-reference.md +0 -271
  63. package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
  64. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
  65. package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
  66. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -531
  67. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -186
  68. package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
  69. package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
  70. package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
  71. package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
  72. package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
  73. package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
  74. package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
  75. package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
  76. package/bin/skills/clickzetta-metadata-query/SKILL.md +0 -298
  77. package/bin/skills/clickzetta-metadata-query/references/show-desc-reference.md +0 -326
  78. package/bin/skills/clickzetta-monitoring/SKILL.md +0 -199
  79. package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
  80. package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
  81. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -402
  82. package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
  83. package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
  84. package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
  85. package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
  86. package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
  87. package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
  88. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -197
  89. package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
  90. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
  91. package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
  92. package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
  93. package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
  94. package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -353
  95. package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
  96. package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -173
  97. package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
  98. package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -160
  99. package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -123
  100. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -172
  101. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  102. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  103. package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
  104. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  105. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  106. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
  107. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  108. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  109. package/bin/skills/clickzetta-studio-overview/SKILL.md +0 -170
  110. package/bin/skills/clickzetta-studio-overview/references/studio-modules.md +0 -173
  111. package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -155
  112. package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
  113. package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
  114. package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
  115. package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -249
  116. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -194
  117. package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
  118. package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
@@ -1,167 +0,0 @@
1
- # 语义视图完整语法参考
2
-
3
- > 来源:https://www.yunqi.tech/documents/semantic_view
4
- > 功能状态:邀测(1.3 版本起)
5
-
6
- ---
7
-
8
- ## CREATE SEMANTIC VIEW 完整语法
9
-
10
- ```sql
11
- CREATE SEMANTIC VIEW <视图名称>
12
- TABLES (
13
- <逻辑表定义> [ , ... ]
14
- )
15
- [ FILTERS (
16
- <过滤器定义> [ , ... ]
17
- ) ]
18
- DIMENSIONS (
19
- <维度定义> [ , ... ]
20
- )
21
- METRICS (
22
- <指标定义> [ , ... ]
23
- )
24
- [ COMMENT = '<视图说明>' ];
25
- ```
26
-
27
- **约束**:`DIMENSIONS` 和 `METRICS` 两个子句可以只定义其中一个,但不能同时省略。
28
-
29
- ---
30
-
31
- ## 逻辑表定义语法
32
-
33
- ```sql
34
- <表别名> AS <schema>.<物理表名>
35
- PRIMARY KEY ( <列名> [ , ... ] )
36
- [ FOREIGN KEY ( <列名> ) REFERENCES <其他逻辑表别名> ]
37
- [ WITH SYNONYMS ( '<同义词>' [ , ... ] ) ]
38
- [ COMMENT = '<说明>' ]
39
- ```
40
-
41
- | 参数 | 说明 |
42
- |---|---|
43
- | `<表别名> AS <schema>.<物理表>` | 为物理表指定逻辑别名,后续维度/指标/外键均用此别名引用 |
44
- | `PRIMARY KEY` | 主键列,用于确定表间关系类型(一对多/一对一) |
45
- | `FOREIGN KEY ... REFERENCES` | 外键关系,引擎据此自动处理 JOIN;引用目标必须是逻辑表别名 |
46
- | `WITH SYNONYMS` | 逻辑表同义词,增强可发现性 |
47
-
48
- **注意**:被外键引用的表必须在 TABLES 子句中先定义。
49
-
50
- ---
51
-
52
- ## 过滤器定义语法
53
-
54
- ```sql
55
- <逻辑表别名>.<过滤器名> AS <布尔表达式>
56
- ```
57
-
58
- 示例:
59
- ```sql
60
- FILTERS (
61
- customers.is_building AS customers.c_mktsegment = 'BUILDING',
62
- orders.is_open AS orders.o_orderstatus = 'O'
63
- )
64
- ```
65
-
66
- **重要**:FILTERS 是面向 AI/元数据层的语义注解,**不能**作为 `semantic_view()` 函数参数直接传入。若要在查询中过滤,需将对应列定义为 DIMENSION,再用外层 WHERE 子句。
67
-
68
- ---
69
-
70
- ## 维度定义语法
71
-
72
- ```sql
73
- { <逻辑表别名>.<维度名> | <维度名> } AS <表达式>
74
- [ WITH SYNONYMS = ( '<同义词>' [ , ... ] ) ]
75
- [ is_unique = { true | false } ]
76
- [ is_time = { true | false } ]
77
- [ enum_values = [ <值1>, <值2>, ... ] ]
78
- [ COMMENT = '<说明>' ]
79
- ```
80
-
81
- | 参数 | 说明 |
82
- |---|---|
83
- | `AS <表达式>` | 可以是列名,也可以是计算表达式(如 `YEAR(o_orderdate)`) |
84
- | `WITH SYNONYMS` | 维度同义词,用户可用不同业务术语引用同一维度 |
85
- | `is_unique = true` | 标识该维度值唯一(如客户 ID、订单号),帮助引擎优化 |
86
- | `is_time = true` | 标识为时间类型维度(如订单日期) |
87
- | `enum_values` | 限定允许的枚举值,提升查询准确性 |
88
-
89
- ---
90
-
91
- ## 指标定义语法
92
-
93
- ```sql
94
- <逻辑表别名>.<指标名> AS <聚合表达式>
95
- [ COMMENT = '<说明>' ]
96
- ```
97
-
98
- 支持的聚合函数:`COUNT`、`AVG`、`SUM`、`MIN`、`MAX`
99
-
100
- 示例:
101
- ```sql
102
- METRICS (
103
- orders.total_revenue AS SUM(o_totalprice)
104
- COMMENT = '总收入',
105
- orders.avg_order_value AS AVG(o_totalprice)
106
- COMMENT = '平均订单金额',
107
- customers.customer_count AS COUNT(c_custkey)
108
- COMMENT = '客户总数'
109
- )
110
- ```
111
-
112
- ---
113
-
114
- ## semantic_view() 查询函数语法
115
-
116
- ```sql
117
- SELECT *
118
- FROM semantic_view(
119
- <视图名称>,
120
- DIMENSIONS <维度名> [ , DIMENSIONS <维度名> ... ],
121
- METRICS <指标名> [ , METRICS <指标名> ... ]
122
- )
123
- [ WHERE <过滤条件> ];
124
- ```
125
-
126
- - 维度名可用限定名(`表别名.维度名`)或短名(名称唯一时)
127
- - 结果自动按指定维度分组,无需写 GROUP BY
128
- - WHERE 子句中的列名使用短名(不含表别名前缀)
129
-
130
- ---
131
-
132
- ## 管理命令
133
-
134
- | 命令 | 说明 |
135
- |---|---|
136
- | `CREATE SEMANTIC VIEW` | 创建语义视图 |
137
- | `DROP SEMANTIC VIEW IF EXISTS <名称>` | 删除语义视图 |
138
- | `SHOW SEMANTIC VIEWS` | 列出当前 Schema 所有语义视图 |
139
- | `SHOW SEMANTIC VIEWS IN <schema>` | 列出指定 Schema 的语义视图 |
140
- | `DESC EXTENDED <名称>` | 查看详细定义(逻辑表/维度/指标/外键/索引) |
141
-
142
- ---
143
-
144
- ## 最佳实践
145
-
146
- ```sql
147
- -- 1. 幂等创建(始终先删再建)
148
- DROP SEMANTIC VIEW IF EXISTS my_view;
149
- CREATE SEMANTIC VIEW my_view ...;
150
-
151
- -- 2. 使用有意义的业务术语命名
152
- -- 好:customer_name, total_revenue, order_date
153
- -- 差:c_name, sum_totalprice, o_orderdate
154
-
155
- -- 3. 合理设置维度元数据
156
- -- is_time=true 用于日期/时间维度
157
- -- is_unique=true 用于主键类维度(如客户ID、订单号)
158
- -- enum_values 用于状态类维度(如订单状态)
159
-
160
- -- 4. 计算维度示例
161
- DIMENSIONS (
162
- orders.order_year AS YEAR(o_orderdate) -- 从日期提取年份
163
- COMMENT = '下单年份',
164
- orders.order_month AS MONTH(o_orderdate) -- 从日期提取月份
165
- COMMENT = '下单月份'
166
- )
167
- ```
@@ -1,92 +0,0 @@
1
- ---
2
- name: clickzetta-spark-flink-connector
3
- description: |
4
- 使用 Spark Connector 或 Flink Write Connector 将数据写入 ClickZetta Lakehouse。
5
- 覆盖 Spark DataFrame 读写配置(Maven 依赖、连接参数、read/write 代码)、
6
- Flink Table API 写入(CDC 模式 igs-dynamic-table、仅追加模式 igs-dynamic-table-append-only)、
7
- checkpoint 配置、buffer/flush 调优,以及主键表限制等关键约束。
8
- 当用户说"Spark Connector"、"Flink Connector"、"Spark 写入 Lakehouse"、
9
- "Flink 写入 Lakehouse"、"spark-clickzetta"、"igs-flink-connector"、
10
- "Spark DataFrame 写入"、"Flink CDC 写入"、"Flink sink"、
11
- "spark.read.format clickzetta"时触发。
12
- Keywords: Spark, Flink, DataFrame, connector, read, write, CDC, igs-dynamic-table
13
- ---
14
-
15
- # ClickZetta Spark & Flink Connector
16
-
17
- 阅读 [references/spark.md](references/spark.md) 了解 Spark Connector,[references/flink.md](references/flink.md) 了解 Flink Write Connector。
18
-
19
- ---
20
-
21
- ## 关键约束(必读)
22
-
23
- | 约束 | Spark Connector | Flink Connector |
24
- |---|---|---|
25
- | 主键表写入 | ❌ 不支持 | ✅ 支持(igs-dynamic-table 模式) |
26
- | 部分字段写入 | ❌ 必须写全部字段 | ✅ 支持 |
27
- | CDC(UPDATE/DELETE) | ❌ 仅 append | ✅ igs-dynamic-table 模式支持 |
28
- | Spark 版本 | 3.4.0+ | — |
29
- | Flink 版本 | — | 1.15.2+ |
30
-
31
- ---
32
-
33
- ## Spark Connector 快速示例
34
-
35
- ```scala
36
- // 写入
37
- df.write.format("clickzetta")
38
- .option("endpoint", "your_instance.cn-shanghai-alicloud.api.clickzetta.com")
39
- .option("username", sys.env("CZ_USERNAME"))
40
- .option("password", sys.env("CZ_PASSWORD"))
41
- .option("workspace", "your_workspace")
42
- .option("virtualCluster", "default_ap")
43
- .option("schema", "public")
44
- .option("table", "orders")
45
- .mode("append")
46
- .save()
47
-
48
- // 读取
49
- val df = spark.read.format("clickzetta")
50
- .option("endpoint", "your_instance.cn-shanghai-alicloud.api.clickzetta.com")
51
- .option("username", sys.env("CZ_USERNAME"))
52
- .option("password", sys.env("CZ_PASSWORD"))
53
- .option("workspace", "your_workspace")
54
- .option("virtualCluster", "default_ap")
55
- .option("schema", "public")
56
- .option("table", "orders")
57
- .load()
58
- ```
59
-
60
- ---
61
-
62
- ## Flink Connector 快速示例
63
-
64
- ```sql
65
- -- CDC 模式(支持 INSERT/UPDATE/DELETE,目标表需有主键)
66
- CREATE TABLE lakehouse_sink (
67
- order_id INT,
68
- status STRING,
69
- amount DOUBLE,
70
- PRIMARY KEY (order_id) NOT ENFORCED
71
- ) WITH (
72
- 'connector' = 'igs-dynamic-table',
73
- 'curl' = 'jdbc:clickzetta://your_instance.cn-shanghai-alicloud.api.clickzetta.com/default?username=user&password=***&schema=public',
74
- 'schema-name' = 'public',
75
- 'table-name' = 'orders',
76
- 'sink.parallelism' = '1',
77
- 'properties' = 'authentication:true'
78
- );
79
-
80
- INSERT INTO lakehouse_sink SELECT order_id, status, amount FROM source_table;
81
- ```
82
-
83
- ---
84
-
85
- ## 选择指南
86
-
87
- | 场景 | 推荐方案 |
88
- |---|---|
89
- | Spark ETL 批量写入(无主键表) | Spark Connector |
90
- | Flink 实时流写入(无主键表) | Flink igs-dynamic-table-append-only |
91
- | Flink CDC 同步(有主键表,含 UPDATE/DELETE) | Flink igs-dynamic-table |
92
- | 高频实时写入(Java 应用) | Java SDK RealtimeStream |
@@ -1,147 +0,0 @@
1
- # Flink Write Connector 详细参考
2
-
3
- ## Maven 依赖
4
-
5
- ```xml
6
- <dependency>
7
- <groupId>com.clickzetta</groupId>
8
- <artifactId>igs-flink-connector-1.15</artifactId> <!-- 按 Flink 版本替换 -->
9
- <version>联系 ClickZetta 支持获取版本号</version>
10
- </dependency>
11
- <!-- Flink 核心(provided) -->
12
- <dependency>
13
- <groupId>org.apache.flink</groupId>
14
- <artifactId>flink-streaming-java</artifactId>
15
- <version>1.15.2</version>
16
- <scope>provided</scope>
17
- </dependency>
18
- <dependency>
19
- <groupId>org.apache.flink</groupId>
20
- <artifactId>flink-table-api-java-bridge</artifactId>
21
- <version>1.15.2</version>
22
- <scope>provided</scope>
23
- </dependency>
24
- ```
25
-
26
- ## 两种写入模式
27
-
28
- ### 模式 1:igs-dynamic-table(CDC,支持主键表)
29
-
30
- ```sql
31
- -- 目标表必须有主键
32
- CREATE TABLE lakehouse_orders_sink (
33
- order_id INT,
34
- customer STRING,
35
- amount DOUBLE,
36
- status STRING,
37
- updated_at TIMESTAMP(3),
38
- PRIMARY KEY (order_id) NOT ENFORCED
39
- ) WITH (
40
- 'connector' = 'igs-dynamic-table',
41
- 'curl' = 'jdbc:clickzetta://your_instance.cn-shanghai-alicloud.api.clickzetta.com/default?username=user&password=***&schema=public&virtualcluster=default_ap',
42
- 'schema-name' = 'public',
43
- 'table-name' = 'orders',
44
- 'sink.parallelism' = '1', -- 主键表必须为 1
45
- 'properties' = 'authentication:true'
46
- );
47
- ```
48
-
49
- ### 模式 2:igs-dynamic-table-append-only(仅追加,无主键表)
50
-
51
- ```sql
52
- CREATE TABLE lakehouse_events_sink (
53
- event_id BIGINT,
54
- user_id BIGINT,
55
- event_type STRING,
56
- event_time TIMESTAMP(3)
57
- ) WITH (
58
- 'connector' = 'igs-dynamic-table-append-only',
59
- 'curl' = 'jdbc:clickzetta://your_instance.cn-shanghai-alicloud.api.clickzetta.com/default?username=user&password=***&schema=public&virtualcluster=default_ap',
60
- 'schema-name' = 'public',
61
- 'table-name' = 'events',
62
- 'sink.parallelism' = '4', -- 无主键表可提高并行度
63
- 'properties' = 'authentication:true'
64
- );
65
- ```
66
-
67
- ## 完整 CDC 同步示例(MySQL → Lakehouse)
68
-
69
- ```sql
70
- -- 1. MySQL CDC 源表
71
- CREATE TABLE mysql_orders_source (
72
- order_id INT,
73
- customer STRING,
74
- amount DOUBLE,
75
- status STRING,
76
- updated_at TIMESTAMP(3),
77
- PRIMARY KEY (order_id) NOT ENFORCED
78
- ) WITH (
79
- 'connector' = 'mysql-cdc',
80
- 'hostname' = 'mysql-host',
81
- 'port' = '3306',
82
- 'username' = 'cdc_user',
83
- 'password' = 'cdc_password',
84
- 'database-name' = 'orders_db',
85
- 'table-name' = 'orders'
86
- );
87
-
88
- -- 2. Lakehouse Sink(CDC 模式)
89
- CREATE TABLE lakehouse_orders_sink (
90
- order_id INT,
91
- customer STRING,
92
- amount DOUBLE,
93
- status STRING,
94
- updated_at TIMESTAMP(3),
95
- PRIMARY KEY (order_id) NOT ENFORCED
96
- ) WITH (
97
- 'connector' = 'igs-dynamic-table',
98
- 'curl' = 'jdbc:clickzetta://...',
99
- 'schema-name' = 'public',
100
- 'table-name' = 'orders',
101
- 'sink.parallelism' = '1',
102
- 'properties' = 'authentication:true'
103
- );
104
-
105
- -- 3. 同步
106
- INSERT INTO lakehouse_orders_sink SELECT * FROM mysql_orders_source;
107
- ```
108
-
109
- ## Buffer 与 Flush 调优
110
-
111
- ```sql
112
- -- 在 WITH 子句中添加调优参数
113
- 'mutation.buffer.lines.num' = '500', -- 每批缓冲行数(默认 100)
114
- 'mutation.buffer.space' = '10MB', -- 缓冲区大小(默认 5MB)
115
- 'mutation.buffer.max.num' = '8', -- 并发缓冲区数(默认 5)
116
- 'mutation.flush.interval' = '5000', -- flush 间隔毫秒(默认 10000)
117
- 'flush.mode' = 'AUTO_FLUSH_BACKGROUND' -- 异步 flush(默认)
118
- ```
119
-
120
- ## Checkpoint 配置(Java)
121
-
122
- ```java
123
- StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
124
-
125
- // 生产环境必须开启 checkpoint
126
- env.enableCheckpointing(60000); // 每 60 秒一次
127
- env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
128
- env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
129
- env.getCheckpointConfig().setMinPauseBetweenCheckpoints(30000);
130
- env.getCheckpointConfig().setCheckpointTimeout(120000);
131
- ```
132
-
133
- ## 私有网络访问
134
-
135
- ```sql
136
- -- 内网访问(VPC 内部)
137
- 'properties' = 'authentication:true,isInternal:true,isDirect:false'
138
- ```
139
-
140
- ## 常见问题
141
-
142
- | 问题 | 原因 | 解决方案 |
143
- |---|---|---|
144
- | 写入主键表数据不更新 | 使用了 append-only 模式 | 改用 `igs-dynamic-table` 模式 |
145
- | 并行度 > 1 时数据乱序 | 主键表要求顺序写入 | 主键表 `sink.parallelism` 必须设为 `1` |
146
- | checkpoint 失败 | 未开启 checkpoint 或 checkpoint 超时 | 确认已调用 `enableCheckpointing`,增大 `setCheckpointTimeout`,检查网络 |
147
- | 连接超时 | 网络不通或认证失败 | 检查 `curl` 中的 username/password,确认 VPC 配置 |
@@ -1,132 +0,0 @@
1
- # Spark Connector 详细参考
2
-
3
- ## Maven 依赖
4
-
5
- ```xml
6
- <dependencies>
7
- <dependency>
8
- <groupId>org.apache.spark</groupId>
9
- <artifactId>spark-sql_2.12</artifactId>
10
- <version>3.4.0</version>
11
- <scope>provided</scope>
12
- </dependency>
13
- <dependency>
14
- <groupId>com.clickzetta</groupId>
15
- <artifactId>spark-clickzetta</artifactId>
16
- <version>1.0.0</version>
17
- </dependency>
18
- </dependencies>
19
- ```
20
-
21
- > ⚠️ `spark-clickzetta` JAR 需从 ClickZetta 官方下载,不在 Maven Central。联系 ClickZetta 支持获取。
22
-
23
- ## 连接参数
24
-
25
- | 参数 | 必填 | 说明 |
26
- |---|---|---|
27
- | `endpoint` | ✅ | 如 `your_instance.cn-shanghai-alicloud.api.clickzetta.com` |
28
- | `username` | ✅ | 用户名 |
29
- | `password` | ✅ | 密码 |
30
- | `workspace` | ✅ | 工作空间 |
31
- | `virtualCluster` | ✅ | 虚拟集群名称,如 `default_ap` |
32
- | `schema` | ✅ | Schema 名称 |
33
- | `table` | ✅ | 要读取或写入的表名 |
34
-
35
- ## 完整 Scala 示例
36
-
37
- ```scala
38
- import org.apache.spark.sql.SparkSession
39
-
40
- object SparkToLakehouse {
41
- def main(args: Array[String]): Unit = {
42
- val spark = SparkSession.builder()
43
- .appName("SparkToLakehouse")
44
- .getOrCreate()
45
-
46
- val endpoint = sys.env("CZ_ENDPOINT")
47
- val username = sys.env("CZ_USERNAME")
48
- val password = sys.env("CZ_PASSWORD")
49
- val workspace = sys.env("CZ_WORKSPACE")
50
-
51
- // 读取
52
- val df = spark.read.format("clickzetta")
53
- .option("endpoint", endpoint)
54
- .option("username", username)
55
- .option("password", password)
56
- .option("workspace", workspace)
57
- .option("virtualCluster", "default_ap")
58
- .option("schema", "silver")
59
- .option("table", "orders_cleaned")
60
- .load()
61
-
62
- // 转换
63
- import org.apache.spark.sql.functions._
64
- val result = df
65
- .filter(col("amount") > 0)
66
- .groupBy("region")
67
- .agg(sum("amount").as("total_revenue"), count("*").as("order_count"))
68
-
69
- // 写入(必须写全部字段,不支持主键表)
70
- result.write.format("clickzetta")
71
- .option("endpoint", endpoint)
72
- .option("username", username)
73
- .option("password", password)
74
- .option("workspace", workspace)
75
- .option("virtualCluster", "default_ap")
76
- .option("schema", "gold")
77
- .option("table", "region_summary")
78
- .mode("append")
79
- .save()
80
-
81
- spark.stop()
82
- }
83
- }
84
- ```
85
-
86
- ## Python(PySpark)示例
87
-
88
- ```python
89
- from pyspark.sql import SparkSession
90
- import os
91
-
92
- spark = SparkSession.builder.appName("PySparkToLakehouse").getOrCreate()
93
-
94
- options = {
95
- "endpoint": os.environ["CZ_ENDPOINT"],
96
- "username": os.environ["CZ_USERNAME"],
97
- "password": os.environ["CZ_PASSWORD"],
98
- "workspace": os.environ["CZ_WORKSPACE"],
99
- "virtualCluster": "default_ap",
100
- "schema": "public",
101
- "table": "orders",
102
- }
103
-
104
- # 读取
105
- df = spark.read.format("clickzetta").options(**options).load()
106
- df.show(5)
107
-
108
- # 写入
109
- df.write.format("clickzetta").options(**options).mode("append").save()
110
- ```
111
-
112
- ## 类型映射
113
-
114
- | Spark 类型 | Lakehouse 类型 |
115
- |---|---|
116
- | BooleanType | BOOLEAN |
117
- | IntegerType | INT32 |
118
- | LongType | INT64 |
119
- | FloatType | FLOAT32 |
120
- | DoubleType | FLOAT64 |
121
- | StringType | STRING |
122
- | TimestampType | TIMESTAMP |
123
- | DateType | DATE |
124
- | ArrayType | ARRAY |
125
- | MapType | MAP |
126
- | StructType | STRUCT |
127
-
128
- ## 限制
129
-
130
- - **不支持主键表写入**:目标表不能有主键,否则报错
131
- - **必须写全部字段**:DataFrame schema 必须与目标表完全匹配,不支持部分字段写入
132
- - **仅支持 append 模式**:不支持 overwrite(会报错)