@clickzetta/cz-cli-linux-x64 0.3.4 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/bin/cz-cli +0 -0
  2. package/package.json +1 -1
  3. package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
  4. package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
  5. package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
  6. package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
  7. package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
  8. package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
  9. package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
  10. package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
  11. package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
  12. package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
  13. package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
  14. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -293
  15. package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
  16. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
  17. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -457
  18. package/bin/skills/clickzetta-concepts/SKILL.md +0 -282
  19. package/bin/skills/clickzetta-concepts/references/brands-and-endpoints.md +0 -79
  20. package/bin/skills/clickzetta-concepts/references/object-model.md +0 -311
  21. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -165
  22. package/bin/skills/clickzetta-data-lifecycle/SKILL.md +0 -211
  23. package/bin/skills/clickzetta-data-lifecycle/references/lifecycle-reference.md +0 -175
  24. package/bin/skills/clickzetta-data-recovery/SKILL.md +0 -215
  25. package/bin/skills/clickzetta-data-recovery/evals/evals.json +0 -35
  26. package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
  27. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
  28. package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
  29. package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
  30. package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
  31. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
  32. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
  33. package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
  34. package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
  35. package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -540
  36. package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -259
  37. package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
  38. package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -112
  39. package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -257
  40. package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
  41. package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
  42. package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
  43. package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
  44. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  45. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -429
  46. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -268
  47. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
  48. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
  49. package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -120
  50. package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
  51. package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
  52. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
  53. package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -156
  54. package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
  55. package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
  56. package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
  57. package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
  58. package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
  59. package/bin/skills/clickzetta-information-schema/SKILL.md +0 -367
  60. package/bin/skills/clickzetta-information-schema/references/instance-views-reference.md +0 -276
  61. package/bin/skills/clickzetta-information-schema/references/metering-views-reference.md +0 -137
  62. package/bin/skills/clickzetta-information-schema/references/views-reference.md +0 -271
  63. package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
  64. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
  65. package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
  66. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -639
  67. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -324
  68. package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
  69. package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
  70. package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
  71. package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
  72. package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
  73. package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
  74. package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
  75. package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
  76. package/bin/skills/clickzetta-metadata-query/SKILL.md +0 -298
  77. package/bin/skills/clickzetta-metadata-query/references/show-desc-reference.md +0 -326
  78. package/bin/skills/clickzetta-monitoring/SKILL.md +0 -199
  79. package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
  80. package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
  81. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -427
  82. package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
  83. package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
  84. package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
  85. package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
  86. package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
  87. package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
  88. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -197
  89. package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
  90. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
  91. package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
  92. package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
  93. package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
  94. package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -379
  95. package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
  96. package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -185
  97. package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
  98. package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -222
  99. package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -125
  100. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -172
  101. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  102. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  103. package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
  104. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  105. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  106. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
  107. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  108. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  109. package/bin/skills/clickzetta-studio-overview/SKILL.md +0 -170
  110. package/bin/skills/clickzetta-studio-overview/references/studio-modules.md +0 -173
  111. package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -206
  112. package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
  113. package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
  114. package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
  115. package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -292
  116. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -199
  117. package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
  118. package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
@@ -1,324 +0,0 @@
1
- # Kafka Pipe SQL 语法参考
2
-
3
- > 来源:https://www.yunqi.tech/documents/pipe-kafka 和 https://www.yunqi.tech/documents/pipe-kafka-bestpractice-1
4
-
5
- > **⚠️ ClickZetta READ_KAFKA 使用位置参数(positional parameters)**
6
- > - ❌ 不支持 `=>` 命名参数语法(如 `KAFKA_BROKER => 'host:port'`)
7
- > - ❌ 不支持 `TABLE(READ_KAFKA(...))` 包装
8
- > - ✅ 正确:`FROM read_kafka('broker', 'topic', '', 'group', '', '', '', '', 'raw', 'raw', 0, MAP(...))`
9
-
10
- ## CREATE PIPE(READ_KAFKA 方式)
11
-
12
- ```sql
13
- CREATE [ OR REPLACE ] PIPE <pipe_name>
14
- VIRTUAL_CLUSTER = '<vcluster_name>'
15
- [ BATCH_INTERVAL_IN_SECONDS = '<seconds>' ]
16
- [ BATCH_SIZE_PER_KAFKA_PARTITION = '<count>' ]
17
- [ MAX_SKIP_BATCH_COUNT_ON_ERROR = '<count>' ]
18
- [ INITIAL_DELAY_IN_SECONDS = '<seconds>' ]
19
- [ RESET_KAFKA_GROUP_OFFSETS = '<offset_value>' ]
20
- [ COPY_JOB_HINT = '<json>' ]
21
- AS
22
- COPY INTO <target_table> FROM (
23
- SELECT <expr> [, ...]
24
- FROM read_kafka(
25
- '<bootstrap_servers>', -- 位置 1:Kafka 集群地址(必填)
26
- '<topic_name>', -- 位置 2:Topic 名称(必填)
27
- '', -- 位置 3:Topic pattern(保留,填空字符串)
28
- '<group_id>', -- 位置 4:消费者组 ID(必填)
29
- '', -- 位置 5:starting_offsets(Pipe 中留空)
30
- '', -- 位置 6:ending_offsets(Pipe 中留空)
31
- '', -- 位置 7:starting_timestamp(Pipe 中留空)
32
- '', -- 位置 8:ending_timestamp(Pipe 中留空)
33
- 'raw', -- 位置 9:key 格式(目前只支持 raw)
34
- 'raw', -- 位置 10:value 格式(目前只支持 raw)
35
- 0, -- 位置 11:max_errors
36
- MAP(<kafka_config>) -- 位置 12:Kafka 配置参数
37
- )
38
- );
39
- ```
40
-
41
- ### Pipe 参数说明
42
-
43
- | 参数 | 必填 | 默认值 | 说明 |
44
- |------|------|--------|------|
45
- | `VIRTUAL_CLUSTER` | 是 | — | 执行 Pipe 任务的计算集群 |
46
- | `BATCH_INTERVAL_IN_SECONDS` | 否 | 60 | 批处理间隔(秒),即数据新鲜度 |
47
- | `BATCH_SIZE_PER_KAFKA_PARTITION` | 否 | 500000 | 每个 Kafka 分区每批最大消息数 |
48
- | `MAX_SKIP_BATCH_COUNT_ON_ERROR` | 否 | 30 | 出错时跳过批次的最大重试次数 |
49
- | `INITIAL_DELAY_IN_SECONDS` | 否 | 0 | 首个作业调度延迟 |
50
- | `RESET_KAFKA_GROUP_OFFSETS` | 否 | — | 启动时消费位点(仅创建时生效) |
51
- | `COPY_JOB_HINT` | 否 | — | JSON 格式的作业参数 |
52
-
53
- ### RESET_KAFKA_GROUP_OFFSETS 可选值
54
-
55
- | 值 | 说明 |
56
- |----|------|
57
- | `'none'` | 无操作,使用 Kafka `auto.offset.reset`(默认 latest) |
58
- | `'valid'` | 检查当前位点是否过期,将过期分区重置到 earliest |
59
- | `'earliest'` | 重置到最早位点 |
60
- | `'latest'` | 重置到最新位点 |
61
- | `'<毫秒时间戳>'` | 重置到指定时间戳对应位点(如 `'1737789688000'`) |
62
-
63
- ### READ_KAFKA 参数(在 Pipe 中 vs 独立使用)
64
-
65
- | 特性 | 独立使用 read_kafka | 在 Pipe 中使用 |
66
- |------|-------------------|---------------|
67
- | 消费者组 | 临时,执行完即销毁 | 持久,保持消费位置 |
68
- | 位点管理 | 在 MAP 中设置 `kafka.auto.offset.reset` | Pipe 自动管理,第 5–8 位的 offset/timestamp 参数**必须留空** |
69
- | 执行方式 | 一次性查询 | 持续调度执行 |
70
- | 默认起始位置 | latest(可在 MAP 中改为 earliest) | latest(由 RESET_KAFKA_GROUP_OFFSETS 控制) |
71
-
72
- ### MAP 配置参数
73
-
74
- | 参数 | 说明 |
75
- |------|------|
76
- | `kafka.security.protocol` | 安全协议:`PLAINTEXT` 或 `SASL_PLAINTEXT` |
77
- | `kafka.sasl.mechanism` | SASL 机制:`PLAIN` |
78
- | `kafka.sasl.username` | SASL 用户名 |
79
- | `kafka.sasl.password` | SASL 密码 |
80
- | `kafka.auto.offset.reset` | 独立探查时的起始位点(`earliest` / `latest`) |
81
- | `cz.kafka.fetch.retry.enable` | 启用 fetch 重试(`true`/`false`) |
82
- | `cz.kafka.fetch.retry.times` | 重试次数 |
83
- | `cz.kafka.fetch.retry.intervalMs` | 重试间隔(毫秒) |
84
-
85
- ### JSON 字段提取语法
86
-
87
- ```sql
88
- -- key 和 value 都是 binary 类型,需要先转换
89
- value::string -- 转为字符串
90
- parse_json(value::string) -- 解析为 JSON 对象
91
- parse_json(value::string)['field']::TYPE -- 提取顶层字段
92
- parse_json(value::string)['nested']['key']::TYPE -- 提取嵌套字段
93
-
94
- -- 推荐模式:在子查询中先 parse_json,外层直接用 j['field']
95
- SELECT j['order_id']::STRING, j['amount']::DECIMAL(10,2)
96
- FROM (
97
- SELECT parse_json(value::string) AS j
98
- FROM read_kafka(...)
99
- )
100
- ```
101
-
102
- ### 完整示例
103
-
104
- ```sql
105
- -- 无认证 Kafka Pipe
106
- CREATE PIPE kafka_orders_pipe
107
- VIRTUAL_CLUSTER = 'default'
108
- BATCH_INTERVAL_IN_SECONDS = '60'
109
- AS
110
- COPY INTO ods.orders FROM (
111
- SELECT
112
- j['order_id']::STRING AS order_id,
113
- j['user_id']::STRING AS user_id,
114
- j['amount']::DECIMAL(10,2) AS amount,
115
- CAST(`timestamp` AS TIMESTAMP) AS kafka_ts
116
- FROM (
117
- SELECT `timestamp`, parse_json(value::string) AS j
118
- FROM read_kafka(
119
- 'kafka.example.com:9092',
120
- 'orders',
121
- '',
122
- 'lakehouse_orders',
123
- '', '', '', '',
124
- 'raw', 'raw', 0,
125
- MAP('kafka.security.protocol', 'PLAINTEXT')
126
- )
127
- )
128
- );
129
-
130
- -- SASL 认证 + 指定时间点消费
131
- CREATE PIPE kafka_secure_pipe
132
- VIRTUAL_CLUSTER = 'pipe_vc'
133
- BATCH_INTERVAL_IN_SECONDS = '60'
134
- RESET_KAFKA_GROUP_OFFSETS = '1737789688000'
135
- AS
136
- COPY INTO ods.secure_events FROM (
137
- SELECT
138
- j['id']::STRING AS event_id,
139
- j['payload']::STRING AS payload,
140
- CAST(`timestamp` AS TIMESTAMP) AS kafka_ts
141
- FROM (
142
- SELECT `timestamp`, parse_json(value::string) AS j
143
- FROM read_kafka(
144
- 'kafka.example.com:9092',
145
- 'secure_events',
146
- '',
147
- 'cz_secure',
148
- '', '', '', '',
149
- 'raw', 'raw', 0,
150
- MAP(
151
- 'kafka.security.protocol', 'SASL_PLAINTEXT',
152
- 'kafka.sasl.mechanism', 'PLAIN',
153
- 'kafka.sasl.username', 'my_user',
154
- 'kafka.sasl.password', 'my_password'
155
- )
156
- )
157
- )
158
- );
159
- ```
160
-
161
- ---
162
-
163
- ## 独立探查(验证连接和数据格式)
164
-
165
- ```sql
166
- -- 无认证
167
- SELECT value::string
168
- FROM read_kafka(
169
- 'kafka.example.com:9092',
170
- 'orders',
171
- '',
172
- 'test_explore',
173
- '', '', '', '',
174
- 'raw', 'raw', 0,
175
- MAP('kafka.security.protocol', 'PLAINTEXT', 'kafka.auto.offset.reset', 'earliest')
176
- )
177
- LIMIT 10;
178
-
179
- -- SASL 认证
180
- SELECT value::string
181
- FROM read_kafka(
182
- 'kafka.example.com:9092',
183
- 'orders',
184
- '',
185
- 'test_explore',
186
- '', '', '', '',
187
- 'raw', 'raw', 0,
188
- MAP(
189
- 'kafka.security.protocol', 'SASL_PLAINTEXT',
190
- 'kafka.sasl.mechanism', 'PLAIN',
191
- 'kafka.sasl.username', 'my_user',
192
- 'kafka.sasl.password', 'my_password',
193
- 'kafka.auto.offset.reset', 'earliest'
194
- )
195
- )
196
- LIMIT 10;
197
- ```
198
-
199
- ---
200
-
201
- ## CREATE PIPE(Kafka 外部表 + Table Stream 方式)
202
-
203
- ### 步骤 1:创建 Kafka Storage Connection
204
-
205
- ```sql
206
- CREATE STORAGE CONNECTION IF NOT EXISTS <conn_name>
207
- TYPE KAFKA
208
- BOOTSTRAP_SERVERS = ['<host1>:<port1>', '<host2>:<port2>']
209
- SECURITY_PROTOCOL = 'PLAINTEXT';
210
- ```
211
-
212
- ### 步骤 2:创建 Kafka 外部表
213
-
214
- ```sql
215
- -- ⚠️ 必须显式指定列定义(不能省略)
216
- -- ⚠️ offset 和 timestamp 是保留字,必须用反引号转义
217
- CREATE EXTERNAL TABLE <ext_table_name> (
218
- topic STRING,
219
- partition INT,
220
- `offset` BIGINT,
221
- `timestamp` TIMESTAMP,
222
- timestamp_type STRING,
223
- headers STRING,
224
- key BINARY,
225
- value BINARY
226
- )
227
- USING KAFKA
228
- OPTIONS (
229
- 'group_id' = '<consumer_group>',
230
- 'topics' = '<topic_name>',
231
- 'starting_offset' = '<earliest | latest>'
232
- )
233
- CONNECTION <conn_name>;
234
- ```
235
-
236
- > **注意**:
237
- > - 列定义是**必须的**,省略会报错 `failed to detect columns`
238
- > - `offset` 和 `timestamp` 是保留字,需要反引号转义
239
- > - 删除外部表用 `DROP TABLE`(不是 `DROP EXTERNAL TABLE`)
240
-
241
- ### 步骤 3:创建 Table Stream
242
-
243
- ```sql
244
- CREATE TABLE STREAM <stream_name>
245
- ON TABLE <ext_table_name>
246
- WITH PROPERTIES ('TABLE_STREAM_MODE' = 'APPEND_ONLY');
247
- ```
248
-
249
- ### 步骤 4:创建 Pipe
250
-
251
- ```sql
252
- CREATE PIPE <pipe_name>
253
- VIRTUAL_CLUSTER = '<vcluster_name>'
254
- BATCH_INTERVAL_IN_SECONDS = '60'
255
- AS
256
- COPY INTO <target_table>
257
- SELECT <expr> [, ...]
258
- FROM <stream_name>;
259
- ```
260
-
261
- ---
262
-
263
- ## ALTER PIPE
264
-
265
- ```sql
266
- -- 暂停
267
- ALTER PIPE <pipe_name> SET PIPE_EXECUTION_PAUSED = true;
268
-
269
- -- 恢复
270
- ALTER PIPE <pipe_name> SET PIPE_EXECUTION_PAUSED = false;
271
-
272
- -- 修改 VCluster
273
- ALTER PIPE <pipe_name> SET VIRTUAL_CLUSTER = 'new_vc';
274
-
275
- -- 修改 COPY_JOB_HINT
276
- ALTER PIPE <pipe_name> SET COPY_JOB_HINT = '{"cz.sql.split.kafka.strategy":"size","cz.mapper.kafka.message.size":"200000"}';
277
- ```
278
-
279
- > ⚠️ **ALTER PIPE 支持的属性**:
280
- > - ✅ `PIPE_EXECUTION_PAUSED`
281
- > - ✅ `VIRTUAL_CLUSTER`
282
- > - ✅ `COPY_JOB_HINT`
283
- > - ❌ `BATCH_INTERVAL_IN_SECONDS`(不支持,需删除重建)
284
- > - ❌ `BATCH_SIZE_PER_KAFKA_PARTITION`(不支持,需删除重建)
285
- >
286
- > 不支持修改 COPY/INSERT 语句逻辑,需删除 Pipe 后重建。
287
- > 修改 `COPY_JOB_HINT` 会覆盖所有已有 hints,需一次性设置全部参数。
288
-
289
- ---
290
-
291
- ## 监控
292
-
293
- ```sql
294
- -- 查看 Pipe 详情(含延迟信息 pipe_latency)
295
- DESC PIPE EXTENDED <pipe_name>;
296
-
297
- -- 查看所有 Pipe
298
- SHOW PIPES;
299
-
300
- -- 查看加载历史
301
- SELECT * FROM load_history('<schema>.<table>')
302
- ORDER BY last_load_time DESC LIMIT 20;
303
-
304
- -- 通过 query_tag 查看 Pipe 作业
305
- -- 格式:pipe.<workspace_name>.<schema_name>.<pipe_name>
306
- SHOW JOBS WHERE query_tag = 'pipe.my_workspace.ods.kafka_orders_pipe';
307
- ```
308
-
309
- ---
310
-
311
- ## DROP PIPE
312
-
313
- ```sql
314
- DROP PIPE [ IF EXISTS ] <pipe_name>;
315
- ```
316
-
317
- ## 参考文档
318
-
319
- - [Pipe 简介](https://www.yunqi.tech/documents/pipe-summary)
320
- - [借助 read_kafka 函数持续导入](https://www.yunqi.tech/documents/pipe-kafka)
321
- - [借助 Kafka 外表 Table Stream 持续导入](https://www.yunqi.tech/documents/pipe-kafka-table-stream)
322
- - [最佳实践:使用 Pipe 高效接入 Kafka 数据](https://www.yunqi.tech/documents/pipe-kafka-bestpractice-1)
323
- - [Kafka 外部表](https://www.yunqi.tech/documents/kafka-external-table)
324
- - [Kafka Storage Connection](https://www.yunqi.tech/documents/Kafka_connection)
@@ -1,218 +0,0 @@
1
- ---
2
- name: clickzetta-lakehouse-connect
3
- description: |
4
- Guide for connecting to ClickZetta Lakehouse via SDK/JDBC. Covers Python SDK (clickzetta.connect), ZettaPark Session (DataFrame API), SQLAlchemy (ORM/BI tools), and JDBC (Java). Use this skill when user needs to configure a connection from external tools or code. Trigger for: "Python SDK 连接", "JDBC 连接", "SQLAlchemy 配置", "ZettaPark 怎么用", "连接报错", "clickzetta-connector-python", "clickzetta-sqlalchemy".
5
- Keywords: connection, Python SDK, JDBC, SQLAlchemy, ZettaPark, driver, connect
6
- ---
7
-
8
- # ClickZetta Lakehouse 连接指南
9
-
10
- ## 指令
11
-
12
- ### 步骤 0:自动获取连接参数(优先)
13
-
14
- **在询问用户之前,先尝试从本地配置文件自动读取连接参数。**
15
-
16
- 按以下优先级查找配置文件(找到第一个即停止):
17
- 1. `/app/.clickzetta/lakehouse_connection/connections.json`
18
- 2. `config/lakehouse_connection/connections.json`
19
- 3. `~/.clickzetta/connections.json`
20
- 4. `/app/.clickzetta/connections.json`
21
-
22
- 找到配置文件后:
23
- - 解析 JSON,提取 `connections` 数组
24
- - 根据用户描述的区域/环境匹配对应连接(如"阿里云上海"匹配 `service` 含 `cn-shanghai-alicloud` 的连接)
25
- - 若有 `is_default: true` 且用户未指定区域,使用默认连接
26
- - **不要将密码或完整配置输出到对话中**,仅内部使用
27
-
28
- 若配置文件不存在或无匹配连接,再向用户询问:service、instance、workspace、username、password、schema、vcluster。
29
-
30
- ### 步骤 1:确认连接方式
31
-
32
- 根据用户场景选择连接方式,阅读对应参考文件:
33
-
34
- | 用户需求 | 参考文件 |
35
- |:--|:--|
36
- | Python 脚本 / 自动化 / 执行 SQL | [references/python-sdk.md](references/python-sdk.md) |
37
- | DataFrame / 数据工程 | [references/zettapark-session.md](references/zettapark-session.md) |
38
- | ORM / Web 应用 / BI 工具(Superset) | [references/sqlalchemy.md](references/sqlalchemy.md) |
39
- | Java 应用 / BI 工具(DBeaver) | [references/jdbc.md](references/jdbc.md) |
40
- | 多环境配置文件管理 | [references/config-file.md](references/config-file.md) |
41
-
42
- 不确定时参考决策树:
43
- - 需要 DataFrame 操作 → ZettaPark Session
44
- - 需要 ORM / SQLAlchemy 集成 → SQLAlchemy
45
- - Java 应用 → JDBC
46
- - 其他 Python 场景(含直接执行 SQL)→ Python SDK
47
-
48
- ### 步骤 2:确认 service 地址
49
-
50
- `service` 参数必须包含区域前缀,根据实例所在区域选择:
51
-
52
- **云器 Lakehouse(国内版,`clickzetta.com`)**
53
-
54
- | 云厂商 | 区域 | service 地址 |
55
- |:--|:--|:--|
56
- | 阿里云 | 华东2(上海) | `cn-shanghai-alicloud.api.clickzetta.com` |
57
- | 腾讯云 | 华东(上海) | `ap-shanghai-tencentcloud.api.clickzetta.com` |
58
- | 腾讯云 | 华北(北京) | `ap-beijing-tencentcloud.api.clickzetta.com` |
59
- | 腾讯云 | 华南(广州) | `ap-guangzhou-tencentcloud.api.clickzetta.com` |
60
- | AWS | 中国(北京) | `cn-north-1-aws.api.clickzetta.com` |
61
-
62
- **Singdata Lakehouse(国际版,`singdata.com`)**
63
-
64
- | 云厂商 | 区域 | service 地址 |
65
- |:--|:--|:--|
66
- | 阿里云 | 亚太东南1(新加坡) | `ap-southeast-1-alicloud.api.singdata.com` |
67
- | AWS | 亚太(新加坡) | `ap-southeast-1-aws.api.singdata.com` |
68
-
69
- 控制台(国内版,`clickzetta.com`):`https://{instance}.{region}.app.clickzetta.com`
70
-
71
- ### 步骤 3:执行查询或提供可运行代码
72
-
73
- **若用户要求执行查询(如 SHOW SCHEMAS、SELECT、SHOW TABLES 等):**
74
-
75
- 1. 确认 `clickzetta-connector-python` 已安装:
76
- ```bash
77
- pip3 show clickzetta-connector-python
78
- ```
79
- 若未安装,执行:`pip3 install clickzetta-connector-python --user`
80
-
81
- 2. 使用步骤 0 获取的连接参数直接执行查询,将结果格式化后展示给用户。
82
-
83
- **若用户要求生成代码:**
84
-
85
- 阅读对应参考文件后,根据参数生成完整可运行代码。所有连接参数都必须在代码中显式给出;用户未指定 `vcluster` 时使用 `default_ap`。
86
-
87
- 密码含特殊字符时(SQLAlchemy URI),提醒用户用 `urllib.parse.quote_plus()` 编码。
88
-
89
- ## 示例
90
-
91
- ### 示例 0:自动读取配置并执行查询
92
-
93
- ```python
94
- import json, os, clickzetta
95
-
96
- # 按优先级查找配置文件
97
- config_paths = [
98
- "/app/.clickzetta/lakehouse_connection/connections.json",
99
- "config/lakehouse_connection/connections.json",
100
- os.path.expanduser("~/.clickzetta/connections.json"),
101
- "/app/.clickzetta/connections.json",
102
- ]
103
- config = None
104
- for path in config_paths:
105
- if os.path.exists(path):
106
- with open(path) as f:
107
- config = json.load(f)
108
- break
109
-
110
- # 选择目标连接(示例:匹配阿里云上海)
111
- conn_cfg = next(
112
- (c for c in config["connections"] if "cn-shanghai-alicloud" in c.get("service", "")),
113
- None
114
- ) or next((c for c in config["connections"] if c.get("is_default")), config["connections"][0])
115
-
116
- conn = clickzetta.connect(
117
- service=conn_cfg["service"],
118
- instance=conn_cfg["instance"],
119
- workspace=conn_cfg["workspace"],
120
- schema=conn_cfg.get("schema", "public"),
121
- username=conn_cfg["username"],
122
- password=conn_cfg["password"],
123
- vcluster=conn_cfg.get("vcluster", "default_ap")
124
- )
125
- cursor = conn.cursor()
126
- cursor.execute("SHOW SCHEMAS")
127
- for row in cursor.fetchall():
128
- print(row[0])
129
- cursor.close()
130
- conn.close()
131
- ```
132
-
133
- ### 示例 1:Python SDK 连接并查询
134
-
135
- ```python
136
- import clickzetta
137
-
138
- conn = clickzetta.connect(
139
- service="cn-shanghai-alicloud.api.clickzetta.com",
140
- instance="my_instance",
141
- workspace="my_workspace",
142
- schema="public",
143
- username="my_user",
144
- password="my_password",
145
- vcluster="default_ap"
146
- )
147
- cursor = conn.cursor()
148
- cursor.execute("SELECT * FROM orders LIMIT 10")
149
- for row in cursor.fetchall():
150
- print(row)
151
- cursor.close()
152
- conn.close()
153
- ```
154
-
155
- ### 示例 2:ZettaPark 按 region 汇总 revenue
156
-
157
- ```python
158
- from clickzetta.zettapark.session import Session
159
- from clickzetta.zettapark import functions as F
160
-
161
- session = Session.builder.configs({
162
- "service": "cn-shanghai-alicloud.api.clickzetta.com",
163
- "instance": "my_instance", "workspace": "my_workspace",
164
- "schema": "public", "username": "my_user",
165
- "password": "my_password", "vcluster": "default_ap"
166
- }).create()
167
-
168
- session.table("sales") \
169
- .group_by(F.col("region")) \
170
- .agg(F.sum("revenue").as_("total_revenue")) \
171
- .write.save_as_table("sales_summary", mode="overwrite")
172
- session.close()
173
- ```
174
-
175
- ## 故障排除
176
-
177
- | 错误信息 | 原因 | 解决方案 |
178
- |:--|:--|:--|
179
- | `Connection refused` | service 地址不正确或网络不通 | 检查 service 是否匹配区域(参见步骤 2 区域表) |
180
- | `Authentication failed` | 用户名或密码错误 | 核实 username 和 password |
181
- | `Workspace not found` | 工作空间名称不存在 | 在控制台确认 workspace 拼写 |
182
- | `Instance not found` | 实例名称不存在 | 在控制台确认 instance 拼写 |
183
- | `Timeout` | 查询超时 | 增大 `hints` 中的 `sdk.job.timeout`(默认 300 秒) |
184
- | `VCluster not available` | 虚拟集群未启动或名称错误 | 确认 vcluster 名称,检查集群状态 |
185
- | SQLAlchemy URL 解析错误 | 密码含特殊字符 | 用 `urllib.parse.quote_plus()` 对密码 URL 编码 |
186
- | `ClassNotFoundException` | JDBC 驱动未在 classpath | 确保 `clickzetta-java` JAR 已加入 classpath |
187
-
188
- ## 安装
189
-
190
- > ⚠️ **Python 版本要求**:推荐 **Python 3.12**(最低 3.10)。Python 3.9 及以下不支持。
191
-
192
- | 连接方式 | 安装命令 |
193
- |:--|:--|
194
- | Python SDK | `pip install clickzetta-connector-python -i https://pypi.tuna.tsinghua.edu.cn/simple` |
195
- | ZettaPark | `pip install clickzetta-zettapark-python -i https://pypi.tuna.tsinghua.edu.cn/simple` |
196
- | SQLAlchemy | `pip install clickzetta-connector-python clickzetta-sqlalchemy -i https://pypi.tuna.tsinghua.edu.cn/simple` |
197
- | JDBC | Maven: `com.clickzetta:clickzetta-java` |
198
-
199
- ```bash
200
- # 方式 1:venv(Python 内置,推荐)
201
- python3.12 -m venv .venv
202
- source .venv/bin/activate # macOS/Linux
203
- # .venv\Scripts\activate # Windows
204
- pip install clickzetta-connector-python clickzetta-zettapark-python \
205
- -i https://pypi.tuna.tsinghua.edu.cn/simple
206
-
207
- # 方式 2:pyenv(需要切换 Python 版本时)
208
- pyenv install 3.12.9
209
- pyenv local 3.12.9
210
- python -m venv .venv && source .venv/bin/activate
211
- pip install clickzetta-connector-python clickzetta-zettapark-python \
212
- -i https://pypi.tuna.tsinghua.edu.cn/simple
213
-
214
- # 方式 3:conda(数据科学环境)
215
- conda create -n lakehouse python=3.12 -y && conda activate lakehouse
216
- pip install clickzetta-connector-python clickzetta-zettapark-python \
217
- -i https://pypi.tuna.tsinghua.edu.cn/simple
218
- ```
@@ -1,35 +0,0 @@
1
- {
2
- "skill_name": "clickzetta-lakehouse-connect",
3
- "evals": [
4
- {
5
- "id": 1,
6
- "prompt": "我需要用 Python 连接 ClickZetta,实例名是 my_instance,工作空间是 analytics,region 是上海阿里云,用户名 alice,密码 secret123。帮我写一段查询 orders 表前 10 行的代码。",
7
- "expected_output": "使用 clickzetta.connect() 或 clickzetta-connector-python,包含所有必填参数(service/instance/workspace/schema/username/password/vcluster),并演示 cursor.execute + fetchall 查询",
8
- "files": []
9
- },
10
- {
11
- "id": 2,
12
- "prompt": "我想用 ZettaPark 做数据工程,需要读取 sales 表,按 region 分组求 revenue 总和,然后写回到 sales_summary 表。帮我写完整代码。",
13
- "expected_output": "使用 Session.builder.configs().create(),展示 session.table() + group_by + agg + write.save_as_table(),包含连接参数配置",
14
- "files": []
15
- },
16
- {
17
- "id": 3,
18
- "prompt": "我在用 Apache Superset 连接 ClickZetta,SQLAlchemy URI 应该怎么填?密码是 P@ss#2024,需要注意什么?",
19
- "expected_output": "提供正确的 clickzetta:// URI 格式,指出密码特殊字符需要 quote_plus 编码,给出编码后的示例",
20
- "files": []
21
- },
22
- {
23
- "id": 4,
24
- "prompt": "连接云器 Lakehouse 报错 Connection refused,我的 service 填的是 api.clickzetta.com,实例在上海腾讯云,怎么排查?",
25
- "expected_output": "识别 service 地址填错,给出正确的上海腾讯云地址 ap-shanghai-tencentcloud.api.clickzetta.com,并提供排查步骤",
26
- "files": []
27
- },
28
- {
29
- "id": 5,
30
- "prompt": "我有三个环境:dev/staging/prod,都在同一个 ClickZetta 实例上但不同 workspace。想用 connections.json 统一管理,并在代码里切换。怎么配置?",
31
- "expected_output": "提供 connections.json 多连接配置示例(含 is_default),展示 switch_connection() 用法,说明文件放置路径",
32
- "files": []
33
- }
34
- ]
35
- }