@clickzetta/cz-cli-linux-x64 0.3.2 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/bin/cz-cli +0 -0
  2. package/package.json +1 -1
  3. package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
  4. package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
  5. package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
  6. package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
  7. package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
  8. package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
  9. package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
  10. package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
  11. package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
  12. package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
  13. package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
  14. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -293
  15. package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
  16. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
  17. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -450
  18. package/bin/skills/clickzetta-concepts/SKILL.md +0 -282
  19. package/bin/skills/clickzetta-concepts/references/brands-and-endpoints.md +0 -79
  20. package/bin/skills/clickzetta-concepts/references/object-model.md +0 -311
  21. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -165
  22. package/bin/skills/clickzetta-data-lifecycle/SKILL.md +0 -211
  23. package/bin/skills/clickzetta-data-lifecycle/references/lifecycle-reference.md +0 -175
  24. package/bin/skills/clickzetta-data-recovery/SKILL.md +0 -215
  25. package/bin/skills/clickzetta-data-recovery/evals/evals.json +0 -35
  26. package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
  27. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
  28. package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
  29. package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
  30. package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
  31. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
  32. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
  33. package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
  34. package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
  35. package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -540
  36. package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -259
  37. package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
  38. package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -86
  39. package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -257
  40. package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
  41. package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
  42. package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
  43. package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
  44. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  45. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -429
  46. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -268
  47. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
  48. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
  49. package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -120
  50. package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
  51. package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
  52. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
  53. package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -117
  54. package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
  55. package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
  56. package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
  57. package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
  58. package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
  59. package/bin/skills/clickzetta-information-schema/SKILL.md +0 -367
  60. package/bin/skills/clickzetta-information-schema/references/instance-views-reference.md +0 -276
  61. package/bin/skills/clickzetta-information-schema/references/metering-views-reference.md +0 -137
  62. package/bin/skills/clickzetta-information-schema/references/views-reference.md +0 -271
  63. package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
  64. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
  65. package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
  66. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -531
  67. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -186
  68. package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
  69. package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
  70. package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
  71. package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
  72. package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
  73. package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
  74. package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
  75. package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
  76. package/bin/skills/clickzetta-metadata-query/SKILL.md +0 -298
  77. package/bin/skills/clickzetta-metadata-query/references/show-desc-reference.md +0 -326
  78. package/bin/skills/clickzetta-monitoring/SKILL.md +0 -199
  79. package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
  80. package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
  81. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -402
  82. package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
  83. package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
  84. package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
  85. package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
  86. package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
  87. package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
  88. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -197
  89. package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
  90. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
  91. package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
  92. package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
  93. package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
  94. package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -353
  95. package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
  96. package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -173
  97. package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
  98. package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -160
  99. package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -123
  100. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -172
  101. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  102. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  103. package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
  104. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  105. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  106. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
  107. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  108. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  109. package/bin/skills/clickzetta-studio-overview/SKILL.md +0 -170
  110. package/bin/skills/clickzetta-studio-overview/references/studio-modules.md +0 -173
  111. package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -155
  112. package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
  113. package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
  114. package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
  115. package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -249
  116. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -194
  117. package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
  118. package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
@@ -1,186 +0,0 @@
1
- # Kafka Pipe SQL 语法参考
2
-
3
- > 来源:https://www.yunqi.tech/documents/pipe-kafka 和 https://www.yunqi.tech/documents/pipe-kafka-bestpractice-1
4
-
5
- ## CREATE PIPE(READ_KAFKA 方式)
6
-
7
- ```sql
8
- CREATE [ OR REPLACE ] PIPE <pipe_name>
9
- VIRTUAL_CLUSTER = <vcluster_name>
10
- [ BATCH_INTERVAL_IN_SECONDS = <seconds> ]
11
- [ BATCH_SIZE_PER_KAFKA_PARTITION = <count> ]
12
- [ MAX_SKIP_BATCH_COUNT_ON_ERROR = <count> ]
13
- [ INITIAL_DELAY_IN_SECONDS = <seconds> ]
14
- [ RESET_KAFKA_GROUP_OFFSETS = '<offset_value>' ]
15
- AS
16
- INSERT INTO <target_table> [ ( <col1>, <col2>, ... ) ]
17
- SELECT <expr> [, ...]
18
- FROM TABLE(
19
- READ_KAFKA(
20
- KAFKA_BROKER => '<broker_host>:<port>',
21
- KAFKA_TOPIC => '<topic_name>',
22
- KAFKA_GROUP_ID => '<consumer_group>',
23
- KAFKA_DATA_FORMAT => '<json | csv | avro>',
24
- [ KAFKA_SASL_USERNAME => '<username>', ]
25
- [ KAFKA_SASL_PASSWORD => '<password>' ]
26
- )
27
- );
28
- ```
29
-
30
- ### 参数说明
31
-
32
- | 参数 | 必填 | 默认值 | 说明 |
33
- |------|------|--------|------|
34
- | `VIRTUAL_CLUSTER` | 是 | — | 执行 Pipe 任务的计算集群 |
35
- | `BATCH_INTERVAL_IN_SECONDS` | 否 | 60 | 批处理间隔(秒),即数据新鲜度 |
36
- | `BATCH_SIZE_PER_KAFKA_PARTITION` | 否 | 500000 | 每个 Kafka 分区每批最大消息数 |
37
- | `MAX_SKIP_BATCH_COUNT_ON_ERROR` | 否 | 30 | 出错时跳过批次的最大重试次数 |
38
- | `INITIAL_DELAY_IN_SECONDS` | 否 | 0 | 首个作业调度延迟 |
39
- | `RESET_KAFKA_GROUP_OFFSETS` | 否 | — | 启动时消费位点(仅创建时生效) |
40
-
41
- ### RESET_KAFKA_GROUP_OFFSETS 可选值
42
-
43
- | 值 | 说明 |
44
- |----|------|
45
- | `'none'` | 无操作,使用 Kafka `auto.offset.reset`(默认 latest) |
46
- | `'valid'` | 检查当前位点是否过期,将过期分区重置到 earliest |
47
- | `'earliest'` | 重置到最早位点 |
48
- | `'latest'` | 重置到最新位点 |
49
- | `'<毫秒时间戳>'` | 重置到指定时间戳对应位点(如 `'1737789688000'`) |
50
-
51
- ### READ_KAFKA 参数(在 Pipe 中 vs 独立使用)
52
-
53
- | 特性 | 独立使用 READ_KAFKA | 在 Pipe 中使用 |
54
- |------|-------------------|---------------|
55
- | 消费者组 | 临时,执行完即销毁 | 持久,保持消费位置 |
56
- | 位置管理 | 手动指定 `KAFKA_OFFSET` | Pipe 自动管理,**不要设置** `KAFKA_OFFSET` |
57
- | 执行方式 | 一次性查询 | 持续调度执行 |
58
- | 默认起始位置 | earliest(探查历史数据) | latest(处理新数据) |
59
-
60
- ### JSON 字段提取语法
61
-
62
- ```sql
63
- -- $1 表示整行 JSON
64
- $1:field_name::TYPE -- 提取顶层字段
65
- $1:nested.field::TYPE -- 提取嵌套字段(点号访问)
66
- PARSE_JSON($1:field::STRING) -- 将字符串字段解析为 JSON 对象
67
- ```
68
-
69
- ---
70
-
71
- ## CREATE PIPE(Kafka 外部表 + Table Stream 方式)
72
-
73
- ### 步骤 1:创建 Kafka Storage Connection
74
-
75
- ```sql
76
- CREATE STORAGE CONNECTION IF NOT EXISTS <conn_name>
77
- TYPE KAFKA
78
- BOOTSTRAP_SERVERS = ['<host1>:<port1>', '<host2>:<port2>']
79
- SECURITY_PROTOCOL = '<PLAINTEXT | SASL_PLAINTEXT>';
80
- ```
81
-
82
- ### 步骤 2:创建 Kafka 外部表
83
-
84
- ```sql
85
- CREATE EXTERNAL TABLE <ext_table_name>
86
- USING KAFKA
87
- OPTIONS (
88
- 'group_id' = '<consumer_group>',
89
- 'topics' = '<topic_name>',
90
- 'starting_offset' = '<earliest | latest>'
91
- )
92
- CONNECTION <conn_name>;
93
- ```
94
-
95
- 固定字段:
96
-
97
- | 字段 | 类型 | 说明 |
98
- |------|------|------|
99
- | topic | STRING | Kafka 主题名称 |
100
- | partition | INT | 分区 ID |
101
- | offset | BIGINT | 分区内偏移量 |
102
- | timestamp | TIMESTAMP_LTZ | 消息时间戳 |
103
- | timestamp_type | STRING | 时间戳类型 |
104
- | headers | MAP<STRING, BINARY> | 消息头 |
105
- | key | BINARY | 消息键 |
106
- | value | BINARY | 消息体 |
107
-
108
- ### 步骤 3:创建 Table Stream
109
-
110
- ```sql
111
- CREATE TABLE STREAM <stream_name>
112
- ON TABLE <ext_table_name>
113
- WITH PROPERTIES ('TABLE_STREAM_MODE' = 'APPEND_ONLY');
114
- ```
115
-
116
- ### 步骤 4:创建 Pipe
117
-
118
- ```sql
119
- CREATE PIPE <pipe_name>
120
- VIRTUAL_CLUSTER = <vcluster_name>
121
- [ BATCH_INTERVAL_IN_SECONDS = <seconds> ]
122
- AS
123
- COPY INTO <target_table>
124
- SELECT <expr> [, ...]
125
- FROM <stream_name>;
126
- ```
127
-
128
- ---
129
-
130
- ## ALTER PIPE
131
-
132
- ```sql
133
- -- 暂停
134
- ALTER PIPE <pipe_name> SET PIPE_EXECUTION_PAUSED = true;
135
-
136
- -- 恢复
137
- ALTER PIPE <pipe_name> SET PIPE_EXECUTION_PAUSED = false;
138
-
139
- -- 修改属性(每次只能改一个)
140
- ALTER PIPE <pipe_name> SET BATCH_INTERVAL_IN_SECONDS = 120;
141
- ALTER PIPE <pipe_name> SET BATCH_SIZE_PER_KAFKA_PARTITION = 1000000;
142
- ALTER PIPE <pipe_name> SET VIRTUAL_CLUSTER = 'new_vc';
143
- ALTER PIPE <pipe_name> SET COPY_JOB_HINT = '{"cz.sql.split.kafka.strategy":"size","cz.mapper.kafka.message.size":"200000"}';
144
- ```
145
-
146
- > 不支持修改 COPY/INSERT 语句逻辑,需删除 Pipe 后重建。
147
- > 修改 `COPY_JOB_HINT` 会覆盖所有已有 hints,需一次性设置全部参数。
148
-
149
- ---
150
-
151
- ## 监控
152
-
153
- ```sql
154
- -- 查看 Pipe 详情(含延迟信息)
155
- DESC PIPE <pipe_name>;
156
- DESC PIPE EXTENDED <pipe_name>;
157
-
158
- -- 查看所有 Pipe
159
- SHOW PIPES;
160
-
161
- -- 查看加载历史
162
- SELECT * FROM TABLE(load_history('<schema>.<table>'))
163
- ORDER BY last_load_time DESC LIMIT 20;
164
-
165
- -- 通过 query_tag 查看 Pipe 作业
166
- -- 格式:pipe.<workspace_name>.<schema_name>.<pipe_name>
167
- SHOW JOBS WHERE query_tag = 'pipe.my_workspace.ods.kafka_orders_pipe';
168
- ```
169
-
170
- ---
171
-
172
- ## DROP PIPE
173
-
174
- ```sql
175
- DROP PIPE [ IF EXISTS ] <pipe_name>;
176
- ```
177
-
178
- ## 参考文档
179
-
180
- - [Pipe 简介](https://www.yunqi.tech/documents/pipe-summary)
181
- - [借助 read_kafka 函数持续导入](https://www.yunqi.tech/documents/pipe-kafka)
182
- - [借助 Kafka 外表 Table Stream 持续导入](https://www.yunqi.tech/documents/pipe-kafka-table-stream)
183
- - [最佳实践:使用 Pipe 高效接入 Kafka 数据](https://www.yunqi.tech/documents/pipe-kafka-bestpractice-1)
184
- - [read_kafka 函数](https://www.yunqi.tech/documents/read_kafka)
185
- - [Kafka 外部表](https://www.yunqi.tech/documents/kafka-external-table)
186
- - [PIPE 导入语法](https://www.yunqi.tech/documents/pipe-syntax)
@@ -1,218 +0,0 @@
1
- ---
2
- name: clickzetta-lakehouse-connect
3
- description: |
4
- Guide for connecting to ClickZetta Lakehouse via SDK/JDBC. Covers Python SDK (clickzetta.connect), ZettaPark Session (DataFrame API), SQLAlchemy (ORM/BI tools), and JDBC (Java). Use this skill when user needs to configure a connection from external tools or code — NOT for querying data inside czcode (use execute_sql/list_objects tools instead). Trigger for: "Python SDK 连接", "JDBC 连接", "SQLAlchemy 配置", "ZettaPark 怎么用", "连接报错", "clickzetta-connector-python", "clickzetta-sqlalchemy".
5
- Keywords: connection, Python SDK, JDBC, SQLAlchemy, ZettaPark, driver, connect
6
- ---
7
-
8
- # ClickZetta Lakehouse 连接指南
9
-
10
- ## 指令
11
-
12
- ### 步骤 0:自动获取连接参数(优先)
13
-
14
- **在询问用户之前,先尝试从本地配置文件自动读取连接参数。**
15
-
16
- 按以下优先级查找配置文件(找到第一个即停止):
17
- 1. `/app/.clickzetta/lakehouse_connection/connections.json`
18
- 2. `config/lakehouse_connection/connections.json`
19
- 3. `~/.clickzetta/connections.json`
20
- 4. `/app/.clickzetta/connections.json`
21
-
22
- 找到配置文件后:
23
- - 解析 JSON,提取 `connections` 数组
24
- - 根据用户描述的区域/环境匹配对应连接(如"阿里云上海"匹配 `service` 含 `cn-shanghai-alicloud` 的连接)
25
- - 若有 `is_default: true` 且用户未指定区域,使用默认连接
26
- - **不要将密码或完整配置输出到对话中**,仅内部使用
27
-
28
- 若配置文件不存在或无匹配连接,再向用户询问:service、instance、workspace、username、password、schema、vcluster。
29
-
30
- ### 步骤 1:确认连接方式
31
-
32
- 根据用户场景选择连接方式,阅读对应参考文件:
33
-
34
- | 用户需求 | 参考文件 |
35
- |:--|:--|
36
- | Python 脚本 / 自动化 / 执行 SQL | [references/python-sdk.md](references/python-sdk.md) |
37
- | DataFrame / 数据工程 | [references/zettapark-session.md](references/zettapark-session.md) |
38
- | ORM / Web 应用 / BI 工具(Superset) | [references/sqlalchemy.md](references/sqlalchemy.md) |
39
- | Java 应用 / BI 工具(DBeaver) | [references/jdbc.md](references/jdbc.md) |
40
- | 多环境配置文件管理 | [references/config-file.md](references/config-file.md) |
41
-
42
- 不确定时参考决策树:
43
- - 需要 DataFrame 操作 → ZettaPark Session
44
- - 需要 ORM / SQLAlchemy 集成 → SQLAlchemy
45
- - Java 应用 → JDBC
46
- - 其他 Python 场景(含直接执行 SQL)→ Python SDK
47
-
48
- ### 步骤 2:确认 service 地址
49
-
50
- `service` 参数必须包含区域前缀,根据实例所在区域选择:
51
-
52
- **云器 Lakehouse(国内版,`clickzetta.com`)**
53
-
54
- | 云厂商 | 区域 | service 地址 |
55
- |:--|:--|:--|
56
- | 阿里云 | 华东2(上海) | `cn-shanghai-alicloud.api.clickzetta.com` |
57
- | 腾讯云 | 华东(上海) | `ap-shanghai-tencentcloud.api.clickzetta.com` |
58
- | 腾讯云 | 华北(北京) | `ap-beijing-tencentcloud.api.clickzetta.com` |
59
- | 腾讯云 | 华南(广州) | `ap-guangzhou-tencentcloud.api.clickzetta.com` |
60
- | AWS | 中国(北京) | `cn-north-1-aws.api.clickzetta.com` |
61
-
62
- **Singdata Lakehouse(国际版,`singdata.com`)**
63
-
64
- | 云厂商 | 区域 | service 地址 |
65
- |:--|:--|:--|
66
- | 阿里云 | 亚太东南1(新加坡) | `ap-southeast-1-alicloud.api.singdata.com` |
67
- | AWS | 亚太(新加坡) | `ap-southeast-1-aws.api.singdata.com` |
68
-
69
- 控制台(国内版):`https://{instance}.{region}.app.clickzetta.com`;国际版请将域名替换为 `singdata.com`。
70
-
71
- ### 步骤 3:执行查询或提供可运行代码
72
-
73
- **若用户要求执行查询(如 SHOW SCHEMAS、SELECT、SHOW TABLES 等):**
74
-
75
- 1. 确认 `clickzetta-connector-python` 已安装:
76
- ```bash
77
- pip3 show clickzetta-connector-python
78
- ```
79
- 若未安装,执行:`pip3 install clickzetta-connector-python`(仅在未使用虚拟环境且无全局安装权限时追加 `--user`;虚拟环境内使用 `--user` 会报错)
80
-
81
- 2. 使用步骤 0 获取的连接参数直接执行查询,将结果格式化后展示给用户。
82
-
83
- **若用户要求生成代码:**
84
-
85
- 阅读对应参考文件后,根据参数生成完整可运行代码。生成的代码需显式包含全部连接参数;若用户未指定 `vcluster`,使用默认值 `default_ap`。
86
-
87
- 密码含特殊字符时(SQLAlchemy URI),提醒用户用 `urllib.parse.quote_plus()` 编码。
88
-
89
- ## 示例
90
-
91
- ### 示例 0:自动读取配置并执行查询
92
-
93
- ```python
94
- import json, os, clickzetta
95
-
96
- # 按优先级查找配置文件
97
- config_paths = [
98
- "/app/.clickzetta/lakehouse_connection/connections.json",
99
- "config/lakehouse_connection/connections.json",
100
- os.path.expanduser("~/.clickzetta/connections.json"),
101
- "/app/.clickzetta/connections.json",
102
- ]
103
- config = None
104
- for path in config_paths:
105
- if os.path.exists(path):
106
- with open(path) as f:
107
- config = json.load(f)
108
- break
109
-
110
- # 选择目标连接(示例:匹配阿里云上海)
111
- conn_cfg = next(
112
- (c for c in config["connections"] if "cn-shanghai-alicloud" in c.get("service", "")),
113
- None
114
- ) or next((c for c in config["connections"] if c.get("is_default")), config["connections"][0])
115
-
116
- conn = clickzetta.connect(
117
- service=conn_cfg["service"],
118
- instance=conn_cfg["instance"],
119
- workspace=conn_cfg["workspace"],
120
- schema=conn_cfg.get("schema", "public"),
121
- username=conn_cfg["username"],
122
- password=conn_cfg["password"],
123
- vcluster=conn_cfg.get("vcluster", "default_ap")
124
- )
125
- cursor = conn.cursor()
126
- cursor.execute("SHOW SCHEMAS")
127
- for row in cursor.fetchall():
128
- print(row[0])
129
- cursor.close()
130
- conn.close()
131
- ```
132
-
133
- ### 示例 1:Python SDK 连接并查询
134
-
135
- ```python
136
- import clickzetta
137
-
138
- conn = clickzetta.connect(
139
- service="cn-shanghai-alicloud.api.clickzetta.com",
140
- instance="my_instance",
141
- workspace="my_workspace",
142
- schema="public",
143
- username="my_user",
144
- password="my_password",
145
- vcluster="default_ap"
146
- )
147
- cursor = conn.cursor()
148
- cursor.execute("SELECT * FROM orders LIMIT 10")
149
- for row in cursor.fetchall():
150
- print(row)
151
- cursor.close()
152
- conn.close()
153
- ```
154
-
155
- ### 示例 2:ZettaPark 按 region 汇总 revenue
156
-
157
- ```python
158
- from clickzetta.zettapark.session import Session
159
- from clickzetta.zettapark import functions as F
160
-
161
- session = Session.builder.configs({
162
- "service": "cn-shanghai-alicloud.api.clickzetta.com",
163
- "instance": "my_instance", "workspace": "my_workspace",
164
- "schema": "public", "username": "my_user",
165
- "password": "my_password", "vcluster": "default_ap"
166
- }).create()
167
-
168
- session.table("sales") \
169
- .group_by(F.col("region")) \
170
- .agg(F.sum("revenue").as_("total_revenue")) \
171
- .write.save_as_table("sales_summary", mode="overwrite")
172
- session.close()
173
- ```
174
-
175
- ## 故障排除
176
-
177
- | 错误信息 | 原因 | 解决方案 |
178
- |:--|:--|:--|
179
- | `Connection refused` | service 地址不正确或网络不通 | 检查 service 是否匹配区域(参见步骤 2 区域表) |
180
- | `Authentication failed` | 用户名或密码错误 | 核实 username 和 password |
181
- | `Workspace not found` | 工作空间名称不存在 | 在控制台确认 workspace 拼写 |
182
- | `Instance not found` | 实例名称不存在 | 在控制台确认 instance 拼写 |
183
- | `Timeout` | 查询超时 | 增大 `hints` 中的 `sdk.job.timeout`(默认 300 秒) |
184
- | `VCluster not available` | 虚拟集群未启动或名称错误 | 确认 vcluster 名称,检查集群状态 |
185
- | SQLAlchemy URL 解析错误 | 密码含特殊字符 | 用 `urllib.parse.quote_plus()` 对密码 URL 编码 |
186
- | `ClassNotFoundException` | JDBC 驱动未在 classpath | 确保 `clickzetta-java` JAR 已加入 classpath |
187
-
188
- ## 安装
189
-
190
- > ⚠️ **Python 版本要求**:推荐 **Python 3.12**(最低 3.10)。Python 3.9 及以下不支持。
191
-
192
- | 连接方式 | 安装命令 |
193
- |:--|:--|
194
- | Python SDK | `pip install clickzetta-connector-python -i https://pypi.tuna.tsinghua.edu.cn/simple` |
195
- | ZettaPark | `pip install clickzetta-zettapark-python -i https://pypi.tuna.tsinghua.edu.cn/simple` |
196
- | SQLAlchemy | `pip install clickzetta-connector-python clickzetta-sqlalchemy -i https://pypi.tuna.tsinghua.edu.cn/simple` |
197
- | JDBC | Maven: `com.clickzetta:clickzetta-java` |
198
-
199
- ```bash
200
- # 方式 1:venv(Python 内置,推荐)
201
- python3.12 -m venv .venv
202
- source .venv/bin/activate # macOS/Linux
203
- # .venv\Scripts\activate # Windows
204
- pip install clickzetta-connector-python clickzetta-zettapark-python \
205
- -i https://pypi.tuna.tsinghua.edu.cn/simple
206
-
207
- # 方式 2:pyenv(需要切换 Python 版本时)
208
- pyenv install 3.12.9
209
- pyenv local 3.12.9
210
- python -m venv .venv && source .venv/bin/activate
211
- pip install clickzetta-connector-python clickzetta-zettapark-python \
212
- -i https://pypi.tuna.tsinghua.edu.cn/simple
213
-
214
- # 方式 3:conda(数据科学环境)
215
- conda create -n lakehouse python=3.12 -y && conda activate lakehouse
216
- pip install clickzetta-connector-python clickzetta-zettapark-python \
217
- -i https://pypi.tuna.tsinghua.edu.cn/simple
218
- ```
@@ -1,35 +0,0 @@
1
- {
2
- "skill_name": "clickzetta-lakehouse-connect",
3
- "evals": [
4
- {
5
- "id": 1,
6
- "prompt": "我需要用 Python 连接 ClickZetta,实例名是 my_instance,工作空间是 analytics,region 是上海阿里云,用户名 alice,密码 secret123。帮我写一段查询 orders 表前 10 行的代码。",
7
- "expected_output": "使用 clickzetta.connect() 或 clickzetta-connector-python,包含所有必填参数(service/instance/workspace/schema/username/password/vcluster),并演示 cursor.execute + fetchall 查询",
8
- "files": []
9
- },
10
- {
11
- "id": 2,
12
- "prompt": "我想用 ZettaPark 做数据工程,需要读取 sales 表,按 region 分组求 revenue 总和,然后写回到 sales_summary 表。帮我写完整代码。",
13
- "expected_output": "使用 Session.builder.configs().create(),展示 session.table() + group_by + agg + write.save_as_table(),包含连接参数配置",
14
- "files": []
15
- },
16
- {
17
- "id": 3,
18
- "prompt": "我在用 Apache Superset 连接 ClickZetta,SQLAlchemy URI 应该怎么填?密码是 P@ss#2024,需要注意什么?",
19
- "expected_output": "提供正确的 clickzetta:// URI 格式,指出密码特殊字符需要 quote_plus 编码,给出编码后的示例",
20
- "files": []
21
- },
22
- {
23
- "id": 4,
24
- "prompt": "连接云器 Lakehouse 报错 Connection refused,我的 service 填的是 api.clickzetta.com,实例在上海腾讯云,怎么排查?",
25
- "expected_output": "识别 service 地址填错,给出正确的上海腾讯云地址 ap-shanghai-tencentcloud.api.clickzetta.com,并提供排查步骤",
26
- "files": []
27
- },
28
- {
29
- "id": 5,
30
- "prompt": "我有三个环境:dev/staging/prod,都在同一个 ClickZetta 实例上但不同 workspace。想用 connections.json 统一管理,并在代码里切换。怎么配置?",
31
- "expected_output": "提供 connections.json 多连接配置示例(含 is_default),展示 switch_connection() 用法,说明文件放置路径",
32
- "files": []
33
- }
34
- ]
35
- }