@clickzetta/cz-cli-linux-x64 0.3.4 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/package.json +1 -1
- package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
- package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
- package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
- package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
- package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
- package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
- package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
- package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
- package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
- package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
- package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -293
- package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -457
- package/bin/skills/clickzetta-concepts/SKILL.md +0 -282
- package/bin/skills/clickzetta-concepts/references/brands-and-endpoints.md +0 -79
- package/bin/skills/clickzetta-concepts/references/object-model.md +0 -311
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -165
- package/bin/skills/clickzetta-data-lifecycle/SKILL.md +0 -211
- package/bin/skills/clickzetta-data-lifecycle/references/lifecycle-reference.md +0 -175
- package/bin/skills/clickzetta-data-recovery/SKILL.md +0 -215
- package/bin/skills/clickzetta-data-recovery/evals/evals.json +0 -35
- package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
- package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
- package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
- package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
- package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -540
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -259
- package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -112
- package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -257
- package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
- package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
- package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
- package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -429
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -268
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
- package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -120
- package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
- package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
- package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -156
- package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
- package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
- package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
- package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
- package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
- package/bin/skills/clickzetta-information-schema/SKILL.md +0 -367
- package/bin/skills/clickzetta-information-schema/references/instance-views-reference.md +0 -276
- package/bin/skills/clickzetta-information-schema/references/metering-views-reference.md +0 -137
- package/bin/skills/clickzetta-information-schema/references/views-reference.md +0 -271
- package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -639
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -324
- package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
- package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
- package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
- package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
- package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
- package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
- package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
- package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
- package/bin/skills/clickzetta-metadata-query/SKILL.md +0 -298
- package/bin/skills/clickzetta-metadata-query/references/show-desc-reference.md +0 -326
- package/bin/skills/clickzetta-monitoring/SKILL.md +0 -199
- package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
- package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -427
- package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
- package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
- package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
- package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
- package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
- package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -197
- package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
- package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
- package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
- package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -379
- package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
- package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -185
- package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
- package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -222
- package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -125
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -172
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
- package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
- package/bin/skills/clickzetta-studio-overview/SKILL.md +0 -170
- package/bin/skills/clickzetta-studio-overview/references/studio-modules.md +0 -173
- package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -206
- package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
- package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
- package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
- package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -292
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -199
- package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
- package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
|
@@ -1,283 +0,0 @@
|
|
|
1
|
-
# ZettaPark 快速参考
|
|
2
|
-
|
|
3
|
-
> 来源:https://www.yunqi.tech/documents/ZettaparkQuickStart
|
|
4
|
-
|
|
5
|
-
## 安装
|
|
6
|
-
|
|
7
|
-
```bash
|
|
8
|
-
pip install clickzetta_zettapark_python -U -i https://pypi.tuna.tsinghua.edu.cn/simple
|
|
9
|
-
```
|
|
10
|
-
|
|
11
|
-
---
|
|
12
|
-
|
|
13
|
-
## 创建会话
|
|
14
|
-
|
|
15
|
-
```python
|
|
16
|
-
from clickzetta.zettapark.session import Session
|
|
17
|
-
|
|
18
|
-
connection_parameters = {
|
|
19
|
-
"username": "your_username",
|
|
20
|
-
"password": "your_password",
|
|
21
|
-
"service": "cn-shanghai-alicloud.api.clickzetta.com",
|
|
22
|
-
"instance": "your_instance_id",
|
|
23
|
-
"workspace": "your_workspace",
|
|
24
|
-
"schema": "public",
|
|
25
|
-
"vcluster": "default_ap",
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
session = Session.builder.configs(connection_parameters).create()
|
|
29
|
-
```
|
|
30
|
-
|
|
31
|
-
带 hints(超时、query_tag 等):
|
|
32
|
-
|
|
33
|
-
```python
|
|
34
|
-
connection_parameters = {
|
|
35
|
-
"username": "your_username",
|
|
36
|
-
"password": "your_password",
|
|
37
|
-
"service": "cn-shanghai-alicloud.api.clickzetta.com",
|
|
38
|
-
"instance": "your_instance_id",
|
|
39
|
-
"workspace": "your_workspace",
|
|
40
|
-
"schema": "public",
|
|
41
|
-
"vcluster": "default_ap",
|
|
42
|
-
"hints": {
|
|
43
|
-
"sdk.job.timeout": 300,
|
|
44
|
-
"query_tag": "my_zettapark_app",
|
|
45
|
-
}
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
session = Session.builder.configs(connection_parameters).create()
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
从 JSON 配置文件读取:
|
|
52
|
-
|
|
53
|
-
```python
|
|
54
|
-
import json
|
|
55
|
-
with open('config.json', 'r') as f:
|
|
56
|
-
    config = json.load(f)
|
|
57
|
-
session = Session.builder.configs(config).create()
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
验证连接:
|
|
61
|
-
|
|
62
|
-
```python
|
|
63
|
-
session.sql("SELECT current_user(), current_workspace(), current_vcluster()").show()
|
|
64
|
-
```
|
|
65
|
-
|
|
66
|
-
关闭会话:
|
|
67
|
-
|
|
68
|
-
```python
|
|
69
|
-
session.close()
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
---
|
|
73
|
-
|
|
74
|
-
## 构建 DataFrame
|
|
75
|
-
|
|
76
|
-
```python
|
|
77
|
-
# 从表创建
|
|
78
|
-
df = session.table("my_schema.my_table")
|
|
79
|
-
|
|
80
|
-
# 从 SQL 创建
|
|
81
|
-
df = session.sql("SELECT * FROM orders WHERE year = 2024")
|
|
82
|
-
|
|
83
|
-
# 从 Python 数据创建
|
|
84
|
-
df = session.create_dataframe([1, 2, 3, 4]).to_df("id")
|
|
85
|
-
df = session.create_dataframe([[1, "Alice"], [2, "Bob"]], schema=["id", "name"])
|
|
86
|
-
|
|
87
|
-
# 从 Row 对象创建
|
|
88
|
-
from clickzetta.zettapark import Row
|
|
89
|
-
df = session.create_dataframe([Row(id=1, name="Alice"), Row(id=2, name="Bob")])
|
|
90
|
-
|
|
91
|
-
# 带 Schema 创建
|
|
92
|
-
from clickzetta.zettapark.types import IntegerType, StringType, StructType, StructField
|
|
93
|
-
schema = StructType([StructField("id", IntegerType()), StructField("name", StringType())])
|
|
94
|
-
df = session.create_dataframe([[1, "Alice"], [2, "Bob"]], schema)
|
|
95
|
-
|
|
96
|
-
# 范围序列
|
|
97
|
-
df = session.range(1, 10, 2).to_df("n") # 1,3,5,7,9
|
|
98
|
-
```
|
|
99
|
-
|
|
100
|
-
---
|
|
101
|
-
|
|
102
|
-
## DataFrame 转换操作
|
|
103
|
-
|
|
104
|
-
```python
|
|
105
|
-
from clickzetta.zettapark import functions as F
|
|
106
|
-
|
|
107
|
-
# 过滤行
|
|
108
|
-
df.filter(F.col("age") > 18)
|
|
109
|
-
df.filter(F.col("status") == "active")
|
|
110
|
-
df.where(F.col("amount") > 1000)
|
|
111
|
-
|
|
112
|
-
# 选择列
|
|
113
|
-
df.select("id", "name", "amount")
|
|
114
|
-
df.select(F.col("id"), F.col("name").as_("user_name"))
|
|
115
|
-
|
|
116
|
-
# 新增/修改列
|
|
117
|
-
df.with_column("total", F.col("price") * F.col("qty"))
|
|
118
|
-
df.with_column("upper_name", F.upper(F.col("name")))
|
|
119
|
-
|
|
120
|
-
# 重命名列
|
|
121
|
-
df.rename(F.col("old_name"), "new_name")
|
|
122
|
-
|
|
123
|
-
# 排序
|
|
124
|
-
df.sort(F.col("amount").desc())
|
|
125
|
-
df.order_by(F.col("created_at").asc())
|
|
126
|
-
|
|
127
|
-
# 去重
|
|
128
|
-
df.distinct()
|
|
129
|
-
df.drop_duplicates(["user_id"])
|
|
130
|
-
|
|
131
|
-
# 限制行数
|
|
132
|
-
df.limit(100)
|
|
133
|
-
|
|
134
|
-
# 删除列
|
|
135
|
-
df.drop("unnecessary_col")
|
|
136
|
-
```
|
|
137
|
-
|
|
138
|
-
---
|
|
139
|
-
|
|
140
|
-
## 聚合操作
|
|
141
|
-
|
|
142
|
-
```python
|
|
143
|
-
from clickzetta.zettapark import functions as F
|
|
144
|
-
|
|
145
|
-
# 分组聚合
|
|
146
|
-
df.group_by("category").agg(
|
|
147
|
-
F.sum("amount").as_("total_amount"),
|
|
148
|
-
F.count("*").as_("order_count"),
|
|
149
|
-
F.avg("price").as_("avg_price"),
|
|
150
|
-
F.max("amount").as_("max_amount"),
|
|
151
|
-
F.min("amount").as_("min_amount"),
|
|
152
|
-
)
|
|
153
|
-
|
|
154
|
-
# 全局聚合
|
|
155
|
-
df.agg(F.count("*"), F.sum("amount"))
|
|
156
|
-
```
|
|
157
|
-
|
|
158
|
-
---
|
|
159
|
-
|
|
160
|
-
## JOIN 操作
|
|
161
|
-
|
|
162
|
-
```python
|
|
163
|
-
# 内连接
|
|
164
|
-
df_orders.join(df_customers, df_orders["customer_id"] == df_customers["id"])
|
|
165
|
-
|
|
166
|
-
# 左连接
|
|
167
|
-
df_orders.join(df_customers, df_orders["customer_id"] == df_customers["id"], "left")
|
|
168
|
-
|
|
169
|
-
# 选择连接后的列(避免列名冲突)
|
|
170
|
-
result = df_orders.join(df_customers, df_orders["customer_id"] == df_customers["id"]) \
|
|
171
|
-
.select(df_orders["order_id"], df_customers["name"], df_orders["amount"])
|
|
172
|
-
```
|
|
173
|
-
|
|
174
|
-
---
|
|
175
|
-
|
|
176
|
-
## 执行与结果获取
|
|
177
|
-
|
|
178
|
-
```python
|
|
179
|
-
# 打印前 N 行(触发执行)
|
|
180
|
-
df.show()
|
|
181
|
-
df.show(20)
|
|
182
|
-
|
|
183
|
-
# 收集所有结果为 Row 列表
|
|
184
|
-
rows = df.collect()
|
|
185
|
-
for row in rows:
|
|
186
|
-
    print(row["id"], row["name"])
|
|
187
|
-
|
|
188
|
-
# 转换为 Pandas DataFrame
|
|
189
|
-
pandas_df = df.to_pandas()
|
|
190
|
-
|
|
191
|
-
# 获取行数
|
|
192
|
-
count = df.count()
|
|
193
|
-
|
|
194
|
-
# 获取列名
|
|
195
|
-
print(df.columns)
|
|
196
|
-
|
|
197
|
-
# 查看 Schema
|
|
198
|
-
df.schema.print_tree()
|
|
199
|
-
```
|
|
200
|
-
|
|
201
|
-
---
|
|
202
|
-
|
|
203
|
-
## 写入数据
|
|
204
|
-
|
|
205
|
-
```python
|
|
206
|
-
# 写入已有表(追加)
|
|
207
|
-
df.write.save_as_table("my_table", mode="append")
|
|
208
|
-
|
|
209
|
-
# 覆盖写入
|
|
210
|
-
df.write.save_as_table("my_table", mode="overwrite")
|
|
211
|
-
|
|
212
|
-
# 自动建表并写入(overwrite 会重建表)
|
|
213
|
-
df.write.save_as_table("new_table", mode="overwrite")
|
|
214
|
-
|
|
215
|
-
# 写入指定 Schema 下的表
|
|
216
|
-
df.write.save_as_table("my_schema.my_table", mode="append")
|
|
217
|
-
```
|
|
218
|
-
|
|
219
|
-
---
|
|
220
|
-
|
|
221
|
-
## 执行 SQL
|
|
222
|
-
|
|
223
|
-
```python
|
|
224
|
-
# 执行 DDL/DML
|
|
225
|
-
session.sql("CREATE TABLE IF NOT EXISTS t (id INT, name STRING)").collect()
|
|
226
|
-
session.sql("INSERT INTO t VALUES (1, 'Alice')").collect()
|
|
227
|
-
|
|
228
|
-
# 执行查询并获取 DataFrame
|
|
229
|
-
df = session.sql("SELECT * FROM orders WHERE amount > 1000")
|
|
230
|
-
df.show()
|
|
231
|
-
|
|
232
|
-
# 切换 Schema
|
|
233
|
-
session.use_schema("my_schema")
|
|
234
|
-
```
|
|
235
|
-
|
|
236
|
-
---
|
|
237
|
-
|
|
238
|
-
## 文件操作(Volume)
|
|
239
|
-
|
|
240
|
-
```python
|
|
241
|
-
# 上传文件到 User Volume
|
|
242
|
-
session.file.put("/local/path/data.csv", "volume:user://~/data/")
|
|
243
|
-
|
|
244
|
-
# 下载文件
|
|
245
|
-
session.file.get("volume:user://~/data/data.csv", "/local/output/")
|
|
246
|
-
|
|
247
|
-
# 列出 User Volume 文件
|
|
248
|
-
session.sql("LIST USER VOLUME").show()
|
|
249
|
-
session.sql("SHOW USER VOLUME DIRECTORY").show()
|
|
250
|
-
```
|
|
251
|
-
|
|
252
|
-
---
|
|
253
|
-
|
|
254
|
-
## 常用 functions 速查
|
|
255
|
-
|
|
256
|
-
```python
|
|
257
|
-
from clickzetta.zettapark import functions as F
|
|
258
|
-
|
|
259
|
-
# 字符串
|
|
260
|
-
F.upper(col), F.lower(col), F.concat(col1, col2)
|
|
261
|
-
F.substring(col, 1, 3), F.trim(col), F.length(col)
|
|
262
|
-
|
|
263
|
-
# 数值
|
|
264
|
-
F.abs(col), F.round(col, 2), F.floor(col), F.ceil(col)
|
|
265
|
-
F.sqrt(col), F.pow(col, 2)
|
|
266
|
-
|
|
267
|
-
# 日期时间
|
|
268
|
-
F.current_date(), F.current_timestamp()
|
|
269
|
-
F.year(col), F.month(col), F.day(col)
|
|
270
|
-
F.date_add(col, 7), F.datediff(col1, col2)
|
|
271
|
-
|
|
272
|
-
# 条件
|
|
273
|
-
F.when(F.col("status") == "A", "Active").otherwise("Inactive")
|
|
274
|
-
F.coalesce(col1, col2) # 第一个非 null 值
|
|
275
|
-
F.isnull(col), F.isnotnull(col)
|
|
276
|
-
|
|
277
|
-
# 聚合
|
|
278
|
-
F.count("*"), F.sum(col), F.avg(col), F.max(col), F.min(col)
|
|
279
|
-
F.count_distinct(col)
|
|
280
|
-
|
|
281
|
-
# 类型转换
|
|
282
|
-
F.col("amount").cast(IntegerType())  # 需先导入:from clickzetta.zettapark.types import IntegerType
|
|
283
|
-
```
|