@clickzetta/cz-cli-darwin-x64 0.3.16 → 0.3.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +386 -0
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +548 -0
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +220 -0
- package/bin/skills/clickzetta-data-ingest-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +112 -0
- package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +257 -0
- package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +124 -0
- package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +96 -0
- package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +109 -0
- package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +156 -0
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +751 -0
- package/bin/skills/clickzetta-kafka-ingest-pipeline/eval_cases.jsonl +5 -0
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +324 -0
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +537 -0
- package/bin/skills/clickzetta-query-optimizer/SKILL.md +156 -0
- package/bin/skills/clickzetta-query-optimizer/references/explain.md +56 -0
- package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +78 -0
- package/bin/skills/clickzetta-query-optimizer/references/optimize.md +65 -0
- package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +49 -0
- package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +42 -0
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +276 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +379 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +166 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +185 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +129 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +222 -0
- package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +125 -0
- package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +206 -0
- package/bin/skills/clickzetta-vcluster-manager/SKILL.md +212 -0
- package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +54 -0
- package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +150 -0
- package/bin/skills/clickzetta-volume-manager/SKILL.md +292 -0
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +199 -0
- package/package.json +1 -1
- /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/SKILL.md +0 -0
- /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/references/dt-declaration-strategy.md +0 -0
- /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/references/incremental-config-reference.md +0 -0
- /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/references/refresh-history-guide.md +0 -0
- /package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/references/sql-limitations.md +0 -0
- /package/bin/skills/{dynamic-table-alter → clickzetta-dynamic-table/dynamic-table-alter}/SKILL.md +0 -0
package/bin/skills/clickzetta-volume-manager/SKILL.md
@@ -0,0 +1,292 @@
---
name: clickzetta-volume-manager
description: |
  Manages ClickZetta Lakehouse Volume objects: mounting object storage (OSS/COS/S3),
  querying files, and importing/exporting data. Covers the full workflow of creating
  external Volumes (OSS/COS/S3), internal User Volume file operations (PUT/GET/REMOVE),
  querying files directly with SELECT FROM VOLUME, importing with COPY INTO TABLE,
  and exporting with COPY INTO VOLUME.
  Triggered when the user says "create Volume", "mount OSS", "mount S3", "mount COS",
  "Volume management", "query OSS files", "query S3 files", "upload files to Volume",
  "PUT file", "GET file", "import data from Volume", "export to Volume",
  "COPY INTO VOLUME", "SELECT FROM VOLUME", "User Volume", "data lake files",
  "data export", "export data", "export CSV", "export Parquet", or "COPY OVERWRITE INTO".
  Keywords: Volume, OSS, COS, S3, mount, file query, COPY INTO, external storage
---

# ClickZetta Volume Management

Read [references/volume-ddl.md](references/volume-ddl.md) for the complete syntax.

## Volume Types

| Type | Description | Typical Use |
|---|---|---|
| External Volume | Mounts an OSS/COS/S3 path | Access existing object-storage data |
| User Volume | Per-user internal storage | Temporary uploads, local file imports |
| Table Volume | Table-scoped internal storage | Managing a table's data files |

---

## Creating an External Volume

Prerequisite: create a STORAGE CONNECTION first (the object-storage authentication configuration).

> ⚠️ **Cross-cloud restriction**: the Storage Connection must be on the same cloud provider as the Lakehouse instance. An Alibaba Cloud instance cannot create COS/S3 Connections, and a Tencent Cloud instance cannot create OSS Connections.

> ⚠️ **Alibaba Cloud OSS parameter names**:
> - Lowercase form: `access_id` / `access_key` (recommended)
> - Uppercase form: `ACCESS_KEY_ID` / `ACCESS_KEY_SECRET` (also accepted)
> - ⚠️ `ACCESS_KEY` / `SECRET_KEY` fails (missing the `_ID` / `_SECRET` suffix)

```sql
-- Alibaba Cloud OSS
CREATE STORAGE CONNECTION IF NOT EXISTS my_oss_conn
TYPE OSS
access_id = 'LTAIxxxxxxxxxxxx'
access_key = 'T8Gexxxxxxmtxxxxxx'
ENDPOINT = 'oss-cn-hangzhou-internal.aliyuncs.com';

-- Tencent Cloud COS
CREATE STORAGE CONNECTION IF NOT EXISTS my_cos_conn
TYPE COS
ACCESS_KEY = '<access_key>'
SECRET_KEY = '<secret_key>'
REGION = 'ap-shanghai'
APP_ID = '1310000503';

-- AWS S3
CREATE STORAGE CONNECTION IF NOT EXISTS my_s3_conn
TYPE S3
ACCESS_KEY = '<access_key>'
SECRET_KEY = '<secret_key>'
REGION = 'us-east-1';
```

```sql
-- Mount Alibaba Cloud OSS
CREATE EXTERNAL VOLUME my_oss_volume
LOCATION 'oss://my-bucket/data-path/'
USING CONNECTION my_oss_conn
DIRECTORY = (ENABLE = TRUE, AUTO_REFRESH = TRUE)
RECURSIVE = TRUE;

-- Mount Tencent Cloud COS
CREATE EXTERNAL VOLUME my_cos_volume
LOCATION 'cos://my-bucket/data-path/'
USING CONNECTION my_cos_conn
DIRECTORY = (ENABLE = TRUE)
RECURSIVE = TRUE;

-- Mount AWS S3
CREATE EXTERNAL VOLUME my_s3_volume
LOCATION 's3://my-bucket/data-path/'
USING CONNECTION my_s3_conn
DIRECTORY = (ENABLE = TRUE)
RECURSIVE = TRUE;
```

---

## Inspecting Volumes

```sql
-- List all Volumes
SHOW VOLUMES;

-- Filter external Volumes (SHOW VOLUMES does not support WHERE; use information_schema)
SELECT volume_name, volume_type, volume_region, volume_creator
FROM information_schema.volumes
WHERE volume_type = 'EXTERNAL';

-- Show details
DESC VOLUME my_oss_volume;

-- List files under the directory
SHOW VOLUME DIRECTORY my_oss_volume;

-- Refresh directory metadata, then query it (a manual refresh may be needed after uploading new files)
ALTER VOLUME my_oss_volume REFRESH;
SELECT * FROM DIRECTORY(VOLUME my_oss_volume);
```

> ⚠️ **Directory refresh**: after uploading files to object storage, `SHOW VOLUME DIRECTORY` may not show the new files immediately.
> With `AUTO_REFRESH = TRUE` the system refreshes periodically; otherwise run `ALTER VOLUME name REFRESH` manually.

---

## Querying Files in a Volume Directly

> ⚠️ **Syntax restriction**: ClickZetta does not support the `@volume_name` shorthand (Snowflake Stage syntax); use the full `FROM VOLUME name USING format` syntax.
> ⚠️ **Mixed file formats**: if a Volume contains files of several formats (e.g. .csv mixed with .json), omitting `FILES()` or `SUBDIRECTORY` makes the query try to read every file, and it may fail on the mismatched format. Prefer `FILES('xxx.csv')` to pick specific files or `SUBDIRECTORY 'csv_data/'` to pick a subdirectory.
> ⚠️ **Nested JSON field access**: use the `data['key']` syntax (not Snowflake's `data:key` syntax).

```sql
-- Query CSV files (schema is inferred automatically)
SELECT * FROM VOLUME my_oss_volume
USING CSV
OPTIONS('header' = 'true', 'sep' = ',')
SUBDIRECTORY 'orders/2024/'
LIMIT 100;

-- Query Parquet files
SELECT * FROM VOLUME my_oss_volume
USING PARQUET
REGEXP '.*2024-0[1-6].parquet';

-- Query specific files (recommended; avoids mixed-format conflicts)
SELECT * FROM VOLUME my_oss_volume
USING JSON
FILES('user_events.json');

-- Query nested JSON fields
SELECT
  data['event_id'] AS event_id,
  data['properties']['device'] AS device
FROM VOLUME my_oss_volume
USING JSON
FILES('events.json');

-- Query User Volume files
SELECT * FROM USER VOLUME
USING CSV
OPTIONS('header' = 'true')
FILES('upload.csv');
```

---

## User Volume File Operations

```sql
-- List files
SHOW USER VOLUME DIRECTORY;

-- Upload a local file
PUT '/local/path/data.csv' TO USER VOLUME;
PUT '/local/path/data.csv' TO USER VOLUME FILE 'subdir/data.csv';

-- Download a file
GET USER VOLUME FILE 'subdir/data.csv' TO '/local/output/';

-- Remove a file
REMOVE USER VOLUME FILE 'subdir/data.csv';
```

---

## Data Import and Export

### Importing from a Volume into a Table

```sql
-- CSV import
COPY INTO my_table
FROM VOLUME my_oss_volume
USING CSV
OPTIONS('header' = 'true')
SUBDIRECTORY 'data/';

-- Import specific files
COPY INTO my_table
FROM VOLUME my_oss_volume
USING PARQUET
FILES('data_2024.parquet');

-- Import files matched by a regular expression
COPY INTO my_table
FROM VOLUME my_oss_volume
USING PARQUET
REGEXP '.*2024-0[1-6].parquet';

-- Overwrite (truncate the table, then import)
COPY OVERWRITE INTO my_table
FROM VOLUME my_oss_volume
USING CSV
OPTIONS('header' = 'true');
```

### Exporting a Table to a Volume

```sql
-- Export a whole table as Parquet (to an External Volume)
COPY INTO VOLUME my_oss_volume
SUBDIRECTORY 'export/'
FROM TABLE my_table
FILE_FORMAT = (TYPE = PARQUET);

-- Export a query result as CSV (compressed)
COPY INTO VOLUME my_oss_volume
SUBDIRECTORY 'export/2024/'
FROM (SELECT * FROM orders WHERE year = 2024)
FILE_FORMAT = (TYPE = CSV COMPRESSION = 'GZIP');

-- Export to the User Volume
COPY INTO USER VOLUME
SUBDIRECTORY 'my_export/'
FROM TABLE my_table
FILE_FORMAT = (TYPE = CSV);

-- Export to a Table Volume
COPY INTO TABLE VOLUME my_table
SUBDIRECTORY 'backup/'
FROM TABLE my_table
FILE_FORMAT = (TYPE = PARQUET);
```

> ⚠️ `COPY INTO VOLUME` exports use `FILE_FORMAT = (TYPE = CSV/PARQUET)`, not `USING CSV`.
> The `USING ...` form is for reading from a Volume (`SELECT FROM VOLUME` and `COPY INTO TABLE`), never for exports.

### Exporting to Local Files (GET command)

```sql
-- Download a file from a Volume to the local machine
GET VOLUME my_oss_volume FILE 'export/data.csv' TO '/local/output/';

-- Download from the User Volume
GET USER VOLUME FILE 'my_export/data.csv' TO '/local/output/';
```

### Exporting via Studio

In Lakehouse Studio:
- After running a SQL query, click the "Export" button in the results pane to export the result as a CSV or Excel file
- Up to 100,000 rows of query results can be exported

---

## Dropping a Volume

```sql
DROP VOLUME IF EXISTS my_oss_volume;
```

---

## Common Issues

| Problem | Cause | Fix |
|---|---|---|
| SHOW VOLUME DIRECTORY shows no files | Directory not refreshed | Run `ALTER VOLUME name REFRESH` |
| SELECT FROM VOLUME fails | Format mismatch | Make sure the format after USING matches the actual files; use `FILES()` to pick files |
| COPY INTO fails on mixed-format files | The Volume holds files of several formats | Use `FILES('xxx.csv')` to pick files or `SUBDIRECTORY` to pick a subdirectory |
| PUT command fails | Local path does not exist | Check that the local file path is correct |
| COPY INTO fails | Insufficient permissions | Check the access-key permissions of the STORAGE CONNECTION |
| `@volume` syntax errors out | Not supported by ClickZetta | Use the full `FROM VOLUME name USING format` syntax |
| `data:key` syntax errors out | Snowflake JSON syntax does not apply | Use `data['key']` to access nested JSON fields |
| `METADATA$FILENAME` errors out | Metadata field not supported by ClickZetta | Use a string literal, or add a file-path column manually at INSERT time (sketch below) |
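
A minimal sketch of that `METADATA$FILENAME` workaround, using only constructs shown above. The staging and target table names and the file path are illustrative, and it assumes the engine accepts `SELECT *, <expression>` in an ordinary INSERT ... SELECT:

```sql
-- Load one known file into a staging table, then stamp its path with a literal,
-- since METADATA$FILENAME is not available in ClickZetta.
COPY INTO staging_orders
FROM VOLUME my_oss_volume
USING PARQUET
FILES('orders/2024/data_2024.parquet');

INSERT INTO orders_with_source
SELECT *, 'orders/2024/data_2024.parquet' AS source_file
FROM staging_orders;
```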

---

## Snowflake Migration Mapping

| Snowflake syntax | ClickZetta equivalent | Notes |
|---|---|---|
| `@my_stage` | `VOLUME my_volume` | Stage → Volume |
| `SELECT * FROM @stage/path` | `SELECT * FROM VOLUME vol USING CSV SUBDIRECTORY 'path/'` | The USING format is required |
| `data:key::STRING` | `data['key']` | JSON field access |
| `data:nested.key` | `data['nested']['key']` | Nested JSON access |
| `METADATA$FILENAME` | Not supported | Add a file-path column manually |
| `METADATA$FILE_ROW_NUMBER` | Not supported | No equivalent |
| `FILE_FORMAT = (TYPE = CSV)` | `USING CSV OPTIONS(...)` | USING on import, FILE_FORMAT on export |
| `COPY INTO table FROM @stage` | `COPY INTO table FROM VOLUME vol USING format` | Import syntax |
| `COPY INTO @stage FROM table` | `COPY INTO VOLUME vol SUBDIRECTORY '/' FROM TABLE t FILE_FORMAT=(...)` | Export syntax |
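
To make the mapping concrete, a hedged before/after sketch built only from the rows above; the volume name `my_events_volume` and the `events/` path are illustrative:

```sql
-- Snowflake (before migration):
--   SELECT data:order_id::STRING, data:customer.name FROM @my_stage/events/;

-- ClickZetta equivalent (sketch):
SELECT
  data['order_id'] AS order_id,
  data['customer']['name'] AS customer_name
FROM VOLUME my_events_volume
USING JSON
SUBDIRECTORY 'events/';
```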

package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md
@@ -0,0 +1,199 @@
# Volume Management Reference

> Source: https://www.yunqi.tech/documents/datalake_volume_object, among others

## Volume Types

| Type | Description |
|---|---|
| External Volume | Mounts an object-storage path on OSS/COS/S3, etc. |
| Internal Volume | System-managed storage, including User Volume, Table Volume, and named Volumes |

---

## CREATE EXTERNAL VOLUME

```sql
-- OSS (the Connection must use lowercase access_id/access_key)
CREATE EXTERNAL VOLUME my_oss_volume
LOCATION 'oss://<bucket>/<path>'
USING CONNECTION my_oss_conn
DIRECTORY = (ENABLE = TRUE, AUTO_REFRESH = TRUE)
RECURSIVE = TRUE;

-- COS
CREATE EXTERNAL VOLUME my_cos_volume
LOCATION 'cos://<bucket>/<path>'
USING CONNECTION my_cos_conn
DIRECTORY = (ENABLE = TRUE)
RECURSIVE = TRUE;

-- S3
CREATE EXTERNAL VOLUME my_s3_volume
LOCATION 's3://<bucket>/<path>'
USING CONNECTION my_s3_conn
DIRECTORY = (ENABLE = TRUE)
RECURSIVE = TRUE;
```

Parameters:
- `LOCATION`: object-storage path
- `USING CONNECTION`: name of an existing STORAGE CONNECTION
- `DIRECTORY`: directory feature configuration; `ENABLE=TRUE` turns on the directory index, `AUTO_REFRESH=TRUE` refreshes it automatically
- `RECURSIVE`: whether to scan subdirectories recursively

> ⚠️ If `SHOW VOLUME DIRECTORY` does not show newly uploaded files, run `ALTER VOLUME name REFRESH` to refresh manually.

---

## ALTER VOLUME

```sql
-- Refresh directory metadata
ALTER VOLUME my_oss_volume REFRESH;
```

---

## DROP VOLUME

```sql
DROP VOLUME IF EXISTS my_oss_volume;
```

---

## SHOW / DESC VOLUME

```sql
-- List all Volumes
SHOW VOLUMES;

-- Filter by condition (SHOW VOLUMES does not support WHERE; use information_schema)
SELECT volume_name, volume_type, volume_region, volume_creator
FROM information_schema.volumes
WHERE volume_type = 'EXTERNAL';

-- Look up by name
SELECT * FROM information_schema.volumes
WHERE volume_name = 'my_oss_volume';

-- Show Volume details
DESC VOLUME my_oss_volume;

-- List files under the Volume directory
SHOW VOLUME DIRECTORY my_oss_volume;
```

---

## Directory Metadata (DIRECTORY function)

```sql
-- Inspect Volume directory metadata (run ALTER VOLUME REFRESH first)
SELECT * FROM DIRECTORY(VOLUME my_oss_volume);
```

---

## User Volume Operations

```sql
-- List User Volume files
SHOW USER VOLUME DIRECTORY;

-- Upload a file to the User Volume root
PUT '/local/path/file.csv' TO USER VOLUME;

-- Upload to a specific target path
PUT '/local/path/file.csv' TO USER VOLUME FILE 'subdir/file.csv';

-- Upload multiple files with a wildcard
PUT '/local/path/images/*' TO USER VOLUME SUBDIRECTORY 'images/';

-- Download a file
GET USER VOLUME FILE 'subdir/file.csv' TO '/local/output/';

-- Remove a file
REMOVE USER VOLUME FILE 'subdir/file.csv';

-- Remove all files under a directory
REMOVE USER VOLUME SUBDIRECTORY '/';
```

---

## Querying Data from a Volume (SELECT FROM VOLUME)

```sql
-- Query CSV files
SELECT * FROM VOLUME my_oss_volume
USING CSV
OPTIONS('header' = 'true', 'sep' = ',')
SUBDIRECTORY 'data/'
LIMIT 100;

-- Query Parquet files
SELECT * FROM VOLUME my_oss_volume
USING PARQUET
FILES('part-00001.parquet', 'part-00002.parquet');

-- Match files with a regular expression
SELECT * FROM VOLUME my_oss_volume
USING PARQUET
REGEXP '.*2024-0[1-3].parquet';

-- Query User Volume files
SELECT * FROM USER VOLUME
USING CSV
OPTIONS('header' = 'true')
FILES('data.csv')
LIMIT 10;
```

Supported formats: `CSV`, `PARQUET`, `ORC`, `JSON`, `BSON`

Common CSV OPTIONS parameters (combined in the sketch after this list):
- `header`: whether the file has a header row; default `false`
- `sep`: column separator; default `,`
- `compression`: compression format (gzip/zstd/zlib)
- `multiLine`: whether fields may span multiple lines; default `false`
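
A hedged sketch that combines these options in one query; the volume, file name, and delimiter are illustrative:

```sql
-- Read a gzip-compressed, pipe-delimited CSV that has a header row and
-- fields that may span multiple lines.
SELECT * FROM VOLUME my_oss_volume
USING CSV
OPTIONS('header' = 'true', 'sep' = '|', 'compression' = 'gzip', 'multiLine' = 'true')
FILES('exports/orders.csv.gz')
LIMIT 10;
```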

---

## COPY INTO TABLE (import from a Volume)

```sql
COPY INTO my_table
FROM VOLUME my_oss_volume
USING CSV
OPTIONS('header' = 'true')
SUBDIRECTORY 'data/';
```

## COPY INTO VOLUME (export to a Volume)

```sql
-- Export a table to an External Volume
COPY INTO VOLUME my_oss_volume
SUBDIRECTORY 'export/'
FROM TABLE my_table
FILE_FORMAT = (TYPE = CSV);

-- Export a query result
COPY INTO VOLUME my_oss_volume
SUBDIRECTORY 'export/'
FROM (SELECT * FROM orders WHERE year = 2024)
FILE_FORMAT = (TYPE = PARQUET COMPRESSION = 'GZIP');

-- Export to the User Volume
COPY INTO USER VOLUME
SUBDIRECTORY 'export/'
FROM TABLE my_table
FILE_FORMAT = (TYPE = CSV);
```

> ⚠️ **Key distinction**:
> - **Import** (COPY INTO TABLE / SELECT FROM VOLUME): use `USING CSV/PARQUET/JSON` + `OPTIONS(...)`
> - **Export** (COPY INTO VOLUME): use `FILE_FORMAT = (TYPE = CSV/PARQUET/JSON)`
> - The two forms must not be mixed!
package/package.json
CHANGED

/package/bin/skills/{dt-creator → clickzetta-dynamic-table/dt-creator}/references/sql-limitations.md
RENAMED
File without changes

/package/bin/skills/{dynamic-table-alter → clickzetta-dynamic-table/dynamic-table-alter}/SKILL.md
RENAMED
File without changes