@clickzetta/cz-cli-linux-x64 0.3.4 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/package.json +1 -1
- package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
- package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
- package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
- package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
- package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
- package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
- package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
- package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
- package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
- package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
- package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -293
- package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -457
- package/bin/skills/clickzetta-concepts/SKILL.md +0 -282
- package/bin/skills/clickzetta-concepts/references/brands-and-endpoints.md +0 -79
- package/bin/skills/clickzetta-concepts/references/object-model.md +0 -311
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -165
- package/bin/skills/clickzetta-data-lifecycle/SKILL.md +0 -211
- package/bin/skills/clickzetta-data-lifecycle/references/lifecycle-reference.md +0 -175
- package/bin/skills/clickzetta-data-recovery/SKILL.md +0 -215
- package/bin/skills/clickzetta-data-recovery/evals/evals.json +0 -35
- package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
- package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
- package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
- package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
- package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -540
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -259
- package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -112
- package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -257
- package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
- package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
- package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
- package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -429
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -268
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
- package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -120
- package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
- package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
- package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -156
- package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
- package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
- package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
- package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
- package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
- package/bin/skills/clickzetta-information-schema/SKILL.md +0 -367
- package/bin/skills/clickzetta-information-schema/references/instance-views-reference.md +0 -276
- package/bin/skills/clickzetta-information-schema/references/metering-views-reference.md +0 -137
- package/bin/skills/clickzetta-information-schema/references/views-reference.md +0 -271
- package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -639
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -324
- package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
- package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
- package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
- package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
- package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
- package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
- package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
- package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
- package/bin/skills/clickzetta-metadata-query/SKILL.md +0 -298
- package/bin/skills/clickzetta-metadata-query/references/show-desc-reference.md +0 -326
- package/bin/skills/clickzetta-monitoring/SKILL.md +0 -199
- package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
- package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -427
- package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
- package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
- package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
- package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
- package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
- package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -197
- package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
- package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
- package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
- package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -379
- package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
- package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -185
- package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
- package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -222
- package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -125
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -172
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
- package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
- package/bin/skills/clickzetta-studio-overview/SKILL.md +0 -170
- package/bin/skills/clickzetta-studio-overview/references/studio-modules.md +0 -173
- package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -206
- package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
- package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
- package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
- package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -292
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -199
- package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
- package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
|
@@ -1,130 +0,0 @@
|
|
|
1
|
-
# External Catalog 参考
|
|
2
|
-
|
|
3
|
-
> 来源:https://www.yunqi.tech/documents/external-catalog-summary 等
|
|
4
|
-
|
|
5
|
-
> ⚠️ External Catalog 当前处于公开预览阶段。目前只有 `instance_admin` 角色可以创建和查询 External Catalog。
|
|
6
|
-
|
|
7
|
-
## 概述
|
|
8
|
-
|
|
9
|
-
External Catalog 映射外部数据系统(Hive、Iceberg、Databricks)的数据库,使 Lakehouse 可对其执行**只读**联邦查询。
|
|
10
|
-
|
|
11
|
-
**支持的数据源**:
|
|
12
|
-
- Apache Hive(通过 Hive Metastore)
|
|
13
|
-
- Iceberg REST Catalog(如 Snowflake OpenCatalog)
|
|
14
|
-
- Databricks Unity Catalog
|
|
15
|
-
|
|
16
|
-
---
|
|
17
|
-
|
|
18
|
-
## 创建流程(以 Hive 为例)
|
|
19
|
-
|
|
20
|
-
### 步骤 1:创建存储连接
|
|
21
|
-
|
|
22
|
-
```sql
|
|
23
|
-
-- OSS
|
|
24
|
-
CREATE STORAGE CONNECTION IF NOT EXISTS catalog_storage_oss
|
|
25
|
-
TYPE OSS
|
|
26
|
-
ACCESS_ID = 'LTAIxxxxxxxxxxxx'
|
|
27
|
-
ACCESS_KEY = 'T8Gexxxxxxmtxxxxxx'
|
|
28
|
-
ENDPOINT = 'oss-cn-hangzhou-internal.aliyuncs.com';
|
|
29
|
-
|
|
30
|
-
-- COS
|
|
31
|
-
CREATE STORAGE CONNECTION IF NOT EXISTS catalog_storage_cos
|
|
32
|
-
TYPE COS
|
|
33
|
-
ACCESS_KEY = '<access_key>'
|
|
34
|
-
SECRET_KEY = '<secret_key>'
|
|
35
|
-
REGION = 'ap-shanghai'
|
|
36
|
-
APP_ID = '1310000503';
|
|
37
|
-
|
|
38
|
-
-- S3
|
|
39
|
-
CREATE STORAGE CONNECTION IF NOT EXISTS catalog_storage_s3
|
|
40
|
-
TYPE S3
|
|
41
|
-
ACCESS_KEY = '<access_key>'
|
|
42
|
-
SECRET_KEY = '<secret_key>'
|
|
43
|
-
REGION = 'us-east-1';
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
### 步骤 2:创建 Catalog Connection
|
|
47
|
-
|
|
48
|
-
```sql
|
|
49
|
-
-- Hive Metastore
|
|
50
|
-
CREATE CATALOG CONNECTION IF NOT EXISTS catalog_api_connection
|
|
51
|
-
TYPE hms
|
|
52
|
-
hive_metastore_uris = 'host:9083'
|
|
53
|
-
storage_connection = 'catalog_storage_oss';
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
参数说明:
|
|
57
|
-
- `type`:连接类型,目前支持 `hms`(Hive Metastore Service)
|
|
58
|
-
- `hive_metastore_uris`:HMS 服务地址,格式 `host:port`,端口通常为 9083
|
|
59
|
-
- `storage_connection`:已创建的存储连接名称
|
|
60
|
-
|
|
61
|
-
### 步骤 3:创建 External Catalog
|
|
62
|
-
|
|
63
|
-
```sql
|
|
64
|
-
CREATE EXTERNAL CATALOG my_external_catalog
|
|
65
|
-
CONNECTION catalog_api_connection;
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
---
|
|
69
|
-
|
|
70
|
-
## 查看 Catalog
|
|
71
|
-
|
|
72
|
-
```sql
|
|
73
|
-
-- 列出所有 Catalog
|
|
74
|
-
SHOW CATALOGS;
|
|
75
|
-
|
|
76
|
-
-- 查看 Catalog 详情
|
|
77
|
-
DESC CATALOG my_external_catalog;
|
|
78
|
-
DESC CATALOG EXTENDED my_external_catalog;
|
|
79
|
-
```
|
|
80
|
-
|
|
81
|
-
---
|
|
82
|
-
|
|
83
|
-
## 查看 Catalog 下的对象
|
|
84
|
-
|
|
85
|
-
```sql
|
|
86
|
-
-- 查看 Schema 列表
|
|
87
|
-
SHOW SCHEMAS IN my_external_catalog;
|
|
88
|
-
|
|
89
|
-
-- 查看 Schema 列表(含类型:managed/external)
|
|
90
|
-
SHOW SCHEMAS EXTENDED IN my_external_catalog;
|
|
91
|
-
|
|
92
|
-
-- 查看表列表
|
|
93
|
-
SHOW TABLES IN my_external_catalog.my_schema;
|
|
94
|
-
|
|
95
|
-
-- 查看表结构
|
|
96
|
-
DESC TABLE my_external_catalog.my_schema.my_table;
|
|
97
|
-
```
|
|
98
|
-
|
|
99
|
-
---
|
|
100
|
-
|
|
101
|
-
## 查询外部数据
|
|
102
|
-
|
|
103
|
-
```sql
|
|
104
|
-
-- 三层命名空间语法(必须)
|
|
105
|
-
SELECT * FROM my_external_catalog.my_schema.my_table;
|
|
106
|
-
|
|
107
|
-
-- 联邦查询(外部表 JOIN 内部表)
|
|
108
|
-
SELECT e.*, i.region
|
|
109
|
-
FROM my_external_catalog.hive_schema.orders e
|
|
110
|
-
JOIN public.dim_region i ON e.region_id = i.id;
|
|
111
|
-
```
|
|
112
|
-
|
|
113
|
-
⚠️ 查询 External Catalog 下的表**必须**使用三层结构语法(catalog.schema.table),不支持 `USE` 切换 catalog。
|
|
114
|
-
|
|
115
|
-
---
|
|
116
|
-
|
|
117
|
-
## 删除 Catalog
|
|
118
|
-
|
|
119
|
-
```sql
|
|
120
|
-
DROP CATALOG IF EXISTS my_external_catalog;
|
|
121
|
-
```
|
|
122
|
-
|
|
123
|
-
---
|
|
124
|
-
|
|
125
|
-
## 注意事项
|
|
126
|
-
|
|
127
|
-
- External Catalog 为**只读**,不支持写入操作
|
|
128
|
-
- HMS 所在服务器网络需与 Lakehouse 打通(可通过 PrivateLink 实现)
|
|
129
|
-
- 目前只有 `instance_admin` 角色可以创建和查询 External Catalog
|
|
130
|
-
- Databricks Unity Catalog 要求与 Lakehouse 在同一云平台(如同在 AWS 上)
|
|
@@ -1,203 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: clickzetta-external-function
|
|
3
|
-
description: |
|
|
4
|
-
在 ClickZetta Lakehouse 中创建和使用外部函数(External Function / UDF),
|
|
5
|
-
通过 Python 或 Java 扩展 SQL 计算能力,调用 LLM、图像识别、自定义算法等外部服务。
|
|
6
|
-
覆盖 CREATE API CONNECTION(阿里云FC/腾讯云SCF/AWS Lambda)、
|
|
7
|
-
CREATE EXTERNAL FUNCTION、Python UDF 代码结构与打包、
|
|
8
|
-
内置 AI_COMPLETE 和 AI_EMBEDDING 函数的使用。
|
|
9
|
-
当用户说"外部函数"、"UDF"、"自定义函数"、"External Function"、
|
|
10
|
-
"Remote Function"、"调用 LLM"、"AI_COMPLETE"、"AI_EMBEDDING"、
|
|
11
|
-
"文本向量化"、"调用阿里云函数计算"、"调用云函数"、"Python UDF"、
|
|
12
|
-
"Java UDF"、"CREATE EXTERNAL FUNCTION"时触发。
|
|
13
|
-
Keywords: external function, UDF, Python UDF, Java UDF, LLM, custom function
|
|
14
|
-
---
|
|
15
|
-
|
|
16
|
-
# ClickZetta External Function
|
|
17
|
-
|
|
18
|
-
External Function 让 SQL 可以调用外部计算能力(LLM、图像识别、自定义算法),通过 Python/Java 编写函数逻辑,部署在云函数服务上执行。
|
|
19
|
-
|
|
20
|
-
阅读 [references/external-function-ddl.md](references/external-function-ddl.md) 了解完整语法。
|
|
21
|
-
|
|
22
|
-
---
|
|
23
|
-
|
|
24
|
-
## 两种使用路径
|
|
25
|
-
|
|
26
|
-
| 路径 | 适用场景 | 复杂度 |
|
|
27
|
-
|---|---|---|
|
|
28
|
-
| **内置 AI 函数**(AI_COMPLETE / AI_EMBEDDING) | 调用 LLM 生成文本、文本向量化 | 低,只需创建 API Connection |
|
|
29
|
-
| **External Function** | 自定义算法、图像处理、私有模型 | 高,需部署云函数 |
|
|
30
|
-
|
|
31
|
-
---
|
|
32
|
-
|
|
33
|
-
## 路径一:内置 AI 函数(推荐)
|
|
34
|
-
|
|
35
|
-
### 1. 创建 AI API Connection
|
|
36
|
-
|
|
37
|
-
```sql
|
|
38
|
-
CREATE API CONNECTION conn_bailian
|
|
39
|
-
TYPE ai_function
|
|
40
|
-
PROVIDER = 'bailian'
|
|
41
|
-
BASE_URL = 'https://dashscope.aliyuncs.com/api/v1'
|
|
42
|
-
API_KEY = 'sk-xxxxxxxxxxxxxxxxxxxxxxxx';
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
### 2. AI_COMPLETE — 调用 LLM
|
|
46
|
-
|
|
47
|
-
```sql
|
|
48
|
-
-- 文本摘要
|
|
49
|
-
SELECT id,
|
|
50
|
-
AI_COMPLETE('connection:conn_bailian', '请用一句话总结:' || content) AS summary
|
|
51
|
-
FROM articles;
|
|
52
|
-
|
|
53
|
-
-- 情感分析
|
|
54
|
-
SELECT id, review,
|
|
55
|
-
AI_COMPLETE('connection:conn_bailian',
|
|
56
|
-
'判断以下评论的情感(正面/负面/中性),只返回一个词:' || review) AS sentiment
|
|
57
|
-
FROM user_reviews;
|
|
58
|
-
|
|
59
|
-
-- 通过平台 Endpoint(管理员预配置)
|
|
60
|
-
SELECT AI_COMPLETE('endpoint:my_llm_endpoint', prompt_col) AS result
|
|
61
|
-
FROM my_table;
|
|
62
|
-
```
|
|
63
|
-
|
|
64
|
-
### 3. AI_EMBEDDING — 文本向量化
|
|
65
|
-
|
|
66
|
-
```sql
|
|
67
|
-
-- 批量生成 embedding
|
|
68
|
-
SELECT id, content,
|
|
69
|
-
AI_EMBEDDING('connection:conn_bailian', content) AS vec
|
|
70
|
-
FROM documents;
|
|
71
|
-
|
|
72
|
-
-- 语义搜索(结合向量索引)
|
|
73
|
-
SELECT id, content,
|
|
74
|
-
cosine_distance(vec, AI_EMBEDDING('connection:conn_bailian', '用户查询')) AS dist
|
|
75
|
-
FROM doc_embeddings
|
|
76
|
-
ORDER BY dist
|
|
77
|
-
LIMIT 10;
|
|
78
|
-
```
|
|
79
|
-
|
|
80
|
-
---
|
|
81
|
-
|
|
82
|
-
## 路径二:External Function(自定义 UDF)
|
|
83
|
-
|
|
84
|
-
### 整体流程
|
|
85
|
-
|
|
86
|
-
```
|
|
87
|
-
1. 开通云函数服务(阿里云FC / 腾讯云SCF / AWS Lambda)
|
|
88
|
-
2. 编写 Python/Java 函数代码
|
|
89
|
-
3. 打包上传到对象存储或 User Volume
|
|
90
|
-
4. 授权 Lakehouse 访问云函数服务(RAM 角色)
|
|
91
|
-
5. CREATE API CONNECTION
|
|
92
|
-
6. CREATE EXTERNAL FUNCTION
|
|
93
|
-
7. 在 SQL 中调用
|
|
94
|
-
```
|
|
95
|
-
|
|
96
|
-
### 步骤 1:创建云函数 API Connection
|
|
97
|
-
|
|
98
|
-
```sql
|
|
99
|
-
-- 阿里云 FC
|
|
100
|
-
CREATE API CONNECTION IF NOT EXISTS my_fc_conn
|
|
101
|
-
TYPE CLOUD_FUNCTION
|
|
102
|
-
PROVIDER = 'aliyun'
|
|
103
|
-
REGION = 'cn-shanghai'
|
|
104
|
-
ROLE_ARN = 'acs:ram::1234567890:role/CzUDFRole'
|
|
105
|
-
NAMESPACE = 'default'
|
|
106
|
-
CODE_BUCKET = 'my-oss-bucket';
|
|
107
|
-
|
|
108
|
-
-- 腾讯云 SCF
|
|
109
|
-
CREATE API CONNECTION IF NOT EXISTS my_scf_conn
|
|
110
|
-
TYPE CLOUD_FUNCTION
|
|
111
|
-
PROVIDER = 'tencent'
|
|
112
|
-
REGION = 'ap-shanghai'
|
|
113
|
-
ROLE_ARN = 'qcs::cam::uin/1234567890:roleName/CzUDFRole'
|
|
114
|
-
NAMESPACE = 'default'
|
|
115
|
-
CODE_BUCKET = 'my-cos-bucket';
|
|
116
|
-
```
|
|
117
|
-
|
|
118
|
-
### 步骤 2:编写 Python UDF
|
|
119
|
-
|
|
120
|
-
```python
|
|
121
|
-
# upper.py
|
|
122
|
-
try:
|
|
123
|
-
from cz.udf import annotate
|
|
124
|
-
except ImportError:
|
|
125
|
-
annotate = lambda _: lambda _: _
|
|
126
|
-
|
|
127
|
-
@annotate("string->string")
|
|
128
|
-
class Upper(object):
|
|
129
|
-
def evaluate(self, arg):
|
|
130
|
-
if arg is None:
|
|
131
|
-
return None
|
|
132
|
-
return arg.upper()
|
|
133
|
-
```
|
|
134
|
-
|
|
135
|
-
打包上传:
|
|
136
|
-
```bash
|
|
137
|
-
zip -rq upper.zip upper.py
|
|
138
|
-
```
|
|
139
|
-
|
|
140
|
-
```sql
|
|
141
|
-
-- 上传到 User Volume(在 ClickZetta Studio 或 CLI 中执行,source_path 使用绝对路径)
|
|
142
|
-
PUT '/path/to/upper.zip' TO USER VOLUME;
|
|
143
|
-
```
|
|
144
|
-
|
|
145
|
-
### 步骤 3:创建 External Function
|
|
146
|
-
|
|
147
|
-
```sql
|
|
148
|
-
-- ⚠️ CREATE EXTERNAL FUNCTION 不支持 OR REPLACE,只支持 IF NOT EXISTS
|
|
149
|
-
-- ❌ 错误:CREATE OR REPLACE EXTERNAL FUNCTION ...
|
|
150
|
-
-- ✅ 正确:
|
|
151
|
-
-- 使用 User Volume 存放代码(无需 OSS)
|
|
152
|
-
CREATE EXTERNAL FUNCTION IF NOT EXISTS public.str_upper
|
|
153
|
-
AS 'upper.Upper'
|
|
154
|
-
USING FILE = 'volume:user://~/upper.zip'
|
|
155
|
-
CONNECTION = my_fc_conn
|
|
156
|
-
WITH PROPERTIES ('remote.udf.api' = 'python3.mc.v0')
|
|
157
|
-
COMMENT '字符串转大写';
|
|
158
|
-
|
|
159
|
-
-- 或者:使用 OSS 存放代码(与上面的 User Volume 方式二选一;若同名函数已存在,IF NOT EXISTS 会跳过本次创建)
|
|
160
|
-
CREATE EXTERNAL FUNCTION IF NOT EXISTS public.str_upper
|
|
161
|
-
AS 'upper.Upper'
|
|
162
|
-
USING FILE = 'oss://my-bucket/functions/upper.zip'
|
|
163
|
-
CONNECTION = my_fc_conn
|
|
164
|
-
WITH PROPERTIES ('remote.udf.api' = 'python3.mc.v0');
|
|
165
|
-
```
|
|
166
|
-
|
|
167
|
-
### 步骤 4:调用函数
|
|
168
|
-
|
|
169
|
-
```sql
|
|
170
|
-
-- ⚠️ 调用外部函数必须使用完整 Schema 路径,不能省略 schema
|
|
171
|
-
-- ❌ 错误:SELECT str_upper(name) FROM my_table;
|
|
172
|
-
-- ✅ 正确:
|
|
173
|
-
SELECT id, public.str_upper(name) AS upper_name FROM my_table;
|
|
174
|
-
```
|
|
175
|
-
|
|
176
|
-
---
|
|
177
|
-
|
|
178
|
-
## 管理操作
|
|
179
|
-
|
|
180
|
-
```sql
|
|
181
|
-
-- 查看所有外部函数
|
|
182
|
-
SHOW EXTERNAL FUNCTIONS;
|
|
183
|
-
SHOW EXTERNAL FUNCTIONS LIKE 'str_%';
|
|
184
|
-
|
|
185
|
-
-- 删除函数(注意:用 DROP FUNCTION,不是 DROP EXTERNAL FUNCTION)
|
|
186
|
-
DROP FUNCTION IF EXISTS public.str_upper;
|
|
187
|
-
```
|
|
188
|
-
|
|
189
|
-
> ⚠️ **注意**:`CREATE FUNCTION`(SQL 内联函数)只支持 SQL 表达式,不支持 Python/JavaScript 等编程语言。需要编程语言逻辑请使用 `CREATE EXTERNAL FUNCTION`。
|
|
190
|
-
|
|
191
|
-
---
|
|
192
|
-
|
|
193
|
-
## 常见问题
|
|
194
|
-
|
|
195
|
-
| 问题 | 原因 | 解决方案 |
|
|
196
|
-
|---|---|---|
|
|
197
|
-
| 函数调用超时 | 云函数冷启动或执行慢 | 增大超时配置,或预热函数 |
|
|
198
|
-
| 依赖库 ABI 不兼容 | 在 macOS/Windows 打包 | 用 `quay.io/pypa/manylinux2014_x86_64` 容器打包 |
|
|
199
|
-
| 代码包 > 500MB | 依赖过大 | 改用容器镜像方式部署 |
|
|
200
|
-
| AI_COMPLETE 报错 | API Key 无效或余额不足 | 检查 API Connection 的 API_KEY |
|
|
201
|
-
| ROLE_ARN 权限不足 | RAM 角色未授权 | 参考文档配置 AliyunFCFullAccess + OSS 权限 |
|
|
202
|
-
| 函数调用报"not found" | 省略了 Schema 前缀 | 必须用完整路径:`schema.function_name(...)` |
|
|
203
|
-
| CREATE OR REPLACE 报错 | EXTERNAL FUNCTION 不支持 OR REPLACE | 改用 `CREATE EXTERNAL FUNCTION IF NOT EXISTS` |
|
|
@@ -1,171 +0,0 @@
|
|
|
1
|
-
# External Function DDL 参考
|
|
2
|
-
|
|
3
|
-
> 来源:https://www.yunqi.tech/documents/CREATE_EXTERNATL_FUNCTION 等
|
|
4
|
-
|
|
5
|
-
## 概念
|
|
6
|
-
|
|
7
|
-
External Function(外部函数)是通过 Python/Java 编写、在云函数服务(阿里云 FC / 腾讯云 SCF / AWS Lambda)上执行的自定义 UDF。可调用:
|
|
8
|
-
- **在线服务**:LLM API、图像识别 API 等
|
|
9
|
-
- **离线模型**:打包上传的 Hugging Face 模型等
|
|
10
|
-
|
|
11
|
-
支持函数类型:UDF(标量)、UDAF(聚合,仅 Java)、UDTF(表函数,仅 Java)
|
|
12
|
-
|
|
13
|
-
---
|
|
14
|
-
|
|
15
|
-
## CREATE API CONNECTION(云函数连接)
|
|
16
|
-
|
|
17
|
-
```sql
|
|
18
|
-
CREATE API CONNECTION IF NOT EXISTS my_fc_conn
|
|
19
|
-
TYPE CLOUD_FUNCTION
|
|
20
|
-
PROVIDER = 'aliyun' -- 'aliyun' | 'tencent' | 'aws'
|
|
21
|
-
REGION = 'cn-shanghai'
|
|
22
|
-
ROLE_ARN = 'acs:ram::1234567890:role/CzUDFRole'
|
|
23
|
-
NAMESPACE = 'default' -- 腾讯云必填,其他填 'default'
|
|
24
|
-
CODE_BUCKET = 'my-oss-bucket';
|
|
25
|
-
```
|
|
26
|
-
|
|
27
|
-
| 参数 | 说明 |
|
|
28
|
-
|---|---|
|
|
29
|
-
| PROVIDER | `'aliyun'` / `'tencent'` / `'aws'` |
|
|
30
|
-
| REGION | 阿里云:`cn-shanghai`;腾讯云:`ap-beijing`;AWS:`cn-northwest-1` |
|
|
31
|
-
| ROLE_ARN | 授权给 Lakehouse 的 RAM 角色 ARN |
|
|
32
|
-
| NAMESPACE | 腾讯云命名空间(必填);其他填 `'default'` |
|
|
33
|
-
| CODE_BUCKET | 存放函数代码包的 OSS/COS/S3 bucket 名称 |
|
|
34
|
-
|
|
35
|
-
---
|
|
36
|
-
|
|
37
|
-
## CREATE EXTERNAL FUNCTION
|
|
38
|
-
|
|
39
|
-
```sql
|
|
40
|
-
CREATE EXTERNAL FUNCTION IF NOT EXISTS my_schema.my_udf
|
|
41
|
-
AS 'module_name.ClassName'
|
|
42
|
-
USING FILE = 'oss://my-bucket/functions/code.zip'
|
|
43
|
-
CONNECTION = my_fc_conn
|
|
44
|
-
WITH PROPERTIES (
|
|
45
|
-
'remote.udf.api' = 'python3.mc.v0' -- Python: python3.mc.v0 | Java: java8.hive2.v0
|
|
46
|
-
)
|
|
47
|
-
COMMENT '自定义函数说明';
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
### 资源文件地址格式
|
|
51
|
-
|
|
52
|
-
```
|
|
53
|
-
-- OSS/COS/S3
|
|
54
|
-
oss://bucket-name/path/to/code.zip
|
|
55
|
-
cos://bucket-name/path/to/code.zip
|
|
56
|
-
s3://bucket-name/path/to/code.zip
|
|
57
|
-
|
|
58
|
-
-- User Volume(无需开通对象存储)
|
|
59
|
-
volume:user://~/code.zip
|
|
60
|
-
|
|
61
|
-
-- External Volume
|
|
62
|
-
volume://workspace.schema.volume_name/code.zip
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
### WITH PROPERTIES 参数
|
|
66
|
-
|
|
67
|
-
| 参数 | 值 | 说明 |
|
|
68
|
-
|---|---|---|
|
|
69
|
-
| `remote.udf.api` | `python3.mc.v0` | Python 3.10 运行时 |
|
|
70
|
-
| `remote.udf.api` | `java8.hive2.v0` | Java 8 Hive 风格 UDF |
|
|
71
|
-
| `remote.udf.protocol` | `http.arrow.v0` | 默认,访问云函数的协议 |
|
|
72
|
-
|
|
73
|
-
---
|
|
74
|
-
|
|
75
|
-
## Python UDF 代码结构
|
|
76
|
-
|
|
77
|
-
```python
|
|
78
|
-
#!/usr/bin/env python
|
|
79
|
-
try:
|
|
80
|
-
from cz.udf import annotate
|
|
81
|
-
except ImportError:
|
|
82
|
-
annotate = lambda _: lambda _: _
|
|
83
|
-
|
|
84
|
-
@annotate("string->string") # 函数签名:输入类型->返回类型
|
|
85
|
-
class Upper(object):
|
|
86
|
-
def evaluate(self, arg):
|
|
87
|
-
if arg is None:
|
|
88
|
-
return None
|
|
89
|
-
return arg.upper()
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
### 函数签名格式
|
|
93
|
-
|
|
94
|
-
```
|
|
95
|
-
"input_type1,input_type2->return_type"
|
|
96
|
-
|
|
97
|
-
# 示例
|
|
98
|
-
"string->string" # 字符串转字符串
|
|
99
|
-
"string,int->double" # 两个输入,返回 double
|
|
100
|
-
"string->array<string>" # 返回数组
|
|
101
|
-
```
|
|
102
|
-
|
|
103
|
-
支持类型:`string`、`int`、`bigint`、`double`、`float`、`boolean`、`array<T>`、`map<K,V>`
|
|
104
|
-
|
|
105
|
-
### 打包上传
|
|
106
|
-
|
|
107
|
-
```bash
|
|
108
|
-
# 安装依赖到当前目录
|
|
109
|
-
pip3 install httpx pydantic -t .
|
|
110
|
-
|
|
111
|
-
# 打包(< 500MB)
|
|
112
|
-
zip -rq code.zip ./*
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
```sql
|
|
116
|
-
-- 上传到 User Volume(在 ClickZetta Studio 或 CLI 中执行,source_path 使用绝对路径)
|
|
117
|
-
PUT '/path/to/code.zip' TO USER VOLUME;
|
|
118
|
-
```
|
|
119
|
-
|
|
120
|
-
---
|
|
121
|
-
|
|
122
|
-
## 管理操作
|
|
123
|
-
|
|
124
|
-
```sql
|
|
125
|
-
-- 查看外部函数列表
|
|
126
|
-
SHOW EXTERNAL FUNCTIONS;
|
|
127
|
-
SHOW EXTERNAL FUNCTIONS LIKE 'my_%';
|
|
128
|
-
|
|
129
|
-
-- 删除外部函数
|
|
130
|
-
DROP FUNCTION IF EXISTS my_schema.my_udf;
|
|
131
|
-
```
|
|
132
|
-
|
|
133
|
-
---
|
|
134
|
-
|
|
135
|
-
## 内置 AI 函数(无需部署云函数)
|
|
136
|
-
|
|
137
|
-
### AI_COMPLETE(调用 LLM)
|
|
138
|
-
|
|
139
|
-
```sql
|
|
140
|
-
-- 通过 API Connection 调用(需先创建连接)
|
|
141
|
-
CREATE API CONNECTION conn_bailian
|
|
142
|
-
TYPE ai_function
|
|
143
|
-
PROVIDER = 'bailian'
|
|
144
|
-
BASE_URL = 'https://dashscope.aliyuncs.com/api/v1'
|
|
145
|
-
API_KEY = 'sk-xxxxxxxxxxxxxxxxxxxxxxxx';
|
|
146
|
-
|
|
147
|
-
-- 调用 LLM 生成文本
|
|
148
|
-
SELECT AI_COMPLETE('connection:conn_bailian', '请用一句话总结:' || content) AS summary
|
|
149
|
-
FROM articles
|
|
150
|
-
LIMIT 10;
|
|
151
|
-
|
|
152
|
-
-- 通过平台 Endpoint 调用(管理员预配置)
|
|
153
|
-
SELECT AI_COMPLETE('endpoint:my_llm_endpoint', prompt_col) AS result
|
|
154
|
-
FROM my_table;
|
|
155
|
-
```
|
|
156
|
-
|
|
157
|
-
### AI_EMBEDDING(文本向量化)
|
|
158
|
-
|
|
159
|
-
```sql
|
|
160
|
-
-- 将文本转为向量(用于语义搜索)
|
|
161
|
-
SELECT id, content,
|
|
162
|
-
AI_EMBEDDING('connection:conn_bailian', content) AS embedding
|
|
163
|
-
FROM documents;
|
|
164
|
-
|
|
165
|
-
-- 结合向量索引做语义搜索
|
|
166
|
-
SELECT id, content,
|
|
167
|
-
cosine_distance(embedding, AI_EMBEDDING('connection:conn_bailian', '查询文本')) AS dist
|
|
168
|
-
FROM doc_embeddings
|
|
169
|
-
ORDER BY dist
|
|
170
|
-
LIMIT 10;
|
|
171
|
-
```
|
|
@@ -1,156 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: clickzetta-file-import-pipeline
|
|
3
|
-
description: |
|
|
4
|
-
从 URL、本地文件或 Volume 路径将数据导入到 ClickZetta 表中,覆盖文件下载、格式推断、
|
|
5
|
-
表创建、COPY INTO 导入、结果验证的完整流程。当用户说"导入数据"、"从 URL 加载"、
|
|
6
|
-
"上传 CSV 到表"、"文件导入"、"COPY INTO"时触发。包含 ClickZetta USER VOLUME 机制、
|
|
7
|
-
COPY INTO 语法、格式推断规则、写入模式语义等平台特有知识。
|
|
8
|
-
Keywords: file import, URL, CSV, JSON, Parquet, COPY INTO, Volume
|
|
9
|
-
---
|
|
10
|
-
|
|
11
|
-
# URL/文件数据导入工作流
|
|
12
|
-
|
|
13
|
-
## 指令
|
|
14
|
-
|
|
15
|
-
### 步骤 1:获取源文件并上传到 Volume
|
|
16
|
-
根据数据来源选择对应方式:
|
|
17
|
-
- **HTTP/HTTPS URL**:需要先用外部工具下载到本地,然后用 `PUT` 命令上传到 User Volume
|
|
18
|
-
- **本地文件**:执行 SQL `PUT '/local/path/file.csv' TO USER VOLUME` 上传
|
|
19
|
-
- **Volume 路径**:文件已在 Volume 上,跳过此步骤
|
|
20
|
-
- **外部 Volume(OSS/S3/COS)**:文件已在外部 Volume,直接使用
|
|
21
|
-
- 记录上传后的 Volume 名称和文件名,后续步骤需要
|
|
22
|
-
|
|
23
|
-
> ⚠️ **注意**:文件上传操作参考 `clickzetta-volume-manager` skill。
|
|
24
|
-
|
|
25
|
-
### 步骤 2:推断文件格式
|
|
26
|
-
根据文件扩展名推断格式(ClickZetta COPY INTO 支持的格式):
|
|
27
|
-
- `.csv`, `.tsv`, `.txt` → CSV 格式
|
|
28
|
-
- `.json`, `.jsonl`, `.ndjson` → JSON 格式
|
|
29
|
-
- `.parquet`, `.pq` → PARQUET 格式
|
|
30
|
-
- `.orc` → ORC 格式
|
|
31
|
-
- `.bson` → BSON 格式
|
|
32
|
-
如果扩展名不明确,执行 `SELECT * FROM VOLUME <volume_name> USING <format> ... LIMIT 5` 预览文件内容,以确认格式和 schema。
|
|
33
|
-
|
|
34
|
-
### 步骤 3:确认或创建目标表
|
|
35
|
-
根据写入模式处理目标表:
|
|
36
|
-
- **create 模式**:表必须不存在。先执行 `SELECT * FROM VOLUME ... LIMIT 5` 预览数据并推断 schema,然后执行 `CREATE TABLE` 创建表
|
|
37
|
-
- **append 模式**:表必须已存在。用 `DESC TABLE <table_name>` 确认表存在并检查列兼容性
|
|
38
|
-
- **overwrite 模式**:表存在则先清空。执行 `TRUNCATE TABLE table_name`,再执行 COPY INTO(⚠️ 不支持 `COPY OVERWRITE INTO` 语法)
|
|
39
|
-
|
|
40
|
-
### 步骤 4:执行 COPY INTO 导入数据
|
|
41
|
-
执行 COPY INTO 语句。核心语法:
|
|
42
|
-
|
|
43
|
-
```sql
|
|
44
|
-
COPY INTO target_table
|
|
45
|
-
FROM VOLUME volume_name
|
|
46
|
-
USING format_type
|
|
47
|
-
OPTIONS('option_name' = 'value')
|
|
48
|
-
FILES('filename');
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
对于 USER VOLUME(通过 PUT 命令上传的文件):
|
|
52
|
-
```sql
|
|
53
|
-
COPY INTO target_table
|
|
54
|
-
FROM USER VOLUME
|
|
55
|
-
USING CSV
|
|
56
|
-
OPTIONS('header' = 'true')
|
|
57
|
-
FILES('uploaded_filename');
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
CSV 格式可附加 OPTIONS:
|
|
61
|
-
```sql
|
|
62
|
-
COPY INTO target_table
|
|
63
|
-
FROM VOLUME vol
|
|
64
|
-
USING CSV
|
|
65
|
-
OPTIONS('header' = 'true', 'sep' = ',', 'quote' = '"', 'nullValue' = '')
|
|
66
|
-
FILES('data.csv');
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
⚠️ **语法顺序要求**:`OPTIONS` 必须在 `FILES` 之前,否则报错 `Syntax error - missing EQ at '('`
|
|
70
|
-
|
|
71
|
-
overwrite 模式(⚠️ 不支持 `COPY OVERWRITE INTO`):
|
|
72
|
-
```sql
|
|
73
|
-
-- 正确方式:先 TRUNCATE 再 COPY
|
|
74
|
-
TRUNCATE TABLE target_table;
|
|
75
|
-
COPY INTO target_table FROM VOLUME vol USING CSV FILES('data.csv');
|
|
76
|
-
```
|
|
77
|
-
|
|
78
|
-
### 步骤 5:验证导入结果
|
|
79
|
-
执行验证查询:
|
|
80
|
-
```sql
|
|
81
|
-
SELECT COUNT(*) as row_count FROM target_table;
|
|
82
|
-
SELECT * FROM target_table LIMIT 5;
|
|
83
|
-
```
|
|
84
|
-
确认行数符合预期,数据内容正确。
|
|
85
|
-
|
|
86
|
-
## 示例
|
|
87
|
-
|
|
88
|
-
### 示例 1:从 URL 导入 CSV 到新表
|
|
89
|
-
```sql
|
|
90
|
-
-- 1. 下载 URL 文件到本地,然后上传到 User Volume
|
|
91
|
-
PUT '/tmp/data.csv' TO USER VOLUME;
|
|
92
|
-
|
|
93
|
-
-- 2. 预览文件内容推断 schema
|
|
94
|
-
SELECT * FROM USER VOLUME USING CSV OPTIONS('header' = 'true') FILES('data.csv') LIMIT 5;
|
|
95
|
-
-- 推断出列:id INT, name STRING, value DOUBLE
|
|
96
|
-
|
|
97
|
-
-- 3. 创建目标表
|
|
98
|
-
CREATE TABLE imported_data (id INT, name STRING, value DOUBLE);
|
|
99
|
-
|
|
100
|
-
-- 4. 执行 COPY INTO 导入(注意:OPTIONS 必须在 FILES 之前)
|
|
101
|
-
COPY INTO imported_data FROM USER VOLUME USING CSV OPTIONS('header' = 'true') FILES('data.csv');
|
|
102
|
-
|
|
103
|
-
-- 5. 验证导入结果
|
|
104
|
-
SELECT COUNT(*) FROM imported_data;
|
|
105
|
-
```
|
|
106
|
-
|
|
107
|
-
### 示例 2:追加 Parquet 数据到已有表
|
|
108
|
-
```sql
|
|
109
|
-
-- 1. 上传本地文件到 User Volume
|
|
110
|
-
PUT '/local/new_batch.parquet' TO USER VOLUME;
|
|
111
|
-
|
|
112
|
-
-- 2. 确认目标表存在
|
|
113
|
-
DESC TABLE existing_table;
|
|
114
|
-
|
|
115
|
-
-- 3. 执行 COPY INTO 导入(Parquet 格式通常不需要 OPTIONS)
|
|
116
|
-
COPY INTO existing_table FROM USER VOLUME USING PARQUET FILES('new_batch.parquet');
|
|
117
|
-
|
|
118
|
-
-- 4. 验证导入结果
|
|
119
|
-
SELECT COUNT(*) FROM existing_table;
|
|
120
|
-
```
|
|
121
|
-
|
|
122
|
-
### 示例 3:从外部 Volume(OSS)导入
|
|
123
|
-
```sql
|
|
124
|
-
-- 1. 查看 Volume 中的文件列表
|
|
125
|
-
SHOW VOLUME DIRECTORY my_oss_volume;
|
|
126
|
-
|
|
127
|
-
-- 2. 预览文件内容
|
|
128
|
-
SELECT * FROM VOLUME my_oss_volume USING CSV OPTIONS('header' = 'true') FILES('data.csv') LIMIT 5;
|
|
129
|
-
|
|
130
|
-
-- 3. 创建目标表并导入(注意:OPTIONS 必须在 FILES 之前)
|
|
131
|
-
CREATE TABLE imported_data (col1 INT, col2 STRING);
|
|
132
|
-
COPY INTO imported_data FROM VOLUME my_oss_volume USING CSV OPTIONS('header' = 'true') FILES('data.csv');
|
|
133
|
-
```
|
|
134
|
-
|
|
135
|
-
## 故障排除
|
|
136
|
-
|
|
137
|
-
| 错误 | 原因 | 解决方案 |
|
|
138
|
-
|------|------|----------|
|
|
139
|
-
| COPY INTO 报 "table not found" | create 模式下表未创建,或 append 模式下表名拼写错误 | 先用 `SHOW TABLES` 确认表是否存在 |
|
|
140
|
-
| COPY INTO 报 "file not found" | FILES 中的文件名与 Volume 上的实际文件名不匹配 | 执行 `SHOW VOLUME DIRECTORY vol_name` 或 `SHOW USER VOLUME DIRECTORY` 确认文件名,注意大小写敏感 |
|
|
141
|
-
| COPY INTO 报语法错误 "missing EQ at '('" | OPTIONS 放在了 FILES 之后 | 调整顺序,确保 `OPTIONS` 在 `FILES` 之前:`USING CSV OPTIONS(...) FILES(...)` |
|
|
142
|
-
| CSV 导入列数不匹配 | CSV 文件有 header 行但未指定 `OPTIONS('header'='true')`,导致 header 被当作数据行;或分隔符(`sep`)与文件实际使用的分隔符不一致 | 添加 `OPTIONS('header' = 'true')`,或检查 CSV 分隔符是否正确(sep 参数) |
|
|
143
|
-
| COPY INTO 报 "schema mismatch" | 文件中的数据类型与目标表列定义不兼容 | 执行 `SELECT FROM VOLUME ... USING format LIMIT 5` 预览实际数据,调整表定义或使用列映射 |
|
|
144
|
-
| overwrite 模式数据未清空 | 使用了 `COPY OVERWRITE INTO` 语法(不支持) | overwrite 模式应先用 `TRUNCATE TABLE` 清空表,再执行 `COPY INTO` |
|
|
145
|
-
| SELECT FROM VOLUME 报错 | 格式不匹配或多格式文件混合 | 确认 USING 后的格式与实际文件格式一致;使用 `FILES()` 指定文件或 `SUBDIRECTORY` 指定子目录 |
|
|
146
|
-
| PUT 命令失败 | 本地文件路径不存在 | 确认本地文件路径正确,文件存在 |
|
|
147
|
-
|
|
148
|
-
---
|
|
149
|
-
|
|
150
|
-
## 依赖的 Skills
|
|
151
|
-
|
|
152
|
-
| 操作 | 需要加载的 Skill |
|
|
153
|
-
|------|-----------------|
|
|
154
|
-
| 文件上传/下载/删除 | `clickzetta-volume-manager` |
|
|
155
|
-
| 查询 Volume 文件内容 | `clickzetta-volume-manager` |
|
|
156
|
-
| COPY INTO 导入 | 本 Skill |
|