@clickzetta/cz-cli-linux-x64 0.3.4 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/bin/cz-cli +0 -0
  2. package/package.json +1 -1
  3. package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
  4. package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
  5. package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
  6. package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
  7. package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
  8. package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
  9. package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
  10. package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
  11. package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
  12. package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
  13. package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
  14. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -293
  15. package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
  16. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
  17. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -457
  18. package/bin/skills/clickzetta-concepts/SKILL.md +0 -282
  19. package/bin/skills/clickzetta-concepts/references/brands-and-endpoints.md +0 -79
  20. package/bin/skills/clickzetta-concepts/references/object-model.md +0 -311
  21. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -165
  22. package/bin/skills/clickzetta-data-lifecycle/SKILL.md +0 -211
  23. package/bin/skills/clickzetta-data-lifecycle/references/lifecycle-reference.md +0 -175
  24. package/bin/skills/clickzetta-data-recovery/SKILL.md +0 -215
  25. package/bin/skills/clickzetta-data-recovery/evals/evals.json +0 -35
  26. package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
  27. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
  28. package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
  29. package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
  30. package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
  31. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
  32. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
  33. package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
  34. package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
  35. package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -540
  36. package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -259
  37. package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
  38. package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -112
  39. package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -257
  40. package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
  41. package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
  42. package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
  43. package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
  44. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  45. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -429
  46. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -268
  47. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
  48. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
  49. package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -120
  50. package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
  51. package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
  52. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
  53. package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -156
  54. package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
  55. package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
  56. package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
  57. package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
  58. package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
  59. package/bin/skills/clickzetta-information-schema/SKILL.md +0 -367
  60. package/bin/skills/clickzetta-information-schema/references/instance-views-reference.md +0 -276
  61. package/bin/skills/clickzetta-information-schema/references/metering-views-reference.md +0 -137
  62. package/bin/skills/clickzetta-information-schema/references/views-reference.md +0 -271
  63. package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
  64. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
  65. package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
  66. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -639
  67. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -324
  68. package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
  69. package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
  70. package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
  71. package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
  72. package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
  73. package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
  74. package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
  75. package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
  76. package/bin/skills/clickzetta-metadata-query/SKILL.md +0 -298
  77. package/bin/skills/clickzetta-metadata-query/references/show-desc-reference.md +0 -326
  78. package/bin/skills/clickzetta-monitoring/SKILL.md +0 -199
  79. package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
  80. package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
  81. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -427
  82. package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
  83. package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
  84. package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
  85. package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
  86. package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
  87. package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
  88. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -197
  89. package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
  90. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
  91. package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
  92. package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
  93. package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
  94. package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -379
  95. package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
  96. package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -185
  97. package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
  98. package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -222
  99. package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -125
  100. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -172
  101. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  102. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  103. package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
  104. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  105. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  106. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
  107. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  108. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  109. package/bin/skills/clickzetta-studio-overview/SKILL.md +0 -170
  110. package/bin/skills/clickzetta-studio-overview/references/studio-modules.md +0 -173
  111. package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -206
  112. package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
  113. package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
  114. package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
  115. package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -292
  116. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -199
  117. package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
  118. package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
@@ -1,130 +0,0 @@
1
- # External Catalog 参考
2
-
3
- > 来源:https://www.yunqi.tech/documents/external-catalog-summary 等
4
-
5
- > ⚠️ External Catalog 当前处于公开预览阶段。目前只有 `instance_admin` 角色可以创建和查询 External Catalog。
6
-
7
- ## 概述
8
-
9
- External Catalog 映射外部数据系统(Hive、Iceberg、Databricks)的数据库,使 Lakehouse 可对其执行**只读**联邦查询。
10
-
11
- **支持的数据源**:
12
- - Apache Hive(通过 Hive Metastore)
13
- - Iceberg REST Catalog(如 Snowflake OpenCatalog)
14
- - Databricks Unity Catalog
15
-
16
- ---
17
-
18
- ## 创建流程(以 Hive 为例)
19
-
20
- ### 步骤 1:创建存储连接
21
-
22
- ```sql
23
- -- OSS
24
- CREATE STORAGE CONNECTION IF NOT EXISTS catalog_storage_oss
25
- TYPE OSS
26
- ACCESS_ID = 'LTAIxxxxxxxxxxxx'
27
- ACCESS_KEY = 'T8Gexxxxxxmtxxxxxx'
28
- ENDPOINT = 'oss-cn-hangzhou-internal.aliyuncs.com';
29
-
30
- -- COS
31
- CREATE STORAGE CONNECTION IF NOT EXISTS catalog_storage_cos
32
- TYPE COS
33
- ACCESS_KEY = '<access_key>'
34
- SECRET_KEY = '<secret_key>'
35
- REGION = 'ap-shanghai'
36
- APP_ID = '1310000503';
37
-
38
- -- S3
39
- CREATE STORAGE CONNECTION IF NOT EXISTS catalog_storage_s3
40
- TYPE S3
41
- ACCESS_KEY = '<access_key>'
42
- SECRET_KEY = '<secret_key>'
43
- REGION = 'us-east-1';
44
- ```
45
-
46
- ### 步骤 2:创建 Catalog Connection
47
-
48
- ```sql
49
- -- Hive Metastore
50
- CREATE CATALOG CONNECTION IF NOT EXISTS catalog_api_connection
51
- TYPE hms
52
- hive_metastore_uris = 'host:9083'
53
- storage_connection = 'catalog_storage_oss';
54
- ```
55
-
56
- 参数说明:
57
- - `type`:连接类型,目前支持 `hms`(Hive Metastore Service)
58
- - `hive_metastore_uris`:HMS 服务地址,格式 `host:port`,端口通常为 9083
59
- - `storage_connection`:已创建的存储连接名称
60
-
61
- ### 步骤 3:创建 External Catalog
62
-
63
- ```sql
64
- CREATE EXTERNAL CATALOG my_external_catalog
65
- CONNECTION catalog_api_connection;
66
- ```
67
-
68
- ---
69
-
70
- ## 查看 Catalog
71
-
72
- ```sql
73
- -- 列出所有 Catalog
74
- SHOW CATALOGS;
75
-
76
- -- 查看 Catalog 详情
77
- DESC CATALOG my_external_catalog;
78
- DESC CATALOG EXTENDED my_external_catalog;
79
- ```
80
-
81
- ---
82
-
83
- ## 查看 Catalog 下的对象
84
-
85
- ```sql
86
- -- 查看 Schema 列表
87
- SHOW SCHEMAS IN my_external_catalog;
88
-
89
- -- 查看 Schema 列表(含类型:managed/external)
90
- SHOW SCHEMAS EXTENDED IN my_external_catalog;
91
-
92
- -- 查看表列表
93
- SHOW TABLES IN my_external_catalog.my_schema;
94
-
95
- -- 查看表结构
96
- DESC TABLE my_external_catalog.my_schema.my_table;
97
- ```
98
-
99
- ---
100
-
101
- ## 查询外部数据
102
-
103
- ```sql
104
- -- 三层命名空间语法(必须)
105
- SELECT * FROM my_external_catalog.my_schema.my_table;
106
-
107
- -- 联邦查询(外部表 JOIN 内部表)
108
- SELECT e.*, i.region
109
- FROM my_external_catalog.hive_schema.orders e
110
- JOIN public.dim_region i ON e.region_id = i.id;
111
- ```
112
-
113
- ⚠️ 查询 External Catalog 下的表**必须**使用三层结构语法(catalog.schema.table),不支持 `USE` 切换 catalog。
114
-
115
- ---
116
-
117
- ## 删除 Catalog
118
-
119
- ```sql
120
- DROP CATALOG IF EXISTS my_external_catalog;
121
- ```
122
-
123
- ---
124
-
125
- ## 注意事项
126
-
127
- - External Catalog 为**只读**,不支持写入操作
128
- - HMS 所在服务器网络需与 Lakehouse 打通(可通过 PrivateLink 实现)
129
- - 目前只有 `instance_admin` 角色可以创建和查询 External Catalog
130
- - Databricks Unity Catalog 要求与 Lakehouse 在同一云平台(如同在 AWS 上)
@@ -1,203 +0,0 @@
1
- ---
2
- name: clickzetta-external-function
3
- description: |
4
- 在 ClickZetta Lakehouse 中创建和使用外部函数(External Function / UDF),
5
- 通过 Python 或 Java 扩展 SQL 计算能力,调用 LLM、图像识别、自定义算法等外部服务。
6
- 覆盖 CREATE API CONNECTION(阿里云FC/腾讯云SCF/AWS Lambda)、
7
- CREATE EXTERNAL FUNCTION、Python UDF 代码结构与打包、
8
- 内置 AI_COMPLETE 和 AI_EMBEDDING 函数的使用。
9
- 当用户说"外部函数"、"UDF"、"自定义函数"、"External Function"、
10
- "Remote Function"、"调用 LLM"、"AI_COMPLETE"、"AI_EMBEDDING"、
11
- "文本向量化"、"调用阿里云函数计算"、"调用云函数"、"Python UDF"、
12
- "Java UDF"、"CREATE EXTERNAL FUNCTION"时触发。
13
- Keywords: external function, UDF, Python UDF, Java UDF, LLM, custom function
14
- ---
15
-
16
- # ClickZetta External Function
17
-
18
- External Function 让 SQL 可以调用外部计算能力(LLM、图像识别、自定义算法),通过 Python/Java 编写函数逻辑,部署在云函数服务上执行。
19
-
20
- 阅读 [references/external-function-ddl.md](references/external-function-ddl.md) 了解完整语法。
21
-
22
- ---
23
-
24
- ## 两种使用路径
25
-
26
- | 路径 | 适用场景 | 复杂度 |
27
- |---|---|---|
28
- | **内置 AI 函数**(AI_COMPLETE / AI_EMBEDDING) | 调用 LLM 生成文本、文本向量化 | 低,只需创建 API Connection |
29
- | **External Function** | 自定义算法、图像处理、私有模型 | 高,需部署云函数 |
30
-
31
- ---
32
-
33
- ## 路径一:内置 AI 函数(推荐)
34
-
35
- ### 1. 创建 AI API Connection
36
-
37
- ```sql
38
- CREATE API CONNECTION conn_bailian
39
- TYPE ai_function
40
- PROVIDER = 'bailian'
41
- BASE_URL = 'https://dashscope.aliyuncs.com/api/v1'
42
- API_KEY = 'sk-xxxxxxxxxxxxxxxxxxxxxxxx';
43
- ```
44
-
45
- ### 2. AI_COMPLETE — 调用 LLM
46
-
47
- ```sql
48
- -- 文本摘要
49
- SELECT id,
50
- AI_COMPLETE('connection:conn_bailian', '请用一句话总结:' || content) AS summary
51
- FROM articles;
52
-
53
- -- 情感分析
54
- SELECT id, review,
55
- AI_COMPLETE('connection:conn_bailian',
56
- '判断以下评论的情感(正面/负面/中性),只返回一个词:' || review) AS sentiment
57
- FROM user_reviews;
58
-
59
- -- 通过平台 Endpoint(管理员预配置)
60
- SELECT AI_COMPLETE('endpoint:my_llm_endpoint', prompt_col) AS result
61
- FROM my_table;
62
- ```
63
-
64
- ### 3. AI_EMBEDDING — 文本向量化
65
-
66
- ```sql
67
- -- 批量生成 embedding
68
- SELECT id, content,
69
- AI_EMBEDDING('connection:conn_bailian', content) AS vec
70
- FROM documents;
71
-
72
- -- 语义搜索(结合向量索引)
73
- SELECT id, content,
74
- cosine_distance(vec, AI_EMBEDDING('connection:conn_bailian', '用户查询')) AS dist
75
- FROM doc_embeddings
76
- ORDER BY dist
77
- LIMIT 10;
78
- ```
79
-
80
- ---
81
-
82
- ## 路径二:External Function(自定义 UDF)
83
-
84
- ### 整体流程
85
-
86
- ```
87
- 1. 开通云函数服务(阿里云FC / 腾讯云SCF / AWS Lambda)
88
- 2. 编写 Python/Java 函数代码
89
- 3. 打包上传到对象存储或 User Volume
90
- 4. 授权 Lakehouse 访问云函数服务(RAM 角色)
91
- 5. CREATE API CONNECTION
92
- 6. CREATE EXTERNAL FUNCTION
93
- 7. 在 SQL 中调用
94
- ```
95
-
96
- ### 步骤 1:创建云函数 API Connection
97
-
98
- ```sql
99
- -- 阿里云 FC
100
- CREATE API CONNECTION IF NOT EXISTS my_fc_conn
101
- TYPE CLOUD_FUNCTION
102
- PROVIDER = 'aliyun'
103
- REGION = 'cn-shanghai'
104
- ROLE_ARN = 'acs:ram::1234567890:role/CzUDFRole'
105
- NAMESPACE = 'default'
106
- CODE_BUCKET = 'my-oss-bucket';
107
-
108
- -- 腾讯云 SCF
109
- CREATE API CONNECTION IF NOT EXISTS my_scf_conn
110
- TYPE CLOUD_FUNCTION
111
- PROVIDER = 'tencent'
112
- REGION = 'ap-shanghai'
113
- ROLE_ARN = 'qcs::cam::uin/1234567890:roleName/CzUDFRole'
114
- NAMESPACE = 'default'
115
- CODE_BUCKET = 'my-cos-bucket';
116
- ```
117
-
118
- ### 步骤 2:编写 Python UDF
119
-
120
- ```python
121
- # upper.py
122
- try:
123
- from cz.udf import annotate
124
- except ImportError:
125
- annotate = lambda _: lambda _: _
126
-
127
- @annotate("string->string")
128
- class Upper(object):
129
- def evaluate(self, arg):
130
- if arg is None:
131
- return None
132
- return arg.upper()
133
- ```
134
-
135
- 打包上传:
136
- ```bash
137
- zip -rq upper.zip upper.py
138
- ```
139
-
140
- ```sql
141
- -- 上传到 User Volume(在 ClickZetta Studio 或 CLI 中执行,source_path 使用绝对路径)
142
- PUT '/path/to/upper.zip' TO USER VOLUME;
143
- ```
144
-
145
- ### 步骤 3:创建 External Function
146
-
147
- ```sql
148
- -- ⚠️ CREATE EXTERNAL FUNCTION 不支持 OR REPLACE,只支持 IF NOT EXISTS
149
- -- ❌ 错误:CREATE OR REPLACE EXTERNAL FUNCTION ...
150
- -- ✅ 正确:
151
- -- 使用 User Volume 存放代码(无需 OSS)
152
- CREATE EXTERNAL FUNCTION IF NOT EXISTS public.str_upper
153
- AS 'upper.Upper'
154
- USING FILE = 'volume:user://~/upper.zip'
155
- CONNECTION = my_fc_conn
156
- WITH PROPERTIES ('remote.udf.api' = 'python3.mc.v0')
157
- COMMENT '字符串转大写';
158
-
159
- -- 或者:使用 OSS 存放代码(与上面的 User Volume 方式二选一;同名函数已存在时 IF NOT EXISTS 会跳过创建)
160
- CREATE EXTERNAL FUNCTION IF NOT EXISTS public.str_upper
161
- AS 'upper.Upper'
162
- USING FILE = 'oss://my-bucket/functions/upper.zip'
163
- CONNECTION = my_fc_conn
164
- WITH PROPERTIES ('remote.udf.api' = 'python3.mc.v0');
165
- ```
166
-
167
- ### 步骤 4:调用函数
168
-
169
- ```sql
170
- -- ⚠️ 调用外部函数必须使用完整 Schema 路径,不能省略 schema
171
- -- ❌ 错误:SELECT str_upper(name) FROM my_table;
172
- -- ✅ 正确:
173
- SELECT id, public.str_upper(name) AS upper_name FROM my_table;
174
- ```
175
-
176
- ---
177
-
178
- ## 管理操作
179
-
180
- ```sql
181
- -- 查看所有外部函数
182
- SHOW EXTERNAL FUNCTIONS;
183
- SHOW EXTERNAL FUNCTIONS LIKE 'str_%';
184
-
185
- -- 删除函数(注意:用 DROP FUNCTION,不是 DROP EXTERNAL FUNCTION)
186
- DROP FUNCTION IF EXISTS public.str_upper;
187
- ```
188
-
189
- > ⚠️ **注意**:`CREATE FUNCTION`(SQL 内联函数)只支持 SQL 表达式,不支持 Python/JavaScript 等编程语言。需要编程语言逻辑请使用 `CREATE EXTERNAL FUNCTION`。
190
-
191
- ---
192
-
193
- ## 常见问题
194
-
195
- | 问题 | 原因 | 解决方案 |
196
- |---|---|---|
197
- | 函数调用超时 | 云函数冷启动或执行慢 | 增大超时配置,或预热函数 |
198
- | 依赖库 ABI 不兼容 | 在 macOS/Windows 打包 | 用 `quay.io/pypa/manylinux2014_x86_64` 容器打包 |
199
- | 代码包 > 500MB | 依赖过大 | 改用容器镜像方式部署 |
200
- | AI_COMPLETE 报错 | API Key 无效或余额不足 | 检查 API Connection 的 API_KEY |
201
- | ROLE_ARN 权限不足 | RAM 角色未授权 | 参考文档配置 AliyunFCFullAccess + OSS 权限 |
202
- | 函数调用报"not found" | 省略了 Schema 前缀 | 必须用完整路径:`schema.function_name(...)` |
203
- | CREATE OR REPLACE 报错 | EXTERNAL FUNCTION 不支持 OR REPLACE | 改用 `CREATE EXTERNAL FUNCTION IF NOT EXISTS` |
@@ -1,171 +0,0 @@
1
- # External Function DDL 参考
2
-
3
- > 来源:https://www.yunqi.tech/documents/CREATE_EXTERNATL_FUNCTION 等
4
-
5
- ## 概念
6
-
7
- External Function(外部函数)是通过 Python/Java 编写、在云函数服务(阿里云 FC / 腾讯云 SCF / AWS Lambda)上执行的自定义 UDF。可调用:
8
- - **在线服务**:LLM API、图像识别 API 等
9
- - **离线模型**:打包上传的 Hugging Face 模型等
10
-
11
- 支持函数类型:UDF(标量)、UDAF(聚合,仅 Java)、UDTF(表函数,仅 Java)
12
-
13
- ---
14
-
15
- ## CREATE API CONNECTION(云函数连接)
16
-
17
- ```sql
18
- CREATE API CONNECTION IF NOT EXISTS my_fc_conn
19
- TYPE CLOUD_FUNCTION
20
- PROVIDER = 'aliyun' -- 'aliyun' | 'tencent' | 'aws'
21
- REGION = 'cn-shanghai'
22
- ROLE_ARN = 'acs:ram::1234567890:role/CzUDFRole'
23
- NAMESPACE = 'default' -- 腾讯云必填,其他填 'default'
24
- CODE_BUCKET = 'my-oss-bucket';
25
- ```
26
-
27
- | 参数 | 说明 |
28
- |---|---|
29
- | PROVIDER | `'aliyun'` / `'tencent'` / `'aws'` |
30
- | REGION | 阿里云:`cn-shanghai`;腾讯云:`ap-beijing`;AWS:`cn-northwest-1` |
31
- | ROLE_ARN | 授权给 Lakehouse 的 RAM 角色 ARN |
32
- | NAMESPACE | 腾讯云命名空间(必填);其他填 `'default'` |
33
- | CODE_BUCKET | 存放函数代码包的 OSS/COS/S3 bucket 名称 |
34
-
35
- ---
36
-
37
- ## CREATE EXTERNAL FUNCTION
38
-
39
- ```sql
40
- CREATE EXTERNAL FUNCTION IF NOT EXISTS my_schema.my_udf
41
- AS 'module_name.ClassName'
42
- USING FILE = 'oss://my-bucket/functions/code.zip'
43
- CONNECTION = my_fc_conn
44
- WITH PROPERTIES (
45
- 'remote.udf.api' = 'python3.mc.v0' -- Python: python3.mc.v0 | Java: java8.hive2.v0
46
- )
47
- COMMENT '自定义函数说明';
48
- ```
49
-
50
- ### 资源文件地址格式
51
-
52
- ```
53
- -- OSS/COS/S3
54
- oss://bucket-name/path/to/code.zip
55
- cos://bucket-name/path/to/code.zip
56
- s3://bucket-name/path/to/code.zip
57
-
58
- -- User Volume(无需开通对象存储)
59
- volume:user://~/code.zip
60
-
61
- -- External Volume
62
- volume://workspace.schema.volume_name/code.zip
63
- ```
64
-
65
- ### WITH PROPERTIES 参数
66
-
67
- | 参数 | 值 | 说明 |
68
- |---|---|---|
69
- | `remote.udf.api` | `python3.mc.v0` | Python 3.10 运行时 |
70
- | `remote.udf.api` | `java8.hive2.v0` | Java 8 Hive 风格 UDF |
71
- | `remote.udf.protocol` | `http.arrow.v0` | 默认,访问云函数的协议 |
72
-
73
- ---
74
-
75
- ## Python UDF 代码结构
76
-
77
- ```python
78
- #!/usr/bin/env python
79
- try:
80
- from cz.udf import annotate
81
- except ImportError:
82
- annotate = lambda _: lambda _: _
83
-
84
- @annotate("string->string") # 函数签名:输入类型->返回类型
85
- class Upper(object):
86
- def evaluate(self, arg):
87
- if arg is None:
88
- return None
89
- return arg.upper()
90
- ```
91
-
92
- ### 函数签名格式
93
-
94
- ```
95
- "input_type1,input_type2->return_type"
96
-
97
- # 示例
98
- "string->string" # 字符串转字符串
99
- "string,int->double" # 两个输入,返回 double
100
- "string->array<string>" # 返回数组
101
- ```
102
-
103
- 支持类型:`string`、`int`、`bigint`、`double`、`float`、`boolean`、`array<T>`、`map<K,V>`
104
-
105
- ### 打包上传
106
-
107
- ```bash
108
- # 安装依赖到当前目录
109
- pip3 install httpx pydantic -t .
110
-
111
- # 打包(< 500MB)
112
- zip -rq code.zip ./*
113
- ```
114
-
115
- ```sql
116
- -- 上传到 User Volume(在 ClickZetta Studio 或 CLI 中执行,source_path 使用绝对路径)
117
- PUT '/path/to/code.zip' TO USER VOLUME;
118
- ```
119
-
120
- ---
121
-
122
- ## 管理操作
123
-
124
- ```sql
125
- -- 查看外部函数列表
126
- SHOW EXTERNAL FUNCTIONS;
127
- SHOW EXTERNAL FUNCTIONS LIKE 'my_%';
128
-
129
- -- 删除外部函数
130
- DROP FUNCTION IF EXISTS my_schema.my_udf;
131
- ```
132
-
133
- ---
134
-
135
- ## 内置 AI 函数(无需部署云函数)
136
-
137
- ### AI_COMPLETE(调用 LLM)
138
-
139
- ```sql
140
- -- 通过 API Connection 调用(需先创建连接)
141
- CREATE API CONNECTION conn_bailian
142
- TYPE ai_function
143
- PROVIDER = 'bailian'
144
- BASE_URL = 'https://dashscope.aliyuncs.com/api/v1'
145
- API_KEY = 'sk-xxxxxxxxxxxxxxxxxxxxxxxx';
146
-
147
- -- 调用 LLM 生成文本
148
- SELECT AI_COMPLETE('connection:conn_bailian', '请用一句话总结:' || content) AS summary
149
- FROM articles
150
- LIMIT 10;
151
-
152
- -- 通过平台 Endpoint 调用(管理员预配置)
153
- SELECT AI_COMPLETE('endpoint:my_llm_endpoint', prompt_col) AS result
154
- FROM my_table;
155
- ```
156
-
157
- ### AI_EMBEDDING(文本向量化)
158
-
159
- ```sql
160
- -- 将文本转为向量(用于语义搜索)
161
- SELECT id, content,
162
- AI_EMBEDDING('connection:conn_bailian', content) AS embedding
163
- FROM documents;
164
-
165
- -- 结合向量索引做语义搜索
166
- SELECT id, content,
167
- cosine_distance(embedding, AI_EMBEDDING('connection:conn_bailian', '查询文本')) AS dist
168
- FROM doc_embeddings
169
- ORDER BY dist
170
- LIMIT 10;
171
- ```
@@ -1,156 +0,0 @@
1
- ---
2
- name: clickzetta-file-import-pipeline
3
- description: |
4
- 从 URL、本地文件或 Volume 路径将数据导入到 ClickZetta 表中,覆盖文件下载、格式推断、
5
- 表创建、COPY INTO 导入、结果验证的完整流程。当用户说"导入数据"、"从 URL 加载"、
6
- "上传 CSV 到表"、"文件导入"、"COPY INTO"时触发。包含 ClickZetta USER VOLUME 机制、
7
- COPY INTO 语法、格式推断规则、写入模式语义等平台特有知识。
8
- Keywords: file import, URL, CSV, JSON, Parquet, COPY INTO, Volume
9
- ---
10
-
11
- # URL/文件数据导入工作流
12
-
13
- ## 指令
14
-
15
- ### 步骤 1:获取源文件并上传到 Volume
16
- 根据数据来源选择对应方式:
17
- - **HTTP/HTTPS URL**:需要先用外部工具下载到本地,然后用 `PUT` 命令上传到 User Volume
18
- - **本地文件**:执行 SQL `PUT '/local/path/file.csv' TO USER VOLUME` 上传
19
- - **Volume 路径**:文件已在 Volume 上,跳过此步骤
20
- - **外部 Volume(OSS/S3/COS)**:文件已在外部 Volume,直接使用
21
- - 记录上传后的 Volume 名称和文件名,后续步骤需要
22
-
23
- > ⚠️ **注意**:文件上传操作参考 `clickzetta-volume-manager` skill。
24
-
25
- ### 步骤 2:推断文件格式
26
- 根据文件扩展名推断格式(ClickZetta COPY INTO 支持的格式):
27
- - `.csv`, `.tsv`, `.txt` → CSV 格式
28
- - `.json`, `.jsonl`, `.ndjson` → JSON 格式
29
- - `.parquet`, `.pq` → PARQUET 格式
30
- - `.orc` → ORC 格式
31
- - `.bson` → BSON 格式
32
- 如果扩展名不明确,执行 `SELECT * FROM VOLUME ... USING format` 预览文件内容来确认格式和 schema。
33
-
34
- ### 步骤 3:确认或创建目标表
35
- 根据写入模式处理目标表:
36
- - **create 模式**:表必须不存在。执行 `SELECT * FROM VOLUME ... LIMIT 5` 推断 schema,然后执行 `CREATE TABLE` 创建表
37
- - **append 模式**:表必须已存在。用 `DESC TABLE <table_name>` 确认表存在并检查列兼容性
38
- - **overwrite 模式**:表存在则先清空。执行 `TRUNCATE TABLE table_name`,再执行 COPY INTO(⚠️ 不支持 `COPY OVERWRITE INTO` 语法)
39
-
40
- ### 步骤 4:执行 COPY INTO 导入数据
41
- 执行 COPY INTO 语句。核心语法:
42
-
43
- ```sql
44
- COPY INTO target_table
45
- FROM VOLUME volume_name
46
- USING format_type
47
- OPTIONS('option_name' = 'value')
48
- FILES('filename');
49
- ```
50
-
51
- 对于 USER VOLUME(通过 PUT 命令上传的文件):
52
- ```sql
53
- COPY INTO target_table
54
- FROM USER VOLUME
55
- USING CSV
56
- OPTIONS('header' = 'true')
57
- FILES('uploaded_filename');
58
- ```
59
-
60
- CSV 格式可附加 OPTIONS:
61
- ```sql
62
- COPY INTO target_table
63
- FROM VOLUME vol
64
- USING CSV
65
- OPTIONS('header' = 'true', 'sep' = ',', 'quote' = '"', 'nullValue' = '')
66
- FILES('data.csv');
67
- ```
68
-
69
- ⚠️ **语法顺序要求**:`OPTIONS` 必须在 `FILES` 之前,否则报错 `Syntax error - missing EQ at '('`
70
-
71
- overwrite 模式(⚠️ 不支持 `COPY OVERWRITE INTO`):
72
- ```sql
73
- -- 正确方式:先 TRUNCATE 再 COPY
74
- TRUNCATE TABLE target_table;
75
- COPY INTO target_table FROM VOLUME vol USING CSV FILES('data.csv');
76
- ```
77
-
78
- ### 步骤 5:验证导入结果
79
- 执行验证查询:
80
- ```sql
81
- SELECT COUNT(*) as row_count FROM target_table;
82
- SELECT * FROM target_table LIMIT 5;
83
- ```
84
- 确认行数符合预期,数据内容正确。
85
-
86
- ## 示例
87
-
88
- ### 示例 1:从 URL 导入 CSV 到新表
89
- ```sql
90
- -- 1. 下载 URL 文件到本地,然后上传到 User Volume
91
- PUT '/tmp/data.csv' TO USER VOLUME;
92
-
93
- -- 2. 预览文件内容推断 schema
94
- SELECT * FROM USER VOLUME USING CSV OPTIONS('header' = 'true') FILES('data.csv') LIMIT 5;
95
- -- 推断出列:id INT, name STRING, value DOUBLE
96
-
97
- -- 3. 创建目标表
98
- CREATE TABLE imported_data (id INT, name STRING, value DOUBLE);
99
-
100
- -- 4. 执行 COPY INTO 导入(注意:OPTIONS 必须在 FILES 之前)
101
- COPY INTO imported_data FROM USER VOLUME USING CSV OPTIONS('header' = 'true') FILES('data.csv');
102
-
103
- -- 5. 验证导入结果
104
- SELECT COUNT(*) FROM imported_data;
105
- ```
106
-
107
- ### 示例 2:追加 Parquet 数据到已有表
108
- ```sql
109
- -- 1. 上传本地文件到 User Volume
110
- PUT '/local/new_batch.parquet' TO USER VOLUME;
111
-
112
- -- 2. 确认目标表存在
113
- DESC TABLE existing_table;
114
-
115
- -- 3. 执行 COPY INTO 导入(Parquet 格式通常不需要 OPTIONS)
116
- COPY INTO existing_table FROM USER VOLUME USING PARQUET FILES('new_batch.parquet');
117
-
118
- -- 4. 验证导入结果
119
- SELECT COUNT(*) FROM existing_table;
120
- ```
121
-
122
- ### 示例 3:从外部 Volume(OSS)导入
123
- ```sql
124
- -- 1. 查看 Volume 中的文件列表
125
- SHOW VOLUME DIRECTORY my_oss_volume;
126
-
127
- -- 2. 预览文件内容
128
- SELECT * FROM VOLUME my_oss_volume USING CSV OPTIONS('header' = 'true') FILES('data.csv') LIMIT 5;
129
-
130
- -- 3. 创建目标表并导入(注意:OPTIONS 必须在 FILES 之前)
131
- CREATE TABLE imported_data (col1 INT, col2 STRING);
132
- COPY INTO imported_data FROM VOLUME my_oss_volume USING CSV OPTIONS('header' = 'true') FILES('data.csv');
133
- ```
134
-
135
- ## 故障排除
136
-
137
- | 错误 | 原因 | 解决方案 |
138
- |------|------|----------|
139
- | COPY INTO 报 "table not found" | create 模式下表未创建,或 append 模式下表名拼写错误 | 先用 `SHOW TABLES` 确认表是否存在 |
140
- | COPY INTO 报 "file not found" | FILES 中的文件名与 Volume 上的实际文件名不匹配 | 执行 `SHOW VOLUME DIRECTORY vol_name` 或 `SHOW USER VOLUME DIRECTORY` 确认文件名,注意大小写敏感 |
141
- | COPY INTO 报语法错误 "missing EQ at '('" | OPTIONS 放在了 FILES 之后 | 调整顺序,确保 `OPTIONS` 在 `FILES` 之前:`USING CSV OPTIONS(...) FILES(...)` |
142
- | CSV 导入列数不匹配 | CSV 文件有 header 行但未指定 `OPTIONS('header'='true')`,导致 header 被当作数据行 | 添加 `OPTIONS('header' = 'true')`,或检查 CSV 分隔符是否正确(sep 参数) |
143
- | COPY INTO 报 "schema mismatch" | 文件中的数据类型与目标表列定义不兼容 | 执行 `SELECT * FROM VOLUME ... USING format LIMIT 5` 预览实际数据,调整表定义或使用列映射 |
144
- | overwrite 模式数据未清空 | 使用了 `COPY OVERWRITE INTO` 语法(不支持) | overwrite 模式应先用 `TRUNCATE TABLE` 清空表,再执行 `COPY INTO` |
145
- | SELECT FROM VOLUME 报错 | 格式不匹配或多格式文件混合 | 确认 USING 后的格式与实际文件格式一致;使用 `FILES()` 指定文件或 `SUBDIRECTORY` 指定子目录 |
146
- | PUT 命令失败 | 本地文件路径不存在 | 确认本地文件路径正确,文件存在 |
147
-
148
- ---
149
-
150
- ## 依赖的 Skills
151
-
152
- | 操作 | 需要加载的 Skill |
153
- |------|-----------------|
154
- | 文件上传/下载/删除 | `clickzetta-volume-manager` |
155
- | 查询 Volume 文件内容 | `clickzetta-volume-manager` |
156
- | COPY INTO 导入 | 本 Skill |