@clickzetta/cz-cli-linux-x64 0.3.4 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/package.json +1 -1
- package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
- package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
- package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
- package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
- package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
- package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
- package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
- package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
- package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
- package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
- package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -293
- package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -457
- package/bin/skills/clickzetta-concepts/SKILL.md +0 -282
- package/bin/skills/clickzetta-concepts/references/brands-and-endpoints.md +0 -79
- package/bin/skills/clickzetta-concepts/references/object-model.md +0 -311
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -165
- package/bin/skills/clickzetta-data-lifecycle/SKILL.md +0 -211
- package/bin/skills/clickzetta-data-lifecycle/references/lifecycle-reference.md +0 -175
- package/bin/skills/clickzetta-data-recovery/SKILL.md +0 -215
- package/bin/skills/clickzetta-data-recovery/evals/evals.json +0 -35
- package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
- package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
- package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
- package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
- package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -540
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -259
- package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -112
- package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -257
- package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
- package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
- package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
- package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -429
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -268
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
- package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -120
- package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
- package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
- package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -156
- package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
- package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
- package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
- package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
- package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
- package/bin/skills/clickzetta-information-schema/SKILL.md +0 -367
- package/bin/skills/clickzetta-information-schema/references/instance-views-reference.md +0 -276
- package/bin/skills/clickzetta-information-schema/references/metering-views-reference.md +0 -137
- package/bin/skills/clickzetta-information-schema/references/views-reference.md +0 -271
- package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -639
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -324
- package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
- package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
- package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
- package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
- package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
- package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
- package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
- package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
- package/bin/skills/clickzetta-metadata-query/SKILL.md +0 -298
- package/bin/skills/clickzetta-metadata-query/references/show-desc-reference.md +0 -326
- package/bin/skills/clickzetta-monitoring/SKILL.md +0 -199
- package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
- package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -427
- package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
- package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
- package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
- package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
- package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
- package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -197
- package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
- package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
- package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
- package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -379
- package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
- package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -185
- package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
- package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -222
- package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -125
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -172
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
- package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
- package/bin/skills/clickzetta-studio-overview/SKILL.md +0 -170
- package/bin/skills/clickzetta-studio-overview/references/studio-modules.md +0 -173
- package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -206
- package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
- package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
- package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
- package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -292
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -199
- package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
- package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
|
@@ -1,268 +0,0 @@
|
|
|
1
|
-
# Dynamic Table 增量刷新历史查询指南
|
|
2
|
-
|
|
3
|
-
查看 DT/MV 的增量刷新历史有三种方式,适用于不同场景。
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## 方式一:SHOW DYNAMIC TABLE REFRESH HISTORY
|
|
8
|
-
|
|
9
|
-
查看 DT 的刷新作业级别信息,包括每次刷新的状态、耗时、触发方式、刷新模式等。
|
|
10
|
-
|
|
11
|
-
### 语法
|
|
12
|
-
|
|
13
|
-
```sql
|
|
14
|
-
-- 查看指定 DT 的刷新历史(使用 WHERE name = 过滤)
|
|
15
|
-
SHOW DYNAMIC TABLE REFRESH HISTORY WHERE name = 'my_dt';
|
|
16
|
-
|
|
17
|
-
-- 限制返回行数
|
|
18
|
-
SHOW DYNAMIC TABLE REFRESH HISTORY WHERE name = 'my_dt' LIMIT 10;
|
|
19
|
-
|
|
20
|
-
-- 组合 WHERE + LIMIT + 状态过滤
|
|
21
|
-
SHOW DYNAMIC TABLE REFRESH HISTORY WHERE name = 'my_dt' AND state = 'SUCCEED' LIMIT 20;
|
|
22
|
-
|
|
23
|
-
-- MV 也支持同样的语法
|
|
24
|
-
SHOW MATERIALIZED VIEW REFRESH HISTORY WHERE name = 'my_mv' LIMIT 10;
|
|
25
|
-
```
|
|
26
|
-
|
|
27
|
-
> ⚠️ 注意:`FOR <table_name>` 语法在当前版本中可能返回空结果,请使用 `WHERE name = '<table_name>'` 语法。
|
|
28
|
-
|
|
29
|
-
### 输出列
|
|
30
|
-
|
|
31
|
-
| 列名 | 类型 | 说明 |
|
|
32
|
-
|------|------|------|
|
|
33
|
-
| workspace_name | STRING | 所属 Workspace |
|
|
34
|
-
| schema_name | STRING | 所属 Schema |
|
|
35
|
-
| name | STRING | DT/MV 名称 |
|
|
36
|
-
| virtual_cluster | STRING | 执行刷新的虚拟集群 |
|
|
37
|
-
| start_time | TIMESTAMP | 刷新开始时间 |
|
|
38
|
-
| end_time | TIMESTAMP | 刷新结束时间(运行中为 NULL) |
|
|
39
|
-
| duration | INTERVAL | 刷新耗时(运行中显示已经过的时间) |
|
|
40
|
-
| state | STRING | 刷新状态(SUCCEED / FAILED / RUNNING 等) |
|
|
41
|
-
| refresh_trigger | STRING | 触发方式:`SYSTEM_SCHEDULED`(系统调度自动触发)或 `MANUAL`(用户手动 REFRESH) |
|
|
42
|
-
| refresh_mode | STRING | 刷新模式,见下方详细说明 |
|
|
43
|
-
| error_message | STRING | 失败时的错误信息(成功时为 NULL) |
|
|
44
|
-
| source_tables | ARRAY<MAP<STRING,STRING>> | 源表列表,每个元素是一个 MAP,包含 `workspace`、`schema`、`table_name` 三个 key |
|
|
45
|
-
| stats | MAP<STRING,STRING> | 刷新统计,包含 `rows_inserted`(插入行数)和 `rows_deleted`(删除行数) |
|
|
46
|
-
| job_id | STRING | 对应的 Job ID,可用于关联 `information_schema.job_history` 查更多详情 |
|
|
47
|
-
|
|
48
|
-
### refresh_mode 详解
|
|
49
|
-
|
|
50
|
-
`refresh_mode` 是判断增量计算是否生效的关键字段:
|
|
51
|
-
|
|
52
|
-
| 值 | 含义 | 说明 |
|
|
53
|
-
|----|------|------|
|
|
54
|
-
| `INCREMENTAL` | 增量刷新 | 增量引擎成功生成了增量计划,只处理了源表的变更数据 |
|
|
55
|
-
| `FULL` | 全量刷新 | 回退到全量重算。可能原因:首次刷新、维度表变更、增量计划生成失败、用户强制全量等 |
|
|
56
|
-
| `NO_DATA` | 无数据变更 | 源表在上次刷新后没有新的数据变更,本次刷新跳过计算 |
|
|
57
|
-
|
|
58
|
-
### source_tables 详解
|
|
59
|
-
|
|
60
|
-
`source_tables` 列返回该次刷新涉及的所有输入表信息,每个元素是一个 MAP:
|
|
61
|
-
|
|
62
|
-
```
|
|
63
|
-
[
|
|
64
|
-
{"workspace": "my_ws", "schema": "public", "table_name": "orders"},
|
|
65
|
-
{"workspace": "my_ws", "schema": "public", "table_name": "dim_product"}
|
|
66
|
-
]
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
### stats 详解
|
|
70
|
-
|
|
71
|
-
`stats` 列返回该次刷新对目标表的写入统计:
|
|
72
|
-
|
|
73
|
-
```
|
|
74
|
-
{"rows_inserted": "1000", "rows_deleted": "50"}
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
- `rows_inserted`:本次刷新向目标表插入的行数
|
|
78
|
-
- `rows_deleted`:本次刷新从目标表删除的行数(增量模式下,更新操作会产生 delete + insert)
|
|
79
|
-
|
|
80
|
-
### 典型用法
|
|
81
|
-
|
|
82
|
-
```sql
|
|
83
|
-
-- 查看最近 5 次刷新是否成功
|
|
84
|
-
SHOW DYNAMIC TABLE REFRESH HISTORY WHERE name = 'my_dt' LIMIT 5;
|
|
85
|
-
|
|
86
|
-
-- 查看失败的刷新记录
|
|
87
|
-
SHOW DYNAMIC TABLE REFRESH HISTORY WHERE name = 'my_dt' AND state = 'FAILED';
|
|
88
|
-
|
|
89
|
-
-- 查看是否回退到了全量刷新(排查增量是否生效)
|
|
90
|
-
SHOW DYNAMIC TABLE REFRESH HISTORY WHERE name = 'my_dt' AND refresh_mode = 'FULL';
|
|
91
|
-
|
|
92
|
-
-- 查看无数据变更的刷新(源表没有新数据时会出现)
|
|
93
|
-
SHOW DYNAMIC TABLE REFRESH HISTORY WHERE name = 'my_dt' AND refresh_mode = 'NO_DATA';
|
|
94
|
-
|
|
95
|
-
-- 查看系统自动调度的刷新
|
|
96
|
-
SHOW DYNAMIC TABLE REFRESH HISTORY WHERE name = 'my_dt' AND refresh_trigger = 'SYSTEM_SCHEDULED';
|
|
97
|
-
```
|
|
98
|
-
|
|
99
|
-
---
|
|
100
|
-
|
|
101
|
-
## 方式二:DESC HISTORY
|
|
102
|
-
|
|
103
|
-
查看表的版本级别历史,包括每个版本的行数、字节数、操作类型等。适用于了解数据变更粒度。
|
|
104
|
-
|
|
105
|
-
### 语法
|
|
106
|
-
|
|
107
|
-
```sql
|
|
108
|
-
-- 查看 DT 的版本历史
|
|
109
|
-
DESC HISTORY my_dt;
|
|
110
|
-
|
|
111
|
-
-- 查看源表的版本历史
|
|
112
|
-
DESC HISTORY source_table;
|
|
113
|
-
|
|
114
|
-
-- 支持 WHERE 过滤
|
|
115
|
-
DESC HISTORY my_dt WHERE version > 10;
|
|
116
|
-
|
|
117
|
-
-- 支持 LIMIT
|
|
118
|
-
DESC HISTORY my_dt LIMIT 20;
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
### 输出列
|
|
122
|
-
|
|
123
|
-
对于普通表(DESC_TABLE_HISTORY):
|
|
124
|
-
|
|
125
|
-
| 列名 | 类型 | 说明 |
|
|
126
|
-
|------|------|------|
|
|
127
|
-
| sequence | BIGINT | 序列号 |
|
|
128
|
-
| version | BIGINT | 版本号 |
|
|
129
|
-
| time | TIMESTAMP | 版本创建时间 |
|
|
130
|
-
| total_rows | BIGINT | 该版本的总行数 |
|
|
131
|
-
| total_bytes | BIGINT | 该版本的总字节数 |
|
|
132
|
-
| user | STRING | 操作用户 |
|
|
133
|
-
| operation | STRING | 操作类型(INSERT / COMPACTION / REFRESH 等) |
|
|
134
|
-
| job_id | STRING | 对应的 Job ID |
|
|
135
|
-
|
|
136
|
-
对于 DT/MV(DESC_MV_HISTORY),额外包含:
|
|
137
|
-
|
|
138
|
-
| 列名 | 类型 | 说明 |
|
|
139
|
-
|------|------|------|
|
|
140
|
-
| source_tables | ARRAY<MAP<STRING,STRING>> | 源表及其对应的版本信息 |
|
|
141
|
-
|
|
142
|
-
DESC HISTORY 对 DT/MV 的 `source_tables` 比 SHOW REFRESH HISTORY 更详细,包含每个源表在该版本对应的快照信息:
|
|
143
|
-
|
|
144
|
-
```
|
|
145
|
-
[
|
|
146
|
-
{"table_name": "orders", "workspace": "my_ws", "schema": "public", "version": "123", "sequence": "5", "commit_time": "2025-01-15 10:30:00"},
|
|
147
|
-
{"table_name": "dim_product", "workspace": "my_ws", "schema": "public", "version": "456", "sequence": "2", "commit_time": "2025-01-15 08:00:00"}
|
|
148
|
-
]
|
|
149
|
-
```
|
|
150
|
-
|
|
151
|
-
- `version`:源表的 snapshot_id
|
|
152
|
-
- `sequence`:源表的 sequence 号
|
|
153
|
-
- `commit_time`:源表该版本的提交时间
|
|
154
|
-
|
|
155
|
-
这些信息可以用来追溯某次刷新读取了源表的哪个版本数据。
|
|
156
|
-
|
|
157
|
-
### 典型用法
|
|
158
|
-
|
|
159
|
-
```sql
|
|
160
|
-
-- 查看 DT 最近的版本变化,确认 compaction 是否正常执行
|
|
161
|
-
DESC HISTORY my_dt LIMIT 10;
|
|
162
|
-
|
|
163
|
-
-- 查看源表的版本历史,判断数据写入频率
|
|
164
|
-
DESC HISTORY source_table LIMIT 20;
|
|
165
|
-
|
|
166
|
-
-- 查看 DT 的 compaction 记录
|
|
167
|
-
DESC HISTORY my_dt WHERE operation = 'COMPACTION';
|
|
168
|
-
```
|
|
169
|
-
|
|
170
|
-
---
|
|
171
|
-
|
|
172
|
-
## 方式三:information_schema.materialized_view_refresh_history
|
|
173
|
-
|
|
174
|
-
从 information_schema 查询刷新历史,适合跨表批量分析、与其他系统集成、或做长期趋势监控。数据按天分区(pt_date),保留天数由系统配置决定。
|
|
175
|
-
|
|
176
|
-
### 语法
|
|
177
|
-
|
|
178
|
-
```sql
|
|
179
|
-
-- 查看指定 DT 的刷新历史
|
|
180
|
-
SELECT *
|
|
181
|
-
FROM information_schema.materialized_view_refresh_history
|
|
182
|
-
WHERE materialized_view_name = 'my_dt'
|
|
183
|
-
ORDER BY start_time DESC
|
|
184
|
-
LIMIT 10;
|
|
185
|
-
|
|
186
|
-
-- 查看某天所有 DT 的刷新情况
|
|
187
|
-
SELECT materialized_view_name, status, start_time, end_time, error_message
|
|
188
|
-
FROM information_schema.materialized_view_refresh_history
|
|
189
|
-
WHERE pt_date = '2025-01-15'
|
|
190
|
-
ORDER BY start_time DESC;
|
|
191
|
-
|
|
192
|
-
-- 查看失败的刷新
|
|
193
|
-
SELECT materialized_view_name, error_code, error_message, start_time
|
|
194
|
-
FROM information_schema.materialized_view_refresh_history
|
|
195
|
-
WHERE status = 'FAILED' AND pt_date >= '2025-01-01'
|
|
196
|
-
ORDER BY start_time DESC;
|
|
197
|
-
```
|
|
198
|
-
|
|
199
|
-
### 输出列
|
|
200
|
-
|
|
201
|
-
| 列名 | 类型 | 说明 |
|
|
202
|
-
|------|------|------|
|
|
203
|
-
| workspace_name | STRING | 所属 Workspace |
|
|
204
|
-
| schema_name | STRING | 所属 Schema |
|
|
205
|
-
| materialized_view_name | STRING | DT/MV 名称 |
|
|
206
|
-
| cru | DOUBLE | 消耗的计算资源单位 |
|
|
207
|
-
| virtual_cluster_name | STRING | 执行刷新的虚拟集群 |
|
|
208
|
-
| status | STRING | 刷新状态 |
|
|
209
|
-
| scheduled_start_time | TIMESTAMP | 计划开始时间 |
|
|
210
|
-
| start_time | TIMESTAMP | 实际开始时间 |
|
|
211
|
-
| end_time | TIMESTAMP | 结束时间 |
|
|
212
|
-
| error_code | STRING | 错误码 |
|
|
213
|
-
| error_message | STRING | 错误信息 |
|
|
214
|
-
| pt_date | STRING | 分区日期 |
|
|
215
|
-
|
|
216
|
-
### 典型用法
|
|
217
|
-
|
|
218
|
-
```sql
|
|
219
|
-
-- 统计某个 DT 最近 7 天的刷新成功率
|
|
220
|
-
SELECT
|
|
221
|
-
pt_date,
|
|
222
|
-
COUNT(*) AS total,
|
|
223
|
-
SUM(CASE WHEN status = 'SUCCEED' THEN 1 ELSE 0 END) AS success,
|
|
224
|
-
SUM(CASE WHEN status = 'FAILED' THEN 1 ELSE 0 END) AS failed
|
|
225
|
-
FROM information_schema.materialized_view_refresh_history
|
|
226
|
-
WHERE materialized_view_name = 'my_dt'
|
|
227
|
-
AND pt_date >= DATE_FORMAT(DATEADD(DAY, -7, CURRENT_DATE()), '%Y-%m-%d')
|
|
228
|
-
GROUP BY pt_date
|
|
229
|
-
ORDER BY pt_date;
|
|
230
|
-
|
|
231
|
-
-- 查看消耗 CRU 最多的刷新
|
|
232
|
-
SELECT materialized_view_name, cru, start_time, end_time
|
|
233
|
-
FROM information_schema.materialized_view_refresh_history
|
|
234
|
-
WHERE pt_date >= '2025-01-01'
|
|
235
|
-
ORDER BY cru DESC
|
|
236
|
-
LIMIT 10;
|
|
237
|
-
```
|
|
238
|
-
|
|
239
|
-
### 与 information_schema.job_history 的区别
|
|
240
|
-
|
|
241
|
-
`information_schema.job_history` 记录所有类型的 Job(SQL 查询、DML、DDL 等),而 `materialized_view_refresh_history` 专门记录 DT/MV 的刷新历史,字段更有针对性。
|
|
242
|
-
|
|
243
|
-
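如果需要查看刷新 Job 的完整信息(如 job_text、input_bytes 等),可以先通过 `SHOW DYNAMIC TABLE REFRESH HISTORY` 获取 job_id(上文 `materialized_view_refresh_history` 的输出列中没有 job_id),再到 `information_schema.job_history` 中按 job_id 查询: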
|
|
244
|
-
|
|
245
|
-
```sql
|
|
246
|
-
-- 通过 SHOW DYNAMIC TABLE REFRESH HISTORY 获取 job_id,再到 job_history 查详情
|
|
247
|
-
SELECT *
|
|
248
|
-
FROM information_schema.job_history
|
|
249
|
-
WHERE job_id = '<从 SHOW REFRESH HISTORY 获取的 job_id>'
|
|
250
|
-
AND pt_date = '2025-01-15';
|
|
251
|
-
```
|
|
252
|
-
|
|
253
|
-
---
|
|
254
|
-
|
|
255
|
-
## 三种方式对比
|
|
256
|
-
|
|
257
|
-
| 特性 | SHOW REFRESH HISTORY | DESC HISTORY | information_schema |
|
|
258
|
-
|------|---------------------|--------------|-------------------|
|
|
259
|
-
| 粒度 | 刷新作业级别 | 表版本级别 | 刷新作业级别 |
|
|
260
|
-
| 刷新模式(增量/全量/无数据) | ✅ refresh_mode | ❌ | ❌ |
|
|
261
|
-
| 触发方式(调度/手动) | ✅ refresh_trigger | ❌ | ❌ |
|
|
262
|
-
| 写入统计(inserted/deleted) | ✅ stats | ❌ | ❌ |
|
|
263
|
-
| 源表列表 | ✅ 表名级别 | ✅ 含版本/sequence/commit_time | ❌ |
|
|
264
|
-
| 版本号/总行数/总字节数 | ❌ | ✅ version/total_rows/total_bytes | ❌ |
|
|
265
|
-
| CRU 消耗 | ❌ | ❌ | ✅ cru |
|
|
266
|
-
| 跨表批量查询 | ❌(单表) | ❌(单表) | ✅(可批量) |
|
|
267
|
-
| compaction 记录 | ❌ | ✅ | ❌ |
|
|
268
|
-
| 适用场景 | 排查增量是否生效、刷新状态 | 查看数据版本变化、追溯源表版本 | 批量分析/监控/CRU 统计 |
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
# Dynamic Table SQL 限制与支持矩阵
|
|
2
|
-
|
|
3
|
-
本文档列出 Dynamic Table 增量计算支持和不支持的 SQL 模式。
|
|
4
|
-
|
|
5
|
-
## JOIN 类型支持
|
|
6
|
-
|
|
7
|
-
| JOIN 类型 | 增量支持 | 说明 |
|
|
8
|
-
|-----------|---------|------|
|
|
9
|
-
| INNER JOIN | ✅ | 完全支持 |
|
|
10
|
-
| LEFT JOIN (LEFT OUTER) | ✅ | 完全支持 |
|
|
11
|
-
| RIGHT JOIN (RIGHT OUTER) | ✅ | 完全支持 |
|
|
12
|
-
| FULL OUTER JOIN | ✅ | 完全支持 |
|
|
13
|
-
| LEFT SEMI JOIN | ✅ | 完全支持 |
|
|
14
|
-
| LEFT ANTI JOIN | ✅ | 完全支持 |
|
|
15
|
-
|
|
16
|
-
## 聚合函数支持
|
|
17
|
-
|
|
18
|
-
### 支持增量计算的聚合函数
|
|
19
|
-
|
|
20
|
-
- `SUM`, `SUM0`, `COUNT`, `COUNT_IF`, `MIN`, `MAX`, `MIN_BY`, `MAX_BY`
|
|
21
|
-
- `AVG`, `STDDEV_SAMP`, `STDDEV_POP`, `VAR_SAMP`, `VAR_POP`
|
|
22
|
-
- `PERCENTILE`, `MEDIAN`, `COUNT_DISTINCT`
|
|
23
|
-
- `BIT_OR`, `BIT_AND`, `BIT_XOR`, `BOOL_OR`, `BOOL_AND`
|
|
24
|
-
- `GROUP_BITMAP` 系列
|
|
25
|
-
- `COLLECT_SET`, `COLLECT_LIST`, `COLLECT_SET_ON_ARRAY`, `COLLECT_LIST_ON_ARRAY`
|
|
26
|
-
- `MAP_AGG`, `WM_CONCAT`
|
|
27
|
-
|
|
28
|
-
### 结果不稳定的聚合函数(增量结果可能与全量不一致)
|
|
29
|
-
|
|
30
|
-
- `ANY_VALUE`, `FIRST_VALUE`, `LAST_VALUE`
|
|
31
|
-
- `APPROX_COUNT_DISTINCT`, `APPROX_HISTOGRAM`, `APPROX_TOP_K`, `APPROX_PERCENTILE`
|
|
32
|
-
- `JSON_MERGE_AGG`
|
|
33
|
-
|
|
34
|
-
## 窗口函数支持
|
|
35
|
-
|
|
36
|
-
### 支持的窗口函数
|
|
37
|
-
|
|
38
|
-
- `ROW_NUMBER`, `RANK`, `DENSE_RANK`, `PERCENT_RANK`
|
|
39
|
-
- `FIRST_VALUE`, `LAST_VALUE`, `NTH_VALUE`
|
|
40
|
-
- `COUNT`, `SUM`, `SUM0`, `MIN`, `MAX`, `AVG`
|
|
41
|
-
- `LEAD`, `LAG`, `CUME_DIST`, `NTILE`
|
|
42
|
-
- `COLLECT_LIST`, `COLLECT_SET`, `COLLECT_SET_ON_ARRAY`, `COLLECT_LIST_ON_ARRAY`
|
|
43
|
-
|
|
44
|
-
## ORDER BY / LIMIT / OFFSET
|
|
45
|
-
|
|
46
|
-
支持 `ORDER BY`、`LIMIT`、`OFFSET` 语法。
|
|
47
|
-
|
|
48
|
-
⚠️ 不建议在 DT 中使用全局 `ORDER BY`。全局排序在每次增量刷新时开销非常大,推荐将排序逻辑放在下游查询数据时执行,而非 ETL 建模阶段。
|
|
49
|
-
|
|
50
|
-
## 非确定性函数
|
|
51
|
-
|
|
52
|
-
非确定性函数(如 `NOW()`、`CURRENT_TIMESTAMP`、`CURRENT_DATE`、`random()` 等)在不参与计算逻辑时默认支持。具体来说,只要这些函数不出现在以下位置,就可以正常使用:
|
|
53
|
-
- 窗口函数的 `PARTITION BY` key
|
|
54
|
-
- `JOIN` key
|
|
55
|
-
- `GROUP BY` key
|
|
56
|
-
- 其他函数的入参
|
|
57
|
-
|
|
58
|
-
典型场景:在 SELECT 中直接输出数据处理时间,记录每条数据被 DT 刷新处理的时刻:
|
|
59
|
-
|
|
60
|
-
```sql
|
|
61
|
-
CREATE DYNAMIC TABLE order_with_process_time AS
|
|
62
|
-
SELECT
|
|
63
|
-
id,
|
|
64
|
-
amount,
|
|
65
|
-
status,
|
|
66
|
-
CURRENT_TIMESTAMP AS process_time -- 记录刷新时的处理时间,直接输出到目标表
|
|
67
|
-
FROM orders
|
|
68
|
-
WHERE status = 'completed';
|
|
69
|
-
```
|
|
70
|
-
|
|
71
|
-
时间函数会在每次 REFRESH 时被常量折叠为当次刷新的时间戳。
|
|
72
|
-
|
|
73
|
-
## UDF / UDAF / UDTF
|
|
74
|
-
|
|
75
|
-
自定义函数需要在创建时声明为确定性函数(deterministic),才能在 DT 中使用增量计算。未声明确定性的自定义函数会导致增量计算被禁用。
|
|
76
|
-
|
|
77
|
-
## 源表类型限制
|
|
78
|
-
|
|
79
|
-
- **虚拟视图(VIEW)**:不能作为 DT 的输入表,会禁用增量计算
|
|
80
|
-
- **外部表(External Table)**:不支持增量计算
|
|
@@ -1,190 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: dynamic-table-alter
|
|
3
|
-
description: |
|
|
4
|
-
修改 ClickZetta 动态表(Dynamic Table)的结构和属性。支持直接 ALTER 操作(suspend、resume、
|
|
5
|
-
rename_column、set_comment、set_column_comment、set/unset properties)以及 CREATE OR REPLACE
|
|
6
|
-
重建操作(修改调度周期、计算集群、加列、减列、改列类型、改 SQL 定义)。当用户说"修改动态表"、
|
|
7
|
-
"动态表加列"、"改刷新间隔"、"暂停动态表"时触发。
|
|
8
|
-
---
|
|
9
|
-
|
|
10
|
-
# 动态表修改工作流
|
|
11
|
-
|
|
12
|
-
## 指令
|
|
13
|
-
|
|
14
|
-
### 步骤 1:确认动态表存在并获取当前定义
|
|
15
|
-
使用 `read_query` 执行 `SHOW CREATE TABLE schema_name.table_name` 获取动态表当前定义。
|
|
16
|
-
如果不确定是否为动态表,先用 `SHOW TABLES WHERE is_dynamic` 查看列表。
|
|
17
|
-
|
|
18
|
-
### 步骤 2:判断操作类型并选择执行方式
|
|
19
|
-
|
|
20
|
-
ClickZetta 动态表的修改操作分为两类:
|
|
21
|
-
|
|
22
|
-
**A. 直接 ALTER 操作**(6种,可直接执行):
|
|
23
|
-
|
|
24
|
-
1. **suspend** — 暂停调度任务:
|
|
25
|
-
```sql
|
|
26
|
-
ALTER DYNAMIC TABLE dt_name SUSPEND;
|
|
27
|
-
```
|
|
28
|
-
|
|
29
|
-
2. **resume** — 启动调度任务:
|
|
30
|
-
```sql
|
|
31
|
-
ALTER DYNAMIC TABLE dt_name RESUME;
|
|
32
|
-
```
|
|
33
|
-
|
|
34
|
-
3. **set_comment** — 修改表注释:
|
|
35
|
-
```sql
|
|
36
|
-
ALTER DYNAMIC TABLE dt_name SET COMMENT 'comment';
|
|
37
|
-
```
|
|
38
|
-
|
|
39
|
-
4. **rename_column** — 修改列名:
|
|
40
|
-
```sql
|
|
41
|
-
ALTER DYNAMIC TABLE dt_name RENAME COLUMN old_name TO new_name;
|
|
42
|
-
```
|
|
43
|
-
|
|
44
|
-
5. **set_column_comment** — 修改列注释(注意用 CHANGE COLUMN):
|
|
45
|
-
```sql
|
|
46
|
-
ALTER DYNAMIC TABLE dt_name CHANGE COLUMN column_name COMMENT 'comment';
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
6. **set/unset properties** — 修改表属性(目前为保留参数):
|
|
50
|
-
```sql
|
|
51
|
-
-- 设置属性
|
|
52
|
-
ALTER DYNAMIC TABLE dt_name SET PROPERTIES('key' = 'value');
|
|
53
|
-
-- 删除属性
|
|
54
|
-
ALTER DYNAMIC TABLE dt_name UNSET PROPERTIES('key');
|
|
55
|
-
```
|
|
56
|
-
|
|
57
|
-
**B. CREATE OR REPLACE 操作**(6种,需要重建动态表):
|
|
58
|
-
|
|
59
|
-
> ⚠️ **以下操作不支持 ALTER 语法**。`ALTER DYNAMIC TABLE ... SET REFRESH INTERVAL` 等语法不存在,会报语法错误。必须使用 `CREATE OR REPLACE DYNAMIC TABLE` 重建。
|
|
60
|
-
|
|
61
|
-
这些操作涉及调度配置(刷新周期、计算集群)或 SQL 查询定义的变化,无法通过 ALTER 直接完成:
|
|
62
|
-
|
|
63
|
-
7. **修改调度周期** — ❌ 不支持 `ALTER ... SET REFRESH INTERVAL`
|
|
64
|
-
8. **修改计算集群** — ❌ 不支持 `ALTER ... SET VCLUSTER`
|
|
65
|
-
9. **增加列**
|
|
66
|
-
10. **减列**
|
|
67
|
-
11. **修改列类型**
|
|
68
|
-
12. **修改 SQL 定义**
|
|
69
|
-
|
|
70
|
-
### 步骤 3:执行 CREATE OR REPLACE 重建(仅 B 类操作)
|
|
71
|
-
|
|
72
|
-
1. 用 `read_query` 执行 `SHOW CREATE TABLE schema_name.table_name` 获取原始 DDL
|
|
73
|
-
> ⚠️ `SHOW CREATE TABLE` 不支持 LIMIT/WHERE 子句,直接执行即可
|
|
74
|
-
2. 解析出:列定义、REFRESH 子句、AS SELECT 子句、COMMENT 等
|
|
75
|
-
3. 根据操作修改对应部分
|
|
76
|
-
4. 用 `write_query` 执行重建 SQL
|
|
77
|
-
|
|
78
|
-
**关于全量刷新的触发**:
|
|
79
|
-
- 简单的删除列 / 添加列(添加的列只是从源表 SELECT 透传,不参与 JOIN key、GROUP key 等计算)→ **增量刷新**
|
|
80
|
-
- 涉及计算逻辑变化(修改 WHERE 条件、修改聚合逻辑、新增列参与计算等)→ **全量刷新**
|
|
81
|
-
- 兼容类型变更(如 INT → BIGINT)→ **增量刷新**
|
|
82
|
-
|
|
83
|
-
### 步骤 4:验证修改结果
|
|
84
|
-
使用 `DESC TABLE dt_name` 确认修改生效。
|
|
85
|
-
|
|
86
|
-
---
|
|
87
|
-
|
|
88
|
-
## 示例
|
|
89
|
-
|
|
90
|
-
### 示例 1:修改调度周期
|
|
91
|
-
|
|
92
|
-
```sql
|
|
93
|
-
-- 原表
|
|
94
|
-
CREATE DYNAMIC TABLE dt_name
|
|
95
|
-
REFRESH INTERVAL 10 MINUTE vcluster DEFAULT
|
|
96
|
-
AS SELECT * FROM student02;
|
|
97
|
-
|
|
98
|
-
-- 修改后(改为 20 分钟)
|
|
99
|
-
CREATE OR REPLACE DYNAMIC TABLE dt_name
|
|
100
|
-
REFRESH INTERVAL 20 MINUTE vcluster DEFAULT
|
|
101
|
-
AS SELECT * FROM student02;
|
|
102
|
-
```
|
|
103
|
-
|
|
104
|
-
### 示例 2:修改计算集群
|
|
105
|
-
|
|
106
|
-
```sql
|
|
107
|
-
-- 原表
|
|
108
|
-
CREATE DYNAMIC TABLE dt_name
|
|
109
|
-
REFRESH INTERVAL 10 MINUTE vcluster DEFAULT
|
|
110
|
-
AS SELECT * FROM student02;
|
|
111
|
-
|
|
112
|
-
-- 修改后(改为 alter_vc 集群)
|
|
113
|
-
CREATE OR REPLACE DYNAMIC TABLE dt_name
|
|
114
|
-
REFRESH INTERVAL 10 MINUTE vcluster alter_vc
|
|
115
|
-
AS SELECT * FROM student02;
|
|
116
|
-
```
|
|
117
|
-
|
|
118
|
-
### 示例 3:增加列
|
|
119
|
-
|
|
120
|
-
```sql
|
|
121
|
-
-- 原表
|
|
122
|
-
CREATE DYNAMIC TABLE change_table (i, j)
|
|
123
|
-
AS SELECT * FROM dy_base_a;
|
|
124
|
-
|
|
125
|
-
-- 添加一列 col(涉及计算逻辑,下次刷新会全量刷新)
|
|
126
|
-
CREATE OR REPLACE DYNAMIC TABLE change_table (i, j, col)
|
|
127
|
-
AS SELECT i, j, j * 1 FROM dy_base_a;
|
|
128
|
-
|
|
129
|
-
REFRESH DYNAMIC TABLE change_table;
|
|
130
|
-
```
|
|
131
|
-
|
|
132
|
-
### 示例 4:减列
|
|
133
|
-
|
|
134
|
-
```sql
|
|
135
|
-
-- 原表有 i, j 两列
|
|
136
|
-
CREATE DYNAMIC TABLE change_table (i, j)
|
|
137
|
-
AS SELECT * FROM dy_base_a;
|
|
138
|
-
|
|
139
|
-
-- 减列(简单透传,增量刷新)
|
|
140
|
-
CREATE OR REPLACE DYNAMIC TABLE change_table (i)
|
|
141
|
-
AS SELECT i FROM dy_base_a;
|
|
142
|
-
```
|
|
143
|
-
|
|
144
|
-
### 示例 5:修改 SQL 定义
|
|
145
|
-
|
|
146
|
-
```sql
|
|
147
|
-
-- 修改 WHERE 过滤条件(全量刷新)
|
|
148
|
-
CREATE OR REPLACE DYNAMIC TABLE change_table (i, j)
|
|
149
|
-
AS SELECT * FROM dy_base_a WHERE i > 3;
|
|
150
|
-
|
|
151
|
-
REFRESH DYNAMIC TABLE change_table;
|
|
152
|
-
```
|
|
153
|
-
|
|
154
|
-
### 示例 6:修改列类型
|
|
155
|
-
|
|
156
|
-
```sql
|
|
157
|
-
-- INT → BIGINT(兼容类型,增量刷新)
|
|
158
|
-
CREATE OR REPLACE DYNAMIC TABLE change_table (i, j)
|
|
159
|
-
AS SELECT CAST(i AS BIGINT), j FROM dy_base_a;
|
|
160
|
-
|
|
161
|
-
REFRESH DYNAMIC TABLE change_table;
|
|
162
|
-
```
|
|
163
|
-
|
|
164
|
-
---
|
|
165
|
-
|
|
166
|
-
## 平台特有知识
|
|
167
|
-
|
|
168
|
-
- **CHANGE COLUMN 语法**:设置列注释用 `CHANGE COLUMN col COMMENT 'xxx'`,不是 `ALTER COLUMN`
|
|
169
|
-
- **RENAME COLUMN 语法**:`RENAME COLUMN old TO new`
|
|
170
|
-
- **DML 限制**:动态表默认不支持 UPDATE/DELETE/MERGE(因隐藏列 MV__KEY),如需 DML 须先执行 `SET cz.sql.dt.allow.dml = true;`
|
|
171
|
-
- **REFRESH 格式**:`REFRESH INTERVAL <N> <UNIT> vcluster <name>`,其中 `<UNIT>` 支持 SECOND/MINUTE/HOUR/DAY(例如 `REFRESH INTERVAL 10 MINUTE vcluster DEFAULT`)
|
|
172
|
-
- **CREATE OR REPLACE 风险**:涉及计算逻辑变化时会触发全量刷新,大表可能耗时较长
|
|
173
|
-
- **schema 前缀**:所有 ALTER/CREATE 语句中表名应包含 schema 前缀
|
|
174
|
-
- **列定义可省略类型**:`CREATE DYNAMIC TABLE dt (i, j) AS SELECT ...` 类型由 SELECT 推断
|
|
175
|
-
- **DROP 语法**:必须用 `DROP DYNAMIC TABLE dt_name`,不能用 `DROP TABLE dt_name`(会报错)
|
|
176
|
-
- **UNDROP 语法**:必须用 `UNDROP TABLE dt_name`,不能用 `UNDROP DYNAMIC TABLE dt_name`
|
|
177
|
-
- **DESC 语法**:动态表用 `DESC TABLE dt_name`,不要写 `DESC DYNAMIC TABLE dt_name EXTENDED`(EXTENDED 不支持)
|
|
178
|
-
|
|
179
|
-
## 故障排除
|
|
180
|
-
|
|
181
|
-
| 错误 | 原因 | 解决方案 |
|
|
182
|
-
|---|---|---|
|
|
183
|
-
| ALTER 报 "Syntax error at or near 'REFRESH'" | `ALTER ... SET REFRESH INTERVAL` 语法不存在 | 使用 `CREATE OR REPLACE DYNAMIC TABLE ... REFRESH INTERVAL ...` 重建 |
|
|
184
|
-
| ALTER 报 "unsupported operation" | 尝试对动态表执行 B 类操作的 ALTER 语法 | 使用 CREATE OR REPLACE 重建 |
|
|
185
|
-
| `DROP TABLE dt_name` 报错 | 动态表必须用 `DROP DYNAMIC TABLE` | 改为 `DROP DYNAMIC TABLE dt_name` |
|
|
186
|
-
| `UNDROP DYNAMIC TABLE` 报错 | UNDROP 不支持 DYNAMIC TABLE 关键字 | 改为 `UNDROP TABLE dt_name` |
|
|
187
|
-
| `DESC DYNAMIC TABLE ... EXTENDED` 报错 | 不支持 EXTENDED 参数 | 改为 `DESC TABLE dt_name`(不加 EXTENDED) |
|
|
188
|
-
| UPDATE/DELETE 报 "MV__KEY" 相关错误 | 动态表有隐藏列 MV__KEY,默认禁止 DML | 先执行 `SET cz.sql.dt.allow.dml = true;` |
|
|
189
|
-
| CREATE OR REPLACE 后数据为空 | AS SELECT 子句引用的源表或列不正确 | 先用 `read_query` 验证 SELECT 子句 |
|
|
190
|
-
| CREATE OR REPLACE 后全量刷新 | 新增列参与了计算逻辑(JOIN key、GROUP key 等) | 预期行为,等待全量刷新完成 |
|
|
@@ -1,120 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: clickzetta-external-catalog
|
|
3
|
-
description: |
|
|
4
|
-
配置和使用 ClickZetta Lakehouse External Catalog,实现对 Hive、Iceberg、Databricks 等
|
|
5
|
-
外部数据源的联邦查询(只读)。覆盖完整创建流程:存储连接 → Catalog Connection →
|
|
6
|
-
External Catalog,以及 SHOW/DESC/查询外部表等操作。
|
|
7
|
-
当用户说"外部数据目录"、"External Catalog"、"联邦查询"、"Hive 联邦"、
|
|
8
|
-
"访问 Hive 数据"、"Databricks 联邦"、"Iceberg 联邦"、"跨数据源查询"、
|
|
9
|
-
"不迁移数据直接查询"、"Catalog Connection"时触发。
|
|
10
|
-
Keywords: external catalog, Hive, Iceberg, Databricks, federation, read-only
|
|
11
|
-
---
|
|
12
|
-
|
|
13
|
-
# ClickZetta External Catalog
|
|
14
|
-
|
|
15
|
-
> ⚠️ 创建 External Catalog 需要 `instance_admin` 角色。查询权限可通过 GRANT 授予其他用户。
|
|
16
|
-
|
|
17
|
-
阅读 [references/external-catalog-ddl.md](references/external-catalog-ddl.md) 了解完整语法。
|
|
18
|
-
|
|
19
|
-
## 概述
|
|
20
|
-
|
|
21
|
-
External Catalog 让 Lakehouse 可以**不迁移数据**,直接对外部数据系统(Hive、Iceberg、Databricks)执行只读联邦查询。
|
|
22
|
-
|
|
23
|
-
**支持数据源**:Apache Hive · Iceberg REST Catalog · Databricks Unity Catalog
|
|
24
|
-
|
|
25
|
-
---
|
|
26
|
-
|
|
27
|
-
## 创建流程(三步)
|
|
28
|
-
|
|
29
|
-
### 步骤 1:创建存储连接
|
|
30
|
-
|
|
31
|
-
```sql
|
|
32
|
-
-- 阿里云 OSS
|
|
33
|
-
CREATE STORAGE CONNECTION IF NOT EXISTS catalog_storage_oss
|
|
34
|
-
TYPE OSS
|
|
35
|
-
ACCESS_ID = 'LTAIxxxxxxxxxxxx'
|
|
36
|
-
ACCESS_KEY = 'T8Gexxxxxxmtxxxxxx'
|
|
37
|
-
ENDPOINT = 'oss-cn-hangzhou-internal.aliyuncs.com';
|
|
38
|
-
```
|
|
39
|
-
|
|
40
|
-
### 步骤 2:创建 Catalog Connection
|
|
41
|
-
|
|
42
|
-
```sql
|
|
43
|
-
CREATE CATALOG CONNECTION IF NOT EXISTS hive_catalog_conn
|
|
44
|
-
TYPE hms
|
|
45
|
-
hive_metastore_uris = 'hms-host:9083'
|
|
46
|
-
storage_connection = 'catalog_storage_oss';
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
### 步骤 3:创建 External Catalog
|
|
50
|
-
|
|
51
|
-
```sql
|
|
52
|
-
-- ⚠️ CREATE EXTERNAL CATALOG 不支持 COMMENT 子句,加了会报错
|
|
53
|
-
-- ❌ 错误:CREATE EXTERNAL CATALOG my_hive_catalog CONNECTION hive_catalog_conn COMMENT '...';
|
|
54
|
-
-- ✅ 正确:
|
|
55
|
-
CREATE EXTERNAL CATALOG my_hive_catalog
|
|
56
|
-
CONNECTION hive_catalog_conn;
|
|
57
|
-
|
|
58
|
-
-- 如需带选项(如 Iceberg REST):
|
|
59
|
-
CREATE EXTERNAL CATALOG my_iceberg_catalog
|
|
60
|
-
CONNECTION iceberg_conn
|
|
61
|
-
OPTIONS ('key1' = 'value1', 'key2' = 'value2');
|
|
62
|
-
```
|
|
63
|
-
|
|
64
|
-
---
|
|
65
|
-
|
|
66
|
-
## 验证连通性
|
|
67
|
-
|
|
68
|
-
```sql
|
|
69
|
-
-- 查看 Schema 列表(验证连通)
|
|
70
|
-
SHOW SCHEMAS IN my_hive_catalog;
|
|
71
|
-
|
|
72
|
-
-- 查看表列表
|
|
73
|
-
SHOW TABLES IN my_hive_catalog.my_schema;
|
|
74
|
-
|
|
75
|
-
-- 查询数据
|
|
76
|
-
SELECT * FROM my_hive_catalog.my_schema.my_table LIMIT 10;
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
---
|
|
80
|
-
|
|
81
|
-
## 查看与管理
|
|
82
|
-
|
|
83
|
-
```sql
|
|
84
|
-
-- 列出所有 Catalog
|
|
85
|
-
SHOW CATALOGS;
|
|
86
|
-
|
|
87
|
-
-- 查看 Catalog 详情
|
|
88
|
-
DESC CATALOG my_hive_catalog;
|
|
89
|
-
|
|
90
|
-
-- 查看表结构
|
|
91
|
-
DESC TABLE my_hive_catalog.my_schema.my_table;
|
|
92
|
-
|
|
93
|
-
-- 删除 Catalog
|
|
94
|
-
DROP CATALOG IF EXISTS my_hive_catalog;
|
|
95
|
-
```
|
|
96
|
-
|
|
97
|
-
---
|
|
98
|
-
|
|
99
|
-
## 联邦查询示例
|
|
100
|
-
|
|
101
|
-
```sql
|
|
102
|
-
-- 外部 Hive 表 JOIN 内部 Lakehouse 表
|
|
103
|
-
SELECT h.order_id, h.amount, d.region_name
|
|
104
|
-
FROM my_hive_catalog.sales.orders h
|
|
105
|
-
JOIN public.dim_region d ON h.region_id = d.id
|
|
106
|
-
WHERE h.order_date >= '2024-01-01';
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
⚠️ 访问 External Catalog 中的表时必须使用三层命名空间语法:`catalog.schema.table`(内部 Lakehouse 表仍可使用 `schema.table`,如上例中的 `public.dim_region`)
|
|
110
|
-
|
|
111
|
-
---
|
|
112
|
-
|
|
113
|
-
## 常见问题
|
|
114
|
-
|
|
115
|
-
| 问题 | 原因 | 解决方案 |
|
|
116
|
-
|---|---|---|
|
|
117
|
-
| 无法连接 HMS | 网络未打通 | 通过 PrivateLink 打通 Lakehouse 与 HMS 服务器网络 |
|
|
118
|
-
| 权限不足 | 创建 External Catalog 时当前用户非 instance_admin;或查询时未被授予查询权限 | 创建:联系管理员授予 instance_admin 角色;查询:请管理员通过 GRANT 授予查询权限 |
|
|
119
|
-
| 查询报错找不到表 | 未使用三层语法 | 使用 `catalog.schema.table` 格式 |
|
|
120
|
-
| Databricks 连接失败 | 不在同一云平台 | 确保 Databricks 存储与 Lakehouse 在同一云平台 |
|