@clickzetta/cz-cli-darwin-arm64 0.3.78 → 0.3.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/package.json +1 -1
- package/bin/skills/clickzetta-access-control/LICENSE +0 -16
- package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
- package/bin/skills/clickzetta-access-control/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
- package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
- package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
- package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
- package/bin/skills/clickzetta-app-python-sdk/LICENSE +0 -16
- package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
- package/bin/skills/clickzetta-app-python-sdk/eval_cases.jsonl +0 -12
- package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
- package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
- package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
- package/bin/skills/clickzetta-batch-sync-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -227
- package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-bi-connect/LICENSE +0 -16
- package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
- package/bin/skills/clickzetta-bi-connect/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
- package/bin/skills/clickzetta-cdc-sync-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -633
- package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-data-ingest-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -237
- package/bin/skills/clickzetta-data-ingest-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-data-retention/LICENSE +0 -16
- package/bin/skills/clickzetta-data-retention/SKILL.md +0 -160
- package/bin/skills/clickzetta-data-retention/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-data-retention/references/lifecycle-reference.md +0 -175
- package/bin/skills/clickzetta-data-science/LICENSE +0 -16
- package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
- package/bin/skills/clickzetta-data-science/eval_cases.jsonl +0 -12
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
- package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
- package/bin/skills/clickzetta-data-sharing/LICENSE +0 -16
- package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
- package/bin/skills/clickzetta-data-sharing/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
- package/bin/skills/clickzetta-dba-guide/LICENSE +0 -16
- package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -542
- package/bin/skills/clickzetta-dba-guide/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-dw-modeling/LICENSE +0 -16
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -351
- package/bin/skills/clickzetta-dw-modeling/eval_cases.jsonl +0 -4
- package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
- package/bin/skills/clickzetta-dynamic-table/LICENSE +0 -16
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -230
- package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -253
- package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
- package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
- package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
- package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
- package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -427
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
- package/bin/skills/clickzetta-dynamic-table/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/SKILL.md +0 -27
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-column-validation-rules.md +0 -118
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-conversion-rules.md +0 -225
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-placeholder-rules.md +0 -182
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-refresh-rules.md +0 -98
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-self-reference-rules.md +0 -76
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-workflow.md +0 -109
- package/bin/skills/clickzetta-external-catalog/LICENSE +0 -16
- package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -123
- package/bin/skills/clickzetta-external-catalog/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
- package/bin/skills/clickzetta-external-function/LICENSE +0 -16
- package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
- package/bin/skills/clickzetta-external-function/eval_cases.jsonl +0 -4
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
- package/bin/skills/clickzetta-file-import-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -190
- package/bin/skills/clickzetta-file-import-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-index-manager/LICENSE +0 -16
- package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
- package/bin/skills/clickzetta-index-manager/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
- package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
- package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
- package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
- package/bin/skills/clickzetta-java-sdk/LICENSE +0 -16
- package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
- package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +0 -12
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
- package/bin/skills/clickzetta-kafka-ingest-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -769
- package/bin/skills/clickzetta-kafka-ingest-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -324
- package/bin/skills/clickzetta-lakehouse-connect/LICENSE +0 -16
- package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
- package/bin/skills/clickzetta-lakehouse-connect/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
- package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
- package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
- package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
- package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
- package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
- package/bin/skills/clickzetta-manage-comments/LICENSE +0 -16
- package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
- package/bin/skills/clickzetta-manage-comments/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-metadata/LICENSE +0 -16
- package/bin/skills/clickzetta-metadata/SKILL.md +0 -502
- package/bin/skills/clickzetta-metadata/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-metadata/references/instance-views-reference.md +0 -276
- package/bin/skills/clickzetta-metadata/references/metering-views-reference.md +0 -137
- package/bin/skills/clickzetta-metadata/references/show-desc-reference.md +0 -326
- package/bin/skills/clickzetta-metadata/references/views-reference.md +0 -271
- package/bin/skills/clickzetta-monitoring/LICENSE +0 -16
- package/bin/skills/clickzetta-monitoring/SKILL.md +0 -215
- package/bin/skills/clickzetta-monitoring/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
- package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
- package/bin/skills/clickzetta-oss-ingest-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -562
- package/bin/skills/clickzetta-oss-ingest-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-overview/LICENSE +0 -16
- package/bin/skills/clickzetta-overview/SKILL.md +0 -102
- package/bin/skills/clickzetta-overview/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-overview/references/brands-and-endpoints.md +0 -79
- package/bin/skills/clickzetta-overview/references/object-model.md +0 -311
- package/bin/skills/clickzetta-overview/references/studio-modules.md +0 -173
- package/bin/skills/clickzetta-pipeline-review/LICENSE +0 -16
- package/bin/skills/clickzetta-pipeline-review/SKILL.md +0 -377
- package/bin/skills/clickzetta-query-optimizer/LICENSE +0 -16
- package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
- package/bin/skills/clickzetta-query-optimizer/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
- package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
- package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
- package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
- package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
- package/bin/skills/clickzetta-realtime-sync-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -323
- package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-semantic-view/LICENSE +0 -16
- package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
- package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +0 -12
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
- package/bin/skills/clickzetta-spark-flink-connector/LICENSE +0 -16
- package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
- package/bin/skills/clickzetta-spark-flink-connector/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
- package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
- package/bin/skills/clickzetta-sql-pipeline-manager/LICENSE +0 -16
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -485
- package/bin/skills/clickzetta-sql-pipeline-manager/eval_cases.jsonl +0 -12
- package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
- package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -185
- package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
- package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -222
- package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -125
- package/bin/skills/clickzetta-sql-syntax-guide/LICENSE +0 -16
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
- package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
- package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
- package/bin/skills/clickzetta-studio-task-manager/LICENSE +0 -16
- package/bin/skills/clickzetta-studio-task-manager/SKILL.md +0 -652
- package/bin/skills/clickzetta-table-lineage/LICENSE +0 -16
- package/bin/skills/clickzetta-table-lineage/SKILL.md +0 -90
- package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +0 -1
- package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +0 -14
- package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +0 -38
- package/bin/skills/clickzetta-table-lineage/references/table_lineage_standalone.html +0 -562
- package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +0 -25
- package/bin/skills/clickzetta-table-stream-pipeline/LICENSE +0 -16
- package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -206
- package/bin/skills/clickzetta-table-stream-pipeline/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-vcluster-manager/LICENSE +0 -16
- package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
- package/bin/skills/clickzetta-vcluster-manager/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
- package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
- package/bin/skills/clickzetta-volume-manager/LICENSE +0 -16
- package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -292
- package/bin/skills/clickzetta-volume-manager/eval_cases.jsonl +0 -5
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -199
- package/bin/skills/clickzetta-zettapark/LICENSE +0 -16
- package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
- package/bin/skills/clickzetta-zettapark/eval_cases.jsonl +0 -12
- package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
- package/bin/skills/cz-cli/SKILL.md +0 -311
- package/bin/skills/cz-cli/references/profile-setup.md +0 -120
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
ClickZetta Skills License
|
|
2
|
-
© 2026 Yunqi Inc. All rights reserved.
|
|
3
|
-
LICENSE: Use of these materials (including all code, prompts, assets, files, and other components of these skills (collectively, "Skills")) is governed by your agreement with ClickZetta for the Service. If no separate agreement exists, use is governed by ClickZetta's Terms of Service (available at: https://yunqi.tech/documents/user-aggrement).
|
|
4
|
-
Your applicable agreement is referred to as the "Agreement." "Service" is as defined in the Agreement.
|
|
5
|
-
ADDITIONAL RESTRICTIONS: Notwithstanding anything in the Agreement to the contrary, you may not:
|
|
6
|
-
|
|
7
|
-
Extract from the Service or retain copies of the Skills outside use with the Service;
|
|
8
|
-
Reproduce or copy the Skills, except for temporary copies created automatically during authorized use of the Service;
|
|
9
|
-
Create derivative works based on the Skills;
|
|
10
|
-
Distribute, sublicense, or transfer the Skills to any third party;
|
|
11
|
-
Make, offer to sell, sell, or import any inventions embodied in the Skills; nor,
|
|
12
|
-
Reverse engineer, decompile, or disassemble the Skills.
|
|
13
|
-
|
|
14
|
-
The receipt, viewing, or possession of the Skills does not convey or imply any license or right beyond those expressly granted above.
|
|
15
|
-
Yunqi retains all rights, title, and interest in the Skills, including all copyrights, trademarks, patents, and all other applicable intellectual property rights.
|
|
16
|
-
THE SKILLS ARE PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SKILLS OR THE USE OR OTHER DEALINGS IN THE SKILLS.
|
|
@@ -1,351 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: clickzetta-dw-modeling
|
|
3
|
-
description: |
|
|
4
|
-
ClickZetta Lakehouse 数仓建模向导。先自主探索用户的数据现状,再给出有依据的
|
|
5
|
-
具体建议让用户选择,而不是让用户填空回答问卷。
|
|
6
|
-
覆盖三种分层模式:传统数仓分层(ODS/DWD/DWS/ADS)、大奖牌架构(Bronze/Silver/Gold)、
|
|
7
|
-
混合模式。数据管道与建模一体化设计,DDL 和管道配置同步输出。
|
|
8
|
-
核心原则:聚合计算层使用 Dynamic Table,不推荐物化视图。
|
|
9
|
-
当用户说"数仓建模"、"分层设计"、"建模方案"、"ODS/DWD/DWS"、"Medallion"、
|
|
10
|
-
"Bronze/Silver/Gold"、"事实表"、"维度表"、"宽表设计"、"星型模型"、"雪花模型"、
|
|
11
|
-
"分层架构"、"数据分层"、"建模向导"、"怎么设计表结构"、"数仓架构"、
|
|
12
|
-
"数据管道设计"、"数据流转"、"端到端数仓搭建"时触发。
|
|
13
|
-
Keywords: data warehouse, modeling, star schema, medallion, ODS, DWD, DWS, ADS, layering
|
|
14
|
-
---
|
|
15
|
-
|
|
16
|
-
# ClickZetta 数仓建模向导
|
|
17
|
-
|
|
18
|
-
阅读 [references/modeling-patterns.md](references/modeling-patterns.md) 了解各分层模式的详细模板。
|
|
19
|
-
|
|
20
|
-
---
|
|
21
|
-
|
|
22
|
-
## 工作模式:先探索,再建议
|
|
23
|
-
|
|
24
|
-
**不要问问卷式问题。先动手看数据,再给出有依据的选择题。**
|
|
25
|
-
|
|
26
|
-
用户最多只需要回答 2 个问题:
|
|
27
|
-
1. 选择给出的方案选项(A/B/C)
|
|
28
|
-
2. 补充看不到的信息(业务用途、查询场景)
|
|
29
|
-
|
|
30
|
-
---
|
|
31
|
-
|
|
32
|
-
## 第一阶段:自主探索数据现状
|
|
33
|
-
|
|
34
|
-
收到建模需求后,立即执行以下探索,**不要先问用户任何问题**:
|
|
35
|
-
|
|
36
|
-
```sql
|
|
37
|
-
-- Step 1: 看有哪些 schema
|
|
38
|
-
SHOW SCHEMAS;
|
|
39
|
-
|
|
40
|
-
-- Step 2: 看各 schema 下的表(对每个看起来有业务数据的 schema 执行)
|
|
41
|
-
SHOW TABLES IN <schema>;
|
|
42
|
-
|
|
43
|
-
-- Step 3: 查表大小和行数(先 describe_table 确认字段名)
|
|
44
|
-
SELECT table_schema, table_name, table_type,
|
|
45
|
-
ROUND(bytes/1024.0/1024/1024, 2) AS size_gb,
|
|
46
|
-
row_count,
|
|
47
|
-
last_modify_time
|
|
48
|
-
FROM information_schema.tables
|
|
49
|
-
WHERE table_type = 'MANAGED_TABLE'
|
|
50
|
-
ORDER BY bytes DESC NULLS LAST
|
|
51
|
-
LIMIT 20;
|
|
52
|
-
|
|
53
|
-
-- Step 4: 对最大的 2-3 张表抽样,了解字段和数据特征
|
|
54
|
-
SELECT * FROM <schema>.<table> LIMIT 5;
|
|
55
|
-
```
|
|
56
|
-
|
|
57
|
-
**探索时的判断逻辑:**
|
|
58
|
-
|
|
59
|
-
| 观察到的特征 | 推断 |
|
|
60
|
-
|---|---|
|
|
61
|
-
| 表名含 order/user/product/trade | 业务库原始数据,适合做 ODS/Bronze |
|
|
62
|
-
| 表名含 log/event/track/click | 埋点/日志数据,数据量大,需要分区 |
|
|
63
|
-
| 表名含 dw/ods/dwd/dws/ads | 已有分层,评估现有结构是否合理 |
|
|
64
|
-
| 表名含 tmp/temp/bak | 临时表,不纳入建模范围 |
|
|
65
|
-
| 字段含 _op/_ts/binlog | CDC 同步过来的数据 |
|
|
66
|
-
| 字段含 event_time/log_time | 时序数据,按时间分区 |
|
|
67
|
-
| 单表 > 10GB | 需要分区;超过 100GB 时再加分桶 |
|
|
68
|
-
|
|
69
|
-
---
|
|
70
|
-
|
|
71
|
-
## 第二阶段:给出有依据的建议
|
|
72
|
-
|
|
73
|
-
基于探索结果,向用户呈现四部分内容:
|
|
74
|
-
|
|
75
|
-
### 1. 数据现状摘要(自主总结,不问用户)
|
|
76
|
-
|
|
77
|
-
```
|
|
78
|
-
我看了一下你的数据:
|
|
79
|
-
- `raw` schema:orders(2.3GB/1200万行)、users(450MB)、products(120MB)
|
|
80
|
-
→ 字段特征像是从 MySQL 同步的业务库,orders 有 _op/_ts 字段(CDC 接入)
|
|
81
|
-
- `events` schema:user_events(18GB/8亿行)
|
|
82
|
-
→ 字段含 event_time、event_type,是埋点日志数据
|
|
83
|
-
- 没有发现已有的分层结构
|
|
84
|
-
```
|
|
85
|
-
|
|
86
|
-
### 2. 方案选项(给 A/B 或 A/B/C,不超过 3 个)
|
|
87
|
-
|
|
88
|
-
```
|
|
89
|
-
基于以上数据,建议两个方向:
|
|
90
|
-
|
|
91
|
-
A. 传统数仓分层
|
|
92
|
-
raw → ODS(现有数据直接复用)
|
|
93
|
-
新建 DWD(清洗标准化)+ DWS(聚合,用 Dynamic Table)+ ADS(指标输出)
|
|
94
|
-
适合:BI 报表为主,有明确的指标体系需求
|
|
95
|
-
|
|
96
|
-
B. 大奖牌架构(Medallion)
|
|
97
|
-
raw → Bronze(现有数据直接复用)
|
|
98
|
-
新建 Silver(标准化)+ Gold(指标,用 Dynamic Table)
|
|
99
|
-
适合:多场景复用,既做 BI 又做数据科学
|
|
100
|
-
```
|
|
101
|
-
|
|
102
|
-
### 3. 只问一个问题
|
|
103
|
-
|
|
104
|
-
```
|
|
105
|
-
你们主要用这些数据做什么?
|
|
106
|
-
- BI 报表(固定报表,指标体系明确)→ 推荐 A
|
|
107
|
-
- 多场景(报表+分析+数据科学)→ 推荐 B
|
|
108
|
-
- 实时看板(分钟级延迟)→ 告诉我,方案会有调整
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
### 4. 方案确认时主动提示成本注意事项
|
|
112
|
-
|
|
113
|
-
给出方案选项时,提醒用户以下成本相关决策点,让用户做知情选择:
|
|
114
|
-
|
|
115
|
-
- **Dynamic Table 刷新频率**:按业务时效性选择,不要默认最高频率。T+1 用 `1 DAY`,小时级用 `1 HOUR`,分钟级用 `10~30 MINUTE`
|
|
116
|
-
- **分层数量**:评估 DWS 和 ADS 是否都必要,每多一层 DT 就多一份刷新成本
|
|
117
|
-
- **VCluster 规格**:建议从小规格开始,按需扩容
|
|
118
|
-
|
|
119
|
-
> 具体 CRU 消耗无法在方案设计阶段精确预估,上线后加载 `clickzetta-cost-management` skill 监控实际消耗,按需调整刷新频率和集群规格。
|
|
120
|
-
|
|
121
|
-
---
|
|
122
|
-
|
|
123
|
-
## 第三阶段:方案确认后的完整输出
|
|
124
|
-
|
|
125
|
-
用户选择方向后,**一次性给出完整方案**,不再追问:
|
|
126
|
-
|
|
127
|
-
### 分层结构设计
|
|
128
|
-
|
|
129
|
-
根据选择的模式,给出各层定义、表类型推荐:
|
|
130
|
-
|
|
131
|
-
**传统分层表类型:**
|
|
132
|
-
|
|
133
|
-
| 层次 | 推荐表类型 | 说明 |
|
|
134
|
-
|---|---|---|
|
|
135
|
-
| ODS | 内部表 | 贴源,不转换 |
|
|
136
|
-
| DWD | 内部表 | 清洗标准化 |
|
|
137
|
-
| DWS | **Dynamic Table** | 增量聚合,自动刷新 |
|
|
138
|
-
| ADS | **Dynamic Table** | 面向应用,按需刷新 |
|
|
139
|
-
|
|
140
|
-
**Medallion 表类型:**
|
|
141
|
-
|
|
142
|
-
| 层次 | 推荐表类型 | 说明 |
|
|
143
|
-
|---|---|---|
|
|
144
|
-
| Bronze | 内部表 | 零转换,保留原始 |
|
|
145
|
-
| Silver | 内部表 或 Dynamic Table | 清洗标准化 |
|
|
146
|
-
| Gold | **Dynamic Table** | 聚合指标,自动刷新 |
|
|
147
|
-
|
|
148
|
-
> ⚠️ 聚合层**不推荐物化视图**,使用 Dynamic Table:CBO 增量计算,只刷新变化分区,支持 Time Travel。
|
|
149
|
-
|
|
150
|
-
### 数据接入管道
|
|
151
|
-
|
|
152
|
-
根据探索到的数据源特征,直接给出管道推荐(不再问用户):
|
|
153
|
-
|
|
154
|
-
| 数据源特征 | 推荐管道 | 对应 skill |
|
|
155
|
-
|---|---|---|
|
|
156
|
-
| 有 _op/_ts 字段(CDC) | CDC 同步 | `clickzetta-cdc-sync-pipeline` |
|
|
157
|
-
| Kafka 消息数据 | Kafka Pipe | `clickzetta-kafka-ingest-pipeline` |
|
|
158
|
-
| OSS/S3 文件 | OSS Pipe | `clickzetta-oss-ingest-pipeline` |
|
|
159
|
-
| 普通数据库表(无 CDC 标记) | 批量同步 | `clickzetta-batch-sync-pipeline` |
|
|
160
|
-
|
|
161
|
-
**ODS/Bronze 层表结构调整(根据管道类型):**
|
|
162
|
-
- CDC 接入 → 保留 `_op`(I/U/D)和 `_ts` 字段,不要删除
|
|
163
|
-
- 批量接入 → 增加 `dw_batch_date` 标记批次
|
|
164
|
-
- Kafka 接入 → JSON 消息用 `STRING` 或 `MAP<STRING,STRING>` 存储
|
|
165
|
-
|
|
166
|
-
### 分区与分桶策略
|
|
167
|
-
|
|
168
|
-
根据探索到的表大小自动推荐:
|
|
169
|
-
|
|
170
|
-
```sql
|
|
171
|
-
-- 单表 < 1GB:不分区
|
|
172
|
-
-- 单表 1GB-100GB:按天分区
|
|
173
|
-
PARTITIONED BY (days(event_date))
|
|
174
|
-
|
|
175
|
-
-- 单表 > 100GB:按天分区 + 分桶
|
|
176
|
-
PARTITIONED BY (days(event_date))
|
|
177
|
-
CLUSTERED BY (user_id) INTO 32 BUCKETS
|
|
178
|
-
```
|
|
179
|
-
|
|
180
|
-
注意:ClickZetta 分区用 `PARTITIONED BY (days(col))`,不是 `PARTITIONED BY (col)`。
|
|
181
|
-
|
|
182
|
-
### 层间流转
|
|
183
|
-
|
|
184
|
-
```
|
|
185
|
-
ODS/Bronze → DWD/Silver:SQL 任务(Studio 调度,清洗逻辑需手动控制)
|
|
186
|
-
DWD/Silver → DWS/Gold:Dynamic Table(REFRESH INTERVAL 控制延迟,自动增量)
|
|
187
|
-
DWS → ADS:Dynamic Table 或直接查询
|
|
188
|
-
```
|
|
189
|
-
|
|
190
|
-
加载 `clickzetta-sql-pipeline-manager` 获取 Dynamic Table 详细语法。
|
|
191
|
-
|
|
192
|
-
### 建管分离原则(重要)
|
|
193
|
-
|
|
194
|
-
Studio 任务按类型严格区分,**不同类型任务的调度策略完全不同**:
|
|
195
|
-
|
|
196
|
-
| 任务类型 | 示例 | 调度配置 | 说明 |
|
|
197
|
-
|---|---|---|---|
|
|
198
|
-
| DDL 建表任务 | CREATE TABLE、CREATE SCHEMA | **DRAFT,禁止配 Cron,禁止配依赖** | 一次性执行,手动触发,不参与调度链 |
|
|
199
|
-
| ETL 转换任务 | ODS→DWD 清洗 SQL | 配置 Cron + 依赖上游同步任务 | 周期性执行,构成调度 DAG |
|
|
200
|
-
| 数据同步任务 | MySQL→ODS 整库同步 | 配置 Cron,作为 ETL 任务的上游 | 周期性执行,ETL 任务的触发前提 |
|
|
201
|
-
| DWS/ADS 聚合层 | 指标汇总、报表宽表 | **使用 Dynamic Table,不建调度任务** | 系统自动刷新,额外建任务是冗余计算 |
|
|
202
|
-
|
|
203
|
-
> ⚠️ **常见错误**:为 DDL 任务配置了 Cron,导致建表语句被重复执行,引发 `SCHEDULE_TASK_HAD_CHILDREN_NODES_EXCEPTION` 等调度冲突。DDL 任务完成后应立即降级为 DRAFT 状态。
|
|
204
|
-
|
|
205
|
-
### Studio 任务目录组织规范
|
|
206
|
-
|
|
207
|
-
每个数仓项目在 Studio 中创建独立任务目录,统一管理所有任务资产:
|
|
208
|
-
|
|
209
|
-
```
|
|
210
|
-
<业务域>_dw/ ← 项目任务目录(如 shenyu_gateway_dw)
|
|
211
|
-
├── 00_sync_<source>_to_ods ← 数据同步(Cron,最早执行)
|
|
212
|
-
├── 01_ddl_ods ← ODS 建表(DRAFT,不调度)
|
|
213
|
-
├── 02_ddl_dwd ← DWD 建表(DRAFT,不调度)
|
|
214
|
-
├── 03_ddl_dws_ads ← DWS/ADS 动态表建表(DRAFT,不调度)
|
|
215
|
-
├── 04_transform_ods_to_dwd ← ODS→DWD 清洗(Cron,依赖 00)
|
|
216
|
-
└── 05_dqc_check ← 数据质量检查(Cron,依赖 04,可选)
|
|
217
|
-
(DWS/ADS 层由 Dynamic Table 自动刷新,无需任务)
|
|
218
|
-
```
|
|
219
|
-
|
|
220
|
-
> 任务编号规范:`00~` 同步层,`01~03` DDL 层(DRAFT),`04~` ETL 层(调度),DWS/ADS 无任务。
|
|
221
|
-
|
|
222
|
-
### 数据质量卡点
|
|
223
|
-
|
|
224
|
-
| 层次 | 检查重点 | 时机 |
|
|
225
|
-
|---|---|---|
|
|
226
|
-
| ODS/Bronze | NULL 比例、CDC _op 分布、行数与源端一致 | 入库后 |
|
|
227
|
-
| DWD/Silver | 唯一性、LEFT JOIN 匹配率(结果行数 ≥ 左表行数)、关键字段非空率 | ETL 后 |
|
|
228
|
-
| DWS/Gold/ADS | 指标环比异常、汇总一致性、Dynamic Table 刷新历史为 SUCCESS | Dynamic Table 刷新后 |
|
|
229
|
-
|
|
230
|
-
> ⚠️ **LEFT JOIN 陷阱**:`LEFT JOIN ... WHERE 右表字段 = 值` 会退化为 INNER JOIN,导致数据丢失。过滤右表字段必须放在 `ON` 子句:`LEFT JOIN ... ON ... AND 右表字段 = 值`。
|
|
231
|
-
|
|
232
|
-
### 交付验证 Checklist
|
|
233
|
-
|
|
234
|
-
方案上线前必须逐项确认:
|
|
235
|
-
|
|
236
|
-
- [ ] 各层行数与预期一致(ODS 行数 ≈ 源端,DWD 行数 ≤ ODS,DWS/ADS 行数符合聚合逻辑)
|
|
237
|
-
- [ ] Dynamic Table 刷新历史显示 `SUCCESS`(`SHOW DYNAMIC TABLE REFRESH HISTORY`)
|
|
238
|
-
- [ ] 关键字段 NULL 率在可接受范围内
|
|
239
|
-
- [ ] LEFT JOIN 结果行数 ≥ 左表行数(否则检查过滤条件是否误放在 WHERE)
|
|
240
|
-
- [ ] DWS/ADS 层无冗余调度任务(Dynamic Table 不需要额外 Cron)
|
|
241
|
-
- [ ] 所有 DDL 任务已降级为 DRAFT 状态
|
|
242
|
-
|
|
243
|
-
验证通过后,如需对整个管道做全面健康检查(调度依赖、DT 反模式、分层跳层等),加载 `clickzetta-pipeline-review` skill。
|
|
244
|
-
|
|
245
|
-
### 调度 DAG
|
|
246
|
-
|
|
247
|
-
```
|
|
248
|
-
日批场景:
|
|
249
|
-
00_sync(Cron 02:00)→ 04_transform(Cron 02:30,依赖 00)→ 05_dqc(可选)
|
|
250
|
-
↓
|
|
251
|
-
DWS/ADS(Dynamic Table 自动刷新,无需调度)
|
|
252
|
-
|
|
253
|
-
实时场景:
|
|
254
|
-
CDC/Kafka 持续写入 Bronze → Silver(REFRESH INTERVAL 10 MINUTE)→ Gold(REFRESH INTERVAL 1 HOUR)
|
|
255
|
-
```
|
|
256
|
-
|
|
257
|
-
### DDL 模板
|
|
258
|
-
|
|
259
|
-
加载 `clickzetta-sql-syntax-guide` 确认语法,生成各层 DDL。
|
|
260
|
-
|
|
261
|
-
**数仓开发代码资产化原则:每段 SQL 都应保存为 Studio 任务,作为可管理的代码资产。**
|
|
262
|
-
|
|
263
|
-
生成 DDL 后,按以下规范保存为 Studio 任务(先创建任务目录,再逐层保存):
|
|
264
|
-
|
|
265
|
-
```bash
|
|
266
|
-
# 创建项目任务目录
|
|
267
|
-
cz-cli task folder create <业务域>_dw
|
|
268
|
-
|
|
269
|
-
# 各层 DDL 保存为独立 DRAFT 任务(不配 Cron,不配依赖)
|
|
270
|
-
cz-cli task save-content 01_ddl_ods --content "<ods_ddl_sql>"
|
|
271
|
-
cz-cli task save-content 02_ddl_dwd --content "<dwd_ddl_sql>"
|
|
272
|
-
cz-cli task save-content 03_ddl_dws_ads --content "<dws_ads_ddl_sql>"
|
|
273
|
-
|
|
274
|
-
# ETL 转换 SQL 保存为调度任务(配 Cron + 依赖上游同步任务)
|
|
275
|
-
cz-cli task save-content 04_transform_ods_to_dwd --content "<etl_sql>"
|
|
276
|
-
cz-cli task save-cron 04_transform_ods_to_dwd --cron '0 30 2 * * ? *'
|
|
277
|
-
```
|
|
278
|
-
|
|
279
|
-
> 任务是代码的载体,不只是调度配置。即使是一次性执行的 DDL,也应保存为 DRAFT 任务,方便后续查阅、复用和多环境迁移。
|
|
280
|
-
|
|
281
|
-
**生成 Dynamic Table DDL 前,先确认可用的 GP 型 VCluster:**
|
|
282
|
-
|
|
283
|
-
```sql
|
|
284
|
-
-- 查看所有 VCluster 及状态,找到 type=GENERAL 且 status=RUNNING 的集群
|
|
285
|
-
SHOW VCLUSTERS;
|
|
286
|
-
```
|
|
287
|
-
|
|
288
|
-
- `type = GENERAL`(GP 型)且 `status = RUNNING` → 直接使用该集群名
|
|
289
|
-
- `status = STOPPED` → 先执行 `ALTER VCLUSTER <name> RESUME;` 再建表
|
|
290
|
-
- 无 GP 型集群 → 参考 `clickzetta-vcluster-manager` 创建
|
|
291
|
-
|
|
292
|
-
将查到的集群名替换下方 DDL 中的 `<gp_vcluster_name>`。
|
|
293
|
-
|
|
294
|
-
```sql
|
|
295
|
-
-- ODS/Bronze(以 CDC 接入为例)
|
|
296
|
-
CREATE TABLE IF NOT EXISTS ods.orders (
|
|
297
|
-
order_id BIGINT,
|
|
298
|
-
user_id BIGINT,
|
|
299
|
-
amount DECIMAL(18, 2),
|
|
300
|
-
status STRING,
|
|
301
|
-
created_at TIMESTAMP,
|
|
302
|
-
_op STRING, -- CDC 操作类型:I/U/D
|
|
303
|
-
_ts TIMESTAMP, -- 变更时间
|
|
304
|
-
dw_insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
305
|
-
)
|
|
306
|
-
PARTITIONED BY (days(created_at))
|
|
307
|
-
COMMENT 'ODS 订单原始表,贴源不转换';
|
|
308
|
-
|
|
309
|
-
-- DWD/Silver
|
|
310
|
-
CREATE TABLE IF NOT EXISTS dwd.fact_orders (
|
|
311
|
-
order_id BIGINT,
|
|
312
|
-
user_id BIGINT,
|
|
313
|
-
amount DECIMAL(18, 2),
|
|
314
|
-
status_code INT,
|
|
315
|
-
order_date DATE,
|
|
316
|
-
dw_insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
317
|
-
)
|
|
318
|
-
PARTITIONED BY (days(order_date))
|
|
319
|
-
CLUSTERED BY (user_id) INTO 32 BUCKETS
|
|
320
|
-
COMMENT 'DWD 订单事实表,清洗标准化';
|
|
321
|
-
|
|
322
|
-
-- DWS/Gold(Dynamic Table,不用物化视图)
|
|
323
|
-
CREATE DYNAMIC TABLE IF NOT EXISTS dws.user_order_daily
|
|
324
|
-
REFRESH INTERVAL 1 HOUR vcluster <gp_vcluster_name>
|
|
325
|
-
AS
|
|
326
|
-
SELECT
|
|
327
|
-
user_id,
|
|
328
|
-
order_date,
|
|
329
|
-
COUNT(order_id) AS order_cnt,
|
|
330
|
-
SUM(amount) AS total_amount,
|
|
331
|
-
AVG(amount) AS avg_amount
|
|
332
|
-
FROM dwd.fact_orders
|
|
333
|
-
WHERE status_code = 1
|
|
334
|
-
GROUP BY user_id, order_date;
|
|
335
|
-
|
|
336
|
-
-- 创建后立即执行首次刷新,重置刷新基准时间
|
|
337
|
-
REFRESH DYNAMIC TABLE dws.user_order_daily;
|
|
338
|
-
```
|
|
339
|
-
|
|
340
|
-
---
|
|
341
|
-
|
|
342
|
-
## 核心原则
|
|
343
|
-
|
|
344
|
-
1. **先探索数据,再给建议**——不问问卷,看完数据再说
|
|
345
|
-
2. **给选择题,不给填空题**——用户选 A/B,不要让用户凭空描述
|
|
346
|
-
3. **聚合层用 Dynamic Table,不用物化视图**
|
|
347
|
-
4. **建模和管道一体**——DDL 和管道配置同步输出
|
|
348
|
-
5. **分区用转换函数**:`days(col)` 不是 `col`
|
|
349
|
-
6. **ODS/Bronze 零转换**,保留原始数据方便回溯
|
|
350
|
-
7. **建管分离**——DDL 任务 DRAFT 不调度,DWS/ADS 层不建调度任务
|
|
351
|
-
8. **创建 Dynamic Table 后立即 REFRESH**——重置刷新基准,实现开箱即用
|
|
@@ -1,4 +0,0 @@
|
|
|
1
|
-
{"case_id":"001","type":"should_call","user_input":"数仓分层设计的原则是什么?ODS、DWD、DWS 各层的职责?","expected_skill":"clickzetta-dw-modeling","expected_output_contains":["分层"]}
|
|
2
|
-
{"case_id":"002","type":"should_call","user_input":"ODS/DWD/DWS/ADS 分层怎么设计","expected_skill":"clickzetta-dw-modeling","expected_output_contains":["ODS","DWD","DWS"]}
|
|
3
|
-
{"case_id":"003","type":"should_call","user_input":"Medallion 架构 Bronze/Silver/Gold 怎么搭建","expected_skill":"clickzetta-dw-modeling","expected_output_contains":["Bronze","Silver","Gold"]}
|
|
4
|
-
{"case_id":"004","type":"should_call","user_input":"星型模型和雪花模型怎么选","expected_skill":"clickzetta-dw-modeling","expected_output_contains":["星型","雪花"]}
|
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
# 数仓建模模式参考
|
|
2
|
-
|
|
3
|
-
## 传统数仓分层详细说明
|
|
4
|
-
|
|
5
|
-
### 分层职责
|
|
6
|
-
|
|
7
|
-
```
|
|
8
|
-
ODS(Operational Data Store)
|
|
9
|
-
├── 贴源存储,不做业务转换
|
|
10
|
-
├── 保留原始字段名和类型
|
|
11
|
-
├── 增加 dw_insert_time、dw_source 等元数据字段
|
|
12
|
-
└── 按时间分区,支持增量同步
|
|
13
|
-
|
|
14
|
-
DWD(Data Warehouse Detail)
|
|
15
|
-
├── 数据清洗:去重、NULL 处理、格式标准化
|
|
16
|
-
├── 维度退化:将常用维度字段冗余到事实表
|
|
17
|
-
├── 业务规则:状态码映射、金额单位统一
|
|
18
|
-
└── 建立主键约束(逻辑主键,ClickZetta 不强制)
|
|
19
|
-
|
|
20
|
-
DWS(Data Warehouse Summary)
|
|
21
|
-
├── 轻度聚合:按天/周/月汇总
|
|
22
|
-
├── 使用 Dynamic Table 自动增量刷新
|
|
23
|
-
├── 面向主题域:用户域、商品域、交易域
|
|
24
|
-
└── 不直接对外提供查询(由 ADS 层封装)
|
|
25
|
-
|
|
26
|
-
ADS(Application Data Store)
|
|
27
|
-
├── 面向具体应用/报表的宽表
|
|
28
|
-
├── 使用 Dynamic Table 或直接查询 DWS
|
|
29
|
-
└── 字段命名业务友好
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
### 命名规范建议
|
|
33
|
-
|
|
34
|
-
```
|
|
35
|
-
Schema 命名:ods_<业务域> / dwd_<业务域> / dws / ads
|
|
36
|
-
表命名:
|
|
37
|
-
ODS:ods_<源系统>_<表名>(如 ods_mysql_orders)
|
|
38
|
-
DWD:dwd_<主题>_<粒度>(如 dwd_trade_order_detail)
|
|
39
|
-
DWS:dws_<主题>_<维度>_<周期>(如 dws_user_order_1d)
|
|
40
|
-
ADS:ads_<应用>_<指标>(如 ads_report_gmv_daily)
|
|
41
|
-
```
|
|
42
|
-
|
|
43
|
-
---
|
|
44
|
-
|
|
45
|
-
## 大奖牌架构(Medallion)详细说明
|
|
46
|
-
|
|
47
|
-
### 分层职责
|
|
48
|
-
|
|
49
|
-
```
|
|
50
|
-
Bronze(铜牌层)
|
|
51
|
-
├── 原始数据,零转换原则
|
|
52
|
-
├── 支持多种格式:结构化/半结构化/非结构化
|
|
53
|
-
├── 保留所有历史版本(Time Travel)
|
|
54
|
-
└── 数据来源标记(source_system、ingestion_time)
|
|
55
|
-
|
|
56
|
-
Silver(银牌层)
|
|
57
|
-
├── 可信数据:去重、清洗、标准化
|
|
58
|
-
├── 跨源整合:统一字段命名和类型
|
|
59
|
-
├── 业务实体识别:用户、订单、商品
|
|
60
|
-
└── 可直接用于数据科学和探索性分析
|
|
61
|
-
|
|
62
|
-
Gold(金牌层)
|
|
63
|
-
├── 业务就绪数据:聚合指标、宽表
|
|
64
|
-
├── 使用 Dynamic Table 自动刷新
|
|
65
|
-
├── 面向 BI 工具和应用系统
|
|
66
|
-
└── 语义清晰,字段命名业务友好
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
### Schema 与表命名建议
|
|
70
|
-
|
|
71
|
-
```
|
|
72
|
-
bronze.<source>_<entity> -- 如 bronze.mysql_orders
|
|
73
|
-
silver.<entity> -- 如 silver.orders
|
|
74
|
-
gold.<domain>_<metric> -- 如 gold.trade_gmv_daily
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
---
|
|
78
|
-
|
|
79
|
-
## Dynamic Table vs 物化视图对比
|
|
80
|
-
|
|
81
|
-
| 特性 | Dynamic Table | 物化视图 |
|
|
82
|
-
|---|---|---|
|
|
83
|
-
| 刷新机制 | CBO 增量计算,只刷新变化分区 | 全量或手动增量 |
|
|
84
|
-
| 调度方式 | REFRESH INTERVAL 自动控制 | 需手动配置调度 |
|
|
85
|
-
| Time Travel | ✅ 支持 | ❌ 不支持 |
|
|
86
|
-
| 数据恢复 | ✅ RESTORE TABLE | ❌ 不支持 |
|
|
87
|
-
| 语法复杂度 | 简单,类似 CREATE TABLE | 较复杂 |
|
|
88
|
-
| 推荐场景 | **新项目首选** | 遗留项目兼容 |
|
|
89
|
-
|
|
90
|
-
**结论:新建项目一律使用 Dynamic Table,不使用物化视图。**
|
|
91
|
-
|
|
92
|
-
---
|
|
93
|
-
|
|
94
|
-
## 常见建模陷阱
|
|
95
|
-
|
|
96
|
-
1. **过度规范化**:DWD 层不要拆太细,适当冗余维度字段,减少下游 JOIN
|
|
97
|
-
2. **分区粒度过细**:按小时分区会产生大量小文件,日批场景用按天分区
|
|
98
|
-
3. **ADS 层直接写 SQL**:ADS 层应该用 Dynamic Table,不要让 BI 工具直接跑复杂 SQL
|
|
99
|
-
4. **忽略数据质量**:ODS 层入库时就应该检查 NULL 比例,不要等到 DWS 层才发现问题
|
|
100
|
-
5. **Bronze 层做转换**:Bronze 层一旦做了转换,原始数据就丢失了,回溯困难
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
ClickZetta Skills License
|
|
2
|
-
© 2026 Yunqi Inc. All rights reserved.
|
|
3
|
-
LICENSE: Use of these materials (including all code, prompts, assets, files, and other components of these skills (collectively, "Skills")) is governed by your agreement with ClickZetta for the Service. If no separate agreement exists, use is governed by ClickZetta's Terms of Service (available at: https://yunqi.tech/documents/user-aggrement).
|
|
4
|
-
Your applicable agreement is referred to as the "Agreement." "Service" is as defined in the Agreement.
|
|
5
|
-
ADDITIONAL RESTRICTIONS: Notwithstanding anything in the Agreement to the contrary, you may not:
|
|
6
|
-
|
|
7
|
-
Extract from the Service or retain copies of the Skills outside use with the Service;
|
|
8
|
-
Reproduce or copy the Skills, except for temporary copies created automatically during authorized use of the Service;
|
|
9
|
-
Create derivative works based on the Skills;
|
|
10
|
-
Distribute, sublicense, or transfer the Skills to any third party;
|
|
11
|
-
Make, offer to sell, sell, or import any inventions embodied in the Skills; nor,
|
|
12
|
-
Reverse engineer, decompile, or disassemble the Skills.
|
|
13
|
-
|
|
14
|
-
The receipt, viewing, or possession of the Skills does not convey or imply any license or right beyond those expressly granted above.
|
|
15
|
-
Yunqi retains all rights, title, and interest in the Skills, including all copyrights, trademarks, patents, and all other applicable intellectual property rights.
|
|
16
|
-
THE SKILLS ARE PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SKILLS OR THE USE OR OTHER DEALINGS IN THE SKILLS.
|