@clickzetta/cz-cli-darwin-x64 0.3.87 → 0.3.90

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/clickzetta-dynamic-table/SKILL.md +169 -169
  3. package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +126 -126
  4. package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +25 -25
  5. package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +48 -48
  6. package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +51 -51
  7. package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +59 -59
  8. package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +8 -7
  9. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +99 -99
  10. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +188 -188
  11. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +117 -117
  12. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +29 -29
  13. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +80 -79
  14. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/SKILL.md +15 -15
  15. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-column-validation-rules.md +61 -61
  16. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-conversion-rules.md +100 -100
  17. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-placeholder-rules.md +64 -64
  18. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-refresh-rules.md +32 -32
  19. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-self-reference-rules.md +21 -21
  20. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-workflow.md +71 -71
  21. package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +203 -202
  22. package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +62 -62
  23. package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +34 -34
  24. package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +61 -61
  25. package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +41 -41
  26. package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +103 -101
  27. package/bin/skills/cz-cli/SKILL.md +1 -1
  28. package/package.json +1 -1
@@ -1,79 +1,79 @@
1
- # 维度表 JOIN 场景详解
1
+ # Dimension Table JOIN Scenarios — Detailed Guide
2
2
 
3
- ## 核心机制
3
+ ## Core Mechanism
4
4
 
5
- 将某张表标记为维度表(dimension table)后,增量引擎会将该表的变更数据视为**空**。即:
6
- - 维度表的任何数据变更(INSERT/UPDATE/DELETE)都**不会触发增量计算**
7
- - 增量计算时,维度表始终读取**最新全量数据**
8
- - 只有非维度表(事实表)的变更才会驱动增量刷新
5
+ After marking a table as a dimension table, the incremental engine treats that table's change data as **empty**. That is:
6
+ - Any data changes (INSERT/UPDATE/DELETE) to the dimension table **do not trigger incremental computation**
7
+ - During incremental computation, the dimension table always reads its **latest full data**
8
+ - Only changes in non-dimension tables (fact tables) drive incremental refresh
9
9
 
10
- ## 配置方式
10
+ ## Configuration
11
11
 
12
12
  ```sql
13
- -- 方式1:DT 表属性(推荐,跟随 DT 定义)
13
+ -- Method 1: DT table properties (recommended; follows DT definition)
14
14
  CREATE DYNAMIC TABLE my_dt
15
15
  TBLPROPERTIES('mv_const_tables'='dim_table1,dim_table2')
16
16
  AS SELECT ...;
17
17
 
18
- -- 方式2:Session 配置(在 REFRESH 前设置,灵活可动态调整)
18
+ -- Method 2: Session configuration (set before REFRESH; flexible and dynamically adjustable)
19
19
  set CZ_OPTIMIZER_INCREMENTAL_DIMENSION_TABLES=dim_table1:dim_table2
20
20
  ```
21
21
 
22
- ## JOIN 类型下的增量行为
22
+ ## Incremental Behavior Under Each JOIN Type
23
23
 
24
- ### A LEFT JOIN B(B 为维度表)
24
+ ### A LEFT JOIN B (B is dimension table)
25
25
 
26
- 这是最常见的维度表 JOIN 场景。
26
+ This is the most common dimension table JOIN scenario.
27
27
 
28
- **Case 1A 有增量数据,B 无变化**
28
+ **Case 1: A has incremental data, B has no changes**
29
29
  ```
30
- 增量计划:A 的变更数据 LEFT JOIN B 的全量数据
30
+ Incremental plan: A's change data LEFT JOIN B's full data
31
31
  ```
32
- - 新增的 A 行与 B 的最新数据做 LEFT JOIN
33
- - 如果 JOIN 输出完整行
34
- - 如果没 JOIN → B 侧输出 NULL
35
- - ✅ 结果正确
32
+ - New A rows LEFT JOIN with B's latest data
33
+ - If JOIN matches → output complete row
34
+ - If no match → B side outputs NULL
35
+ - ✅ Result is correct
36
36
 
37
- **Case 2B 有数据变更,A 无变化**
37
+ **Case 2: B has data changes, A has no changes**
38
38
  ```
39
- 增量计划:不触发计算(变更数据为空)
39
+ Incremental plan: no computation triggered (change data is empty)
40
40
  ```
41
- - B 的变更被完全忽略
42
- - 之前 A 行没 JOIN 上 B 输出的 `(xxx, NULL)` 不会被修正为 `(xxx, yyy)`
43
- - 之前 A JOIN 上的旧 B 数据不会被更新为新值
44
- - ⚠️ 结果与全量重算不一致,但这是**预期行为**
41
+ - B's changes are completely ignored
42
+ - Previously output `(xxx, NULL)` rows (where A didn't match B) will not be corrected to `(xxx, yyy)`
43
+ - Previously output rows with old B data will not be updated to new values
44
+ - ⚠️ Result differs from full recomputation, but this is **expected behavior**
45
45
 
46
- **Case 3A B 同时有变化**
46
+ **Case 3: Both A and B have changes**
47
47
  ```
48
- 增量计划:A 的变更数据 LEFT JOIN B 的全量数据
48
+ Incremental plan: A's change data LEFT JOIN B's full data
49
49
  ```
50
- - 只处理 A 的增量,B 的变更被忽略
51
- - 新增的 A 行会 JOIN B 的最新数据
52
- - 但已有的 A 行不会因 B 的变更而更新
53
- - ⚠️ 新旧数据可能存在不一致
50
+ - Only A's incremental data is processed; B's changes are ignored
51
+ - New A rows will JOIN to B's latest data
52
+ - But existing A rows will not be updated due to B's changes
53
+ - ⚠️ New and old data may be inconsistent
54
54
 
55
- ### A INNER JOIN B(B 为维度表)
55
+ ### A INNER JOIN B (B is dimension table)
56
56
 
57
- **Case 1A 有增量数据,B 无变化**
57
+ **Case 1: A has incremental data, B has no changes**
58
58
  ```
59
- 增量计划:A 的变更数据 INNER JOIN B 的全量数据
59
+ Incremental plan: A's change data INNER JOIN B's full data
60
60
  ```
61
- - 新增的 A 行与 B INNER JOIN
62
- - JOIN 不上的 A 行被丢弃
63
- - ✅ 结果正确
61
+ - New A rows INNER JOIN with B
62
+ - A rows that don't match are discarded
63
+ - ✅ Result is correct
64
64
 
65
- **Case 2B 有数据变更,A 无变化**
65
+ **Case 2: B has data changes, A has no changes**
66
66
  ```
67
- 增量计划:不触发计算
67
+ Incremental plan: no computation triggered
68
68
  ```
69
- - B 新增了能匹配已有 A 行的数据不会产出新结果
70
- - B 删除了匹配已有 A 行的数据已输出的结果不会被撤回
71
- - ⚠️ 结果与全量重算不一致
69
+ - B adds data that can match existing A rows → no new results are produced
70
+ - B deletes data that matched existing A rows → already-output results are not retracted
71
+ - ⚠️ Result differs from full recomputation
72
72
 
73
- ### 多表 JOIN 中的维度表
73
+ ### Dimension Tables in Multi-table JOINs
74
74
 
75
75
  ```sql
76
- -- t2, t3 都是维度表
76
+ -- t2, t3 are both dimension tables
77
77
  CREATE DYNAMIC TABLE dt
78
78
  TBLPROPERTIES('mv_const_tables'='t2,t3')
79
79
  AS
@@ -83,171 +83,171 @@ LEFT JOIN t2 ON t1.id = t2.id
83
83
  LEFT JOIN t3 ON t1.id = t3.id;
84
84
  ```
85
85
 
86
- - 只有 t1 的变更会触发增量计算
87
- - t2、t3 的变更都被忽略
88
- - 增量计划:t1 的变更数据 LEFT JOIN t2 的全量数据 LEFT JOIN t3 的全量数据
86
+ - Only t1's changes trigger incremental computation
87
+ - Changes to t2 and t3 are both ignored
88
+ - Incremental plan: t1's change data LEFT JOIN t2's full data LEFT JOIN t3's full data
89
89
 
90
- ## 适合使用维度表的场景
90
+ ## Scenarios Suitable for Dimension Tables
91
91
 
92
- ### ✅ 推荐场景
92
+ ### ✅ Recommended Scenarios
93
93
 
94
- 1. **码表/字典表 JOIN**
95
- - 如:地区码表、产品分类表、状态码映射表
96
- - 特点:数据量小、极少变更、即使变更也不影响历史分析
94
+ 1. **Lookup/dictionary table JOINs**
95
+ - E.g., region code tables, product category tables, status code mapping tables
96
+ - Characteristics: small data volume, rarely changes, even if it changes it doesn't affect historical analysis
97
97
  ```sql
98
- -- 地区码表几乎不变
98
+ -- Region code table almost never changes
99
99
  TBLPROPERTIES('mv_const_tables'='dim_region')
100
100
  ```
101
101
 
102
- 2. **T+1 维度表 + 实时事实表**
103
- - 维度表每天批量更新一次,事实表持续写入
104
- - 在两次维度表更新之间,维度表可视为不变
102
+ 2. **T+1 dimension table + real-time fact table**
103
+ - Dimension table updates in batch once per day; fact table writes continuously
104
+ - Between two dimension table updates, the dimension table can be treated as unchanged
105
105
  ```sql
106
- -- 用户画像表每天更新,订单表实时写入
106
+ -- User profile table updates daily; order table writes in real-time
107
107
  TBLPROPERTIES('mv_const_tables'='dim_user_profile')
108
108
  ```
109
109
 
110
- 3. **配置表 JOIN**
111
- - 如:业务规则配置、阈值配置、权重配置
112
- - 变更频率极低,且变更后可以手动触发全量刷新
110
+ 3. **Configuration table JOINs**
111
+ - E.g., business rule configs, threshold configs, weight configs
112
+ - Very low change frequency; after changes, a manual full refresh can correct data
113
113
  ```sql
114
114
  TBLPROPERTIES('mv_const_tables'='config_rules')
115
115
  ```
116
116
 
117
- 4. **大事实表 JOIN 小维度表,且对维度表变更的实时性要求低**
118
- - 核心诉求是事实表的增量计算性能
119
- - 维度表偶尔变更后,可以接受短暂的数据不一致
117
+ 4. **Large fact table JOIN small dimension table, with low real-time requirements for dimension table changes**
118
+ - Core goal is incremental performance on the fact table
119
+ - Brief inconsistency after occasional dimension table changes is acceptable
120
120
  ```sql
121
- -- 商品信息表偶尔更新,订单表持续写入
121
+ -- Product info table occasionally updates; order table writes continuously
122
122
  TBLPROPERTIES('mv_const_tables'='dim_product')
123
123
  ```
124
124
 
125
- 5. **不支持 time travel 的外部表作为 JOIN 右表**
126
- - 外部表无法提供变更数据,标记为维度表后可以正常进行增量计算
127
- - 增量引擎会读取外部表的最新快照
125
+ 5. **External tables that don't support time travel as the right side of a JOIN**
126
+ - External tables cannot provide change data; marking as dimension table enables normal incremental computation
127
+ - The incremental engine reads the latest snapshot of the external table
128
128
  ```sql
129
- -- 外部 MySQL 表不支持 time travel
129
+ -- External MySQL table doesn't support time travel
130
130
  TBLPROPERTIES('mv_const_tables'='external_mysql_table')
131
131
  ```
132
132
 
133
- ### ❌ 不推荐场景
133
+ ### ❌ Not Recommended Scenarios
134
134
 
135
- 1. **维度表频繁更新且要求结果实时一致**
136
- - 如:用户状态表每分钟更新,且下游报表要求实时反映最新状态
137
- - 此时不应标记为维度表,应让两侧都参与增量计算
135
+ 1. **Dimension table updates frequently and real-time consistency is required**
136
+ - E.g., user status table updates every minute, and downstream reports require real-time reflection of the latest status
137
+ - In this case, do not mark as dimension table; let both sides participate in incremental computation
138
138
 
139
- 2. **维度表变更会影响聚合结果的正确性**
140
- - 如:价格表更新后,历史订单的金额计算应该用旧价格
141
- - 但维度表标记后,新的事实行会 JOIN 到新价格,旧事实行保持旧价格
142
- - 如果业务要求所有行统一使用最新价格,不应使用维度表
139
+ 2. **Dimension table changes affect the correctness of aggregation results**
140
+ - E.g., after a price table update, historical order amounts should use the old price
141
+ - But with dimension table marking, new fact rows will JOIN to the new price, while old fact rows keep the old price
142
+ - If business requires all rows to use the latest price uniformly, do not use dimension table
143
143
 
144
- 3. **维度表数据量大且变更频繁**
145
- - 维度表标记的优化收益来自跳过变更数据的计算
146
- - 如果维度表本身很大且频繁变更,应该考虑让它正常参与增量
144
+ 3. **Dimension table has large data volume and changes frequently**
145
+ - The optimization benefit of dimension table marking comes from skipping change data computation
146
+ - If the dimension table itself is large and changes frequently, consider letting it participate in incremental normally
147
147
 
148
- ## 维度表变更后的数据订正
148
+ ## Data Correction After Dimension Table Changes
149
149
 
150
- 由于维度表的变更不会触发增量计算,当维度表发生了重要变更(如修正了错误数据、更新了映射关系),DT 中已有的结果不会自动更新。**如果需要订正数据,必须执行全量刷新。**
150
+ Since dimension table changes do not trigger incremental computation, when a dimension table undergoes an important change (e.g., incorrect data was corrected, mapping relationships were updated), existing results in the DT will not be automatically updated. **If data correction is needed, a full refresh must be executed.**
151
151
 
152
152
  ```sql
153
- -- 强制全量刷新(推荐)
153
+ -- Force full refresh (recommended)
154
154
  set cz.optimizer.incremental.force.full.refresh=true
155
155
  REFRESH DYNAMIC TABLE my_dt;
156
- -- 刷新完成后记得关闭,否则后续每次都是全量
156
+ -- Remember to turn it off after refresh; otherwise every subsequent refresh will be full
157
157
  set cz.optimizer.incremental.force.full.refresh=false
158
158
 
159
- -- 如果是分区表,也可以只全量刷新指定分区
159
+ -- For partitioned tables, you can also do a full refresh of only a specific partition
160
160
  set cz.optimizer.incremental.force.full.refresh=true
161
161
  set dt.args.ds=2025-01-01
162
162
  REFRESH DYNAMIC TABLE my_dt PARTITION(ds = '2025-01-01');
163
163
  set cz.optimizer.incremental.force.full.refresh=false
164
164
  ```
165
165
 
166
- 配置说明:
167
- - `cz.optimizer.incremental.force.full.refresh`:默认 `false`。设为 `true` 后,下一次 REFRESH 会忽略增量逻辑,对所有源表做全量扫描重算
168
- - 该配置是 session 级别的,刷新完成后需要手动设回 `false`,否则后续所有 REFRESH 都会走全量
169
- - backfill 模式(`cz.optimizer.incremental.backfill.enabled=TRUE`)也会自动开启全量刷新
166
+ Configuration notes:
167
+ - `cz.optimizer.incremental.force.full.refresh`: default `false`. When set to `true`, the next REFRESH ignores incremental logic and does a full scan and recomputation of all source tables.
168
+ - This config is Session-level; after the refresh completes, it must be manually reset to `false`; otherwise all subsequent REFRESHes will use full mode.
169
+ - Backfill mode (`cz.optimizer.incremental.backfill.enabled=TRUE`) also automatically enables full refresh.
170
170
 
171
- ## 性能收益
171
+ ## Performance Benefits
172
172
 
173
- 标记维度表后的优化效果:
174
- - **跳过维度表的变更数据扫描**:不需要读取维度表的变更日志
175
- - **简化增量计划**:只需要用事实表的变更数据 JOIN 维度表的全量数据,不需要反向计算
173
+ Optimization effects after marking dimension tables:
174
+ - **Skip dimension table change data scanning**: no need to read dimension table change logs
175
+ - **Simplify incremental plan**: only need to JOIN fact table change data with dimension table full data; no reverse computation needed
176
176
 
177
- ## ⚠️ 开启维度表后可能出现的数据不一致与重复
177
+ ## ⚠️ Potential Data Inconsistency and Duplication After Enabling Dimension Tables
178
178
 
179
- 标记维度表是一种**用一致性换性能**的权衡。以下是具体会出现问题的场景,使用前务必评估业务是否可以接受。
179
+ Marking dimension tables is a **tradeoff of consistency for performance**. The following are specific scenarios where problems will occur — evaluate whether the business can accept these before using.
180
180
 
181
- ### 场景 1LEFT JOIN 维度表更新导致 NULL 不被修正
181
+ ### Scenario 1: LEFT JOIN Dimension Table Update Causes NULL Not to Be Corrected
182
182
 
183
183
  ```sql
184
- -- DT 定义
184
+ -- DT definition
185
185
  SELECT order.*, product.name
186
186
  FROM order LEFT JOIN product ON order.pid = product.id;
187
- -- product 标记为维度表
187
+ -- product marked as dimension table
188
188
  ```
189
189
 
190
- | 时间 | 事件 | DT 中的结果 | 全量重算应有的结果 |
190
+ | Time | Event | Result in DT | Expected result from full recomputation |
191
191
  |------|------|------------|------------------|
192
- | T1 | order 插入 (pid=100)product 中无 id=100 | (pid=100, name=NULL) | (pid=100, name=NULL) |
193
- | T2 | product 插入 id=100, name='手机' | (pid=100, name=NULL) **不变** | (pid=100, name='手机') |
192
+ | T1 | order inserts (pid=100); product has no id=100 | (pid=100, name=NULL) | (pid=100, name=NULL) |
193
+ | T2 | product inserts id=100, name='Phone' | (pid=100, name=NULL) **unchanged** | (pid=100, name='Phone') |
194
194
 
195
- **原因**:product 的变更不触发增量计算,T1 输出的 NULL 行永远不会被修正。
195
+ **Reason**: product's changes don't trigger incremental computation; the NULL row output at T1 will never be corrected.
196
196
 
197
- ### 场景 2INNER JOIN 维度表新增数据导致结果缺失
197
+ ### Scenario 2: INNER JOIN — Dimension Table New Data Causes Missing Results
198
198
 
199
199
  ```sql
200
200
  SELECT order.*, product.name
201
201
  FROM order INNER JOIN product ON order.pid = product.id;
202
- -- product 标记为维度表
202
+ -- product marked as dimension table
203
203
  ```
204
204
 
205
- | 时间 | 事件 | DT 中的结果 | 全量重算应有的结果 |
205
+ | Time | Event | Result in DT | Expected result from full recomputation |
206
206
  |------|------|------------|------------------|
207
- | T1 | order 插入 (pid=200)product 中无 id=200 | 无输出(INNER JOIN 不匹配) | 无输出 |
208
- | T2 | product 插入 id=200, name='电脑' | **仍然无输出** | (pid=200, name='电脑') |
207
+ | T1 | order inserts (pid=200); product has no id=200 | No output (INNER JOIN no match) | No output |
208
+ | T2 | product inserts id=200, name='Computer' | **Still no output** | (pid=200, name='Computer') |
209
209
 
210
- **原因**:product 的新增不触发增量,已有的 order 行不会被重新 JOIN。
210
+ **Reason**: product's new data doesn't trigger incremental; existing order rows are not re-JOINed.
211
211
 
212
- ### 场景 3:维度表删除/更新导致过期数据残留
212
+ ### Scenario 3: Dimension Table Delete/Update Causes Stale Data to Remain
213
213
 
214
214
  ```sql
215
215
  SELECT order.*, product.name, product.price
216
216
  FROM order LEFT JOIN product ON order.pid = product.id;
217
- -- product 标记为维度表
217
+ -- product marked as dimension table
218
218
  ```
219
219
 
220
- | 时间 | 事件 | DT 中的结果 | 全量重算应有的结果 |
220
+ | Time | Event | Result in DT | Expected result from full recomputation |
221
221
  |------|------|------------|------------------|
222
- | T1 | order 插入 (pid=100)product id=100 price=99 | (pid=100, price=99) | (pid=100, price=99) |
223
- | T2 | product 更新 id=100 price=**199** | (pid=100, price=**99**) 旧值残留 | (pid=100, price=199) |
224
- | T3 | product 删除 id=100 | (pid=100, price=**99**) 仍然残留 | (pid=100, name=NULL) |
222
+ | T1 | order inserts (pid=100); product id=100 price=99 | (pid=100, price=99) | (pid=100, price=99) |
223
+ | T2 | product updates id=100 price=**199** | (pid=100, price=**99**) old value remains | (pid=100, price=199) |
224
+ | T3 | product deletes id=100 | (pid=100, price=**99**) still remains | (pid=100, name=NULL, price=NULL) |
225
225
 
226
- **原因**:维度表的 UPDATE/DELETE 都被忽略,已输出的行保持旧值。
226
+ **Reason**: dimension table UPDATE/DELETE are both ignored; already-output rows keep old values.
227
227
 
228
- ### 场景 4:维度表 + 聚合导致聚合结果不一致
228
+ ### Scenario 4: Dimension Table + Aggregation Causes Inconsistent Aggregation Results
229
229
 
230
230
  ```sql
231
231
  SELECT product.category, SUM(order.amount) as total
232
232
  FROM order LEFT JOIN product ON order.pid = product.id
233
233
  GROUP BY product.category;
234
- -- product 标记为维度表
234
+ -- product marked as dimension table
235
235
  ```
236
236
 
237
- | 时间 | 事件 | DT 中的结果 | 全量重算应有的结果 |
237
+ | Time | Event | Result in DT | Expected result from full recomputation |
238
238
  |------|------|------------|------------------|
239
- | T1 | order (pid=1, amount=100)product (id=1, category='A') | category='A', total=100 | 同左 |
240
- | T2 | product 更新 id=1 category 'A' 改为 'B' | category='A', total=100 **不变** | category='B', total=100 |
241
- | T3 | order 新增 (pid=1, amount=200) | category='B', total=200(新行 JOIN 到新 category)| category='B', total=300 |
239
+ | T1 | order (pid=1, amount=100); product (id=1, category='A') | category='A', total=100 | Same |
240
+ | T2 | product updates id=1 category from 'A' to 'B' | category='A', total=100 **unchanged** | category='B', total=100 |
241
+ | T3 | order adds (pid=1, amount=200) | category='B', total=200 (new row JOINs to new category) | category='B', total=300 |
242
242
 
243
- **原因**:T2 category 变更不触发重算,T1 的旧数据仍按旧 category 聚合。T3 的新数据按新 category 聚合。最终结果中同一个 pid 的数据被分到了不同 category,聚合结果错乱。
243
+ **Reason**: T2's category change doesn't trigger recomputation; T1's old data is still aggregated under the old category. T3's new data is aggregated under the new category. The final result has data for the same pid split across different categories, causing incorrect aggregation.
244
244
 
245
- ### 总结:什么时候结果会不一致
245
+ ### Summary: When Results Will Be Inconsistent
246
246
 
247
- | 维度表变更类型 | LEFT JOIN | INNER JOIN |
247
+ | Dimension table change type | LEFT JOIN | INNER JOIN |
248
248
  |--------------|-----------|------------|
249
- | 新增匹配行 | fact 行的 NULL 不被修正 | fact 行不会产出新结果 |
250
- | 更新已有行 | fact 行保持旧值 | fact 行保持旧值 |
251
- | 删除已有行 | fact 行保持旧值(不会变 NULL) | fact 行不会被撤回 |
249
+ | New matching row added | Old fact rows' NULL is not corrected | Old fact rows don't produce new results |
250
+ | Existing row updated | Old fact rows keep old values | Old fact rows keep old values |
251
+ | Existing row deleted | Old fact rows keep old values (won't become NULL) | Old fact rows are not retracted |
252
252
 
253
- **核心原则**:维度表的任何变更都不会影响已经输出的结果行。只有新的事实表增量才会 JOIN 到维度表的最新快照。
253
+ **Core principle**: any change to a dimension table does not affect already-output result rows. Only new fact table increments will JOIN to the dimension table's latest snapshot.
@@ -1,20 +1,20 @@
1
- # Medallion 架构与 Table Stream 组合模式
1
+ # Medallion Architecture and Table Stream Combination Patterns
2
2
 
3
- ## Medallion 三层管道
3
+ ## Medallion Three-layer Pipeline
4
4
 
5
5
  ```
6
- Bronze(原始数据)
7
- ↓ Dynamic Table(清洗,INCREMENTAL)
8
- Silver(清洗数据)
9
- ↓ Dynamic Table(聚合,FULL)
10
- Gold(指标数据)
11
- ↓ BI 工具直接查询
6
+ Bronze (raw data)
7
+ ↓ Dynamic Table (cleansing, INCREMENTAL)
8
+ Silver (cleansed data)
9
+ ↓ Dynamic Table (aggregation, FULL)
10
+ Gold (metric data)
11
+ ↓ BI tools query directly
12
12
  ```
13
13
 
14
- ### Bronze → Silver(增量清洗)
14
+ ### Bronze → Silver (Incremental Cleansing)
15
15
 
16
16
  ```sql
17
- -- 前提:源表开启变更跟踪
17
+ -- Prerequisite: enable change tracking on source table
18
18
  ALTER TABLE bronze.raw_orders SET PROPERTIES ('change_tracking' = 'true');
19
19
 
20
20
  CREATE DYNAMIC TABLE IF NOT EXISTS silver.orders_cleaned
@@ -30,7 +30,7 @@ FROM bronze.raw_orders
30
30
  WHERE order_id IS NOT NULL AND amount > 0;
31
31
  ```
32
32
 
33
- ### Silver → Gold(聚合指标,通常 FULL)
33
+ ### Silver → Gold (Aggregated Metrics, typically FULL)
34
34
 
35
35
  ```sql
36
36
  CREATE DYNAMIC TABLE IF NOT EXISTS gold.orders_daily_summary
@@ -48,23 +48,23 @@ GROUP BY 1, 2;
48
48
 
49
49
  ---
50
50
 
51
- ## Table Stream 组合(事件驱动)
51
+ ## Combined with Table Stream (Event-driven)
52
52
 
53
- Table Stream 捕获源表变更,Dynamic Table 消费 Stream 做增量处理。
53
+ Table Stream captures source table changes; Dynamic Table consumes the Stream for incremental processing.
54
54
 
55
- ### 基本模式
55
+ ### Basic Pattern
56
56
 
57
57
  ```sql
58
- -- 1. 源表开启变更跟踪
58
+ -- 1. Enable change tracking on source table
59
59
  ALTER TABLE bronze.raw_orders SET PROPERTIES ('change_tracking' = 'true');
60
60
 
61
- -- 2. 创建 Table Stream
61
+ -- 2. Create Table Stream
62
62
  CREATE TABLE STREAM bronze.orders_stream
63
63
  ON TABLE bronze.raw_orders
64
64
  WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
65
65
 
66
- -- 3. Dynamic Table 消费 Stream
67
- -- 注意:Stream 作为 DT 源时,每次刷新会消费 offset
66
+ -- 3. Dynamic Table consumes Stream
67
+ -- Note: when Stream is used as DT source, each refresh consumes the offset
68
68
  CREATE DYNAMIC TABLE IF NOT EXISTS silver.orders_incremental
69
69
  REFRESH INTERVAL 5 MINUTE vcluster default
70
70
  AS
@@ -73,16 +73,16 @@ FROM bronze.orders_stream
73
73
  WHERE __change_type IN ('INSERT', 'UPDATE_AFTER');
74
74
  ```
75
75
 
76
- ### MERGE INTO + Table Stream(替代非分区 DT 的去重场景)
76
+ ### MERGE INTO + Table Stream (Alternative to Non-partitioned DT Deduplication)
77
77
 
78
- 当需要按主键去重且源表持续写入时,推荐用 MERGE INTO 替代 Dynamic Table
78
+ When deduplication by primary key is needed and the source table has continuous writes, MERGE INTO is recommended over Dynamic Table:
79
79
 
80
80
  ```sql
81
- -- 1. 创建 Table Stream
81
+ -- 1. Create Table Stream
82
82
  CREATE TABLE STREAM source_stream ON TABLE source_table
83
83
  WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD', 'SHOW_INITIAL_ROWS' = 'TRUE');
84
84
 
85
- -- 2. 创建目标表
85
+ -- 2. Create target table
86
86
  CREATE TABLE target_table (
87
87
  id BIGINT,
88
88
  col1 STRING,
@@ -90,7 +90,7 @@ CREATE TABLE target_table (
90
90
  event_time TIMESTAMP
91
91
  );
92
92
 
93
- -- 3. 定时调度 MERGE INTO 消费 Stream
93
+ -- 3. Scheduled MERGE INTO to consume Stream
94
94
  MERGE INTO target_table t
95
95
  USING (
96
96
  SELECT id, col1, col2, event_time,
@@ -105,10 +105,10 @@ WHEN NOT MATCHED AND s.op = 'UPSERT' THEN INSERT
105
105
 
106
106
  ---
107
107
 
108
- ## 实时报表物化
108
+ ## Real-time Report Materialization
109
109
 
110
110
  ```sql
111
- -- 每小时刷新销售汇总,供 BI 工具直接查询
111
+ -- Refresh hourly sales summary for direct BI tool queries
112
112
  CREATE DYNAMIC TABLE IF NOT EXISTS rpt.sales_hourly
113
113
  REFRESH INTERVAL 60 MINUTE vcluster default
114
114
  AS