@clickzetta/cz-cli-darwin-x64 0.3.89 → 0.3.91
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +169 -169
- package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +126 -126
- package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +25 -25
- package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +48 -48
- package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +51 -51
- package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +59 -59
- package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +8 -7
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +99 -99
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +188 -188
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +117 -117
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +29 -29
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +80 -79
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/SKILL.md +15 -15
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-column-validation-rules.md +61 -61
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-conversion-rules.md +100 -100
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-placeholder-rules.md +64 -64
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-refresh-rules.md +32 -32
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-self-reference-rules.md +21 -21
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-workflow.md +71 -71
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +203 -202
- package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +62 -62
- package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +34 -34
- package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +61 -61
- package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +41 -41
- package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +103 -101
- package/package.json +1 -1
|
@@ -1,79 +1,79 @@
|
|
|
1
|
-
#
|
|
1
|
+
# Dimension Table JOIN Scenarios — Detailed Guide
|
|
2
2
|
|
|
3
|
-
##
|
|
3
|
+
## Core Mechanism
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
-
|
|
7
|
-
-
|
|
8
|
-
-
|
|
5
|
+
After marking a table as a dimension table, the incremental engine treats that table's change data as **empty**. That is:
|
|
6
|
+
- Any data changes (INSERT/UPDATE/DELETE) to the dimension table **do not trigger incremental computation**
|
|
7
|
+
- During incremental computation, the dimension table always reads its **latest full data**
|
|
8
|
+
- Only changes in non-dimension tables (fact tables) drive incremental refresh
|
|
9
9
|
|
|
10
|
-
##
|
|
10
|
+
## Configuration
|
|
11
11
|
|
|
12
12
|
```sql
|
|
13
|
-
--
|
|
13
|
+
-- Method 1: DT table properties (recommended; follows DT definition)
|
|
14
14
|
CREATE DYNAMIC TABLE my_dt
|
|
15
15
|
TBLPROPERTIES('mv_const_tables'='dim_table1,dim_table2')
|
|
16
16
|
AS SELECT ...;
|
|
17
17
|
|
|
18
|
-
--
|
|
18
|
+
-- Method 2: Session configuration (set before REFRESH; flexible and dynamically adjustable)
|
|
19
19
|
set CZ_OPTIMIZER_INCREMENTAL_DIMENSION_TABLES=dim_table1:dim_table2
|
|
20
20
|
```
|
|
21
21
|
|
|
22
|
-
##
|
|
22
|
+
## Incremental Behavior Under Each JOIN Type
|
|
23
23
|
|
|
24
|
-
### A LEFT JOIN B
|
|
24
|
+
### A LEFT JOIN B (B is dimension table)
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
This is the most common dimension table JOIN scenario.
|
|
27
27
|
|
|
28
|
-
**Case 1
|
|
28
|
+
**Case 1: A has incremental data, B has no changes**
|
|
29
29
|
```
|
|
30
|
-
|
|
30
|
+
Incremental plan: A's change data LEFT JOIN B's full data
|
|
31
31
|
```
|
|
32
|
-
-
|
|
33
|
-
-
|
|
34
|
-
-
|
|
35
|
-
- ✅
|
|
32
|
+
- New A rows LEFT JOIN with B's latest data
|
|
33
|
+
- If JOIN matches → output complete row
|
|
34
|
+
- If no match → B side outputs NULL
|
|
35
|
+
- ✅ Result is correct
|
|
36
36
|
|
|
37
|
-
**Case 2
|
|
37
|
+
**Case 2: B has data changes, A has no changes**
|
|
38
38
|
```
|
|
39
|
-
|
|
39
|
+
Incremental plan: no computation triggered (change data is empty)
|
|
40
40
|
```
|
|
41
|
-
- B
|
|
42
|
-
-
|
|
43
|
-
-
|
|
44
|
-
- ⚠️
|
|
41
|
+
- B's changes are completely ignored
|
|
42
|
+
- Previously output `(xxx, NULL)` rows (where A didn't match B) will not be corrected to `(xxx, yyy)`
|
|
43
|
+
- Previously output rows with old B data will not be updated to new values
|
|
44
|
+
- ⚠️ Result differs from full recomputation, but this is **expected behavior**
|
|
45
45
|
|
|
46
|
-
**Case 3
|
|
46
|
+
**Case 3: Both A and B have changes**
|
|
47
47
|
```
|
|
48
|
-
|
|
48
|
+
Incremental plan: A's change data LEFT JOIN B's full data
|
|
49
49
|
```
|
|
50
|
-
-
|
|
51
|
-
-
|
|
52
|
-
-
|
|
53
|
-
- ⚠️
|
|
50
|
+
- Only A's incremental data is processed; B's changes are ignored
|
|
51
|
+
- New A rows will JOIN to B's latest data
|
|
52
|
+
- But existing A rows will not be updated due to B's changes
|
|
53
|
+
- ⚠️ New and old data may be inconsistent
|
|
54
54
|
|
|
55
|
-
### A INNER JOIN B
|
|
55
|
+
### A INNER JOIN B (B is dimension table)
|
|
56
56
|
|
|
57
|
-
**Case 1
|
|
57
|
+
**Case 1: A has incremental data, B has no changes**
|
|
58
58
|
```
|
|
59
|
-
|
|
59
|
+
Incremental plan: A's change data INNER JOIN B's full data
|
|
60
60
|
```
|
|
61
|
-
-
|
|
62
|
-
-
|
|
63
|
-
- ✅
|
|
61
|
+
- New A rows INNER JOIN with B
|
|
62
|
+
- A rows that don't match are discarded
|
|
63
|
+
- ✅ Result is correct
|
|
64
64
|
|
|
65
|
-
**Case 2
|
|
65
|
+
**Case 2: B has data changes, A has no changes**
|
|
66
66
|
```
|
|
67
|
-
|
|
67
|
+
Incremental plan: no computation triggered
|
|
68
68
|
```
|
|
69
|
-
- B
|
|
70
|
-
- B
|
|
71
|
-
- ⚠️
|
|
69
|
+
- B adds data that can match existing A rows → no new results are produced
|
|
70
|
+
- B deletes data that matched existing A rows → already-output results are not retracted
|
|
71
|
+
- ⚠️ Result differs from full recomputation
|
|
72
72
|
|
|
73
|
-
###
|
|
73
|
+
### Dimension Tables in Multi-table JOINs
|
|
74
74
|
|
|
75
75
|
```sql
|
|
76
|
-
-- t2, t3
|
|
76
|
+
-- t2, t3 are both dimension tables
|
|
77
77
|
CREATE DYNAMIC TABLE dt
|
|
78
78
|
TBLPROPERTIES('mv_const_tables'='t2,t3')
|
|
79
79
|
AS
|
|
@@ -83,171 +83,171 @@ LEFT JOIN t2 ON t1.id = t2.id
|
|
|
83
83
|
LEFT JOIN t3 ON t1.id = t3.id;
|
|
84
84
|
```
|
|
85
85
|
|
|
86
|
-
-
|
|
87
|
-
- t2
|
|
88
|
-
-
|
|
86
|
+
- Only t1's changes trigger incremental computation
|
|
87
|
+
- Changes to t2 and t3 are both ignored
|
|
88
|
+
- Incremental plan: t1's change data LEFT JOIN t2's full data LEFT JOIN t3's full data
|
|
89
89
|
|
|
90
|
-
##
|
|
90
|
+
## Scenarios Suitable for Dimension Tables
|
|
91
91
|
|
|
92
|
-
### ✅
|
|
92
|
+
### ✅ Recommended Scenarios
|
|
93
93
|
|
|
94
|
-
1.
|
|
95
|
-
-
|
|
96
|
-
-
|
|
94
|
+
1. **Lookup/dictionary table JOINs**
|
|
95
|
+
- E.g., region code tables, product category tables, status code mapping tables
|
|
96
|
+
- Characteristics: small data volume; rarely changes; and when it does change, historical analysis is not affected
|
|
97
97
|
```sql
|
|
98
|
-
--
|
|
98
|
+
-- Region code table almost never changes
|
|
99
99
|
TBLPROPERTIES('mv_const_tables'='dim_region')
|
|
100
100
|
```
|
|
101
101
|
|
|
102
|
-
2. **T+1
|
|
103
|
-
-
|
|
104
|
-
-
|
|
102
|
+
2. **T+1 dimension table + real-time fact table**
|
|
103
|
+
- Dimension table updates in batch once per day; fact table writes continuously
|
|
104
|
+
- Between two dimension table updates, the dimension table can be treated as unchanged
|
|
105
105
|
```sql
|
|
106
|
-
--
|
|
106
|
+
-- User profile table updates daily; order table writes in real-time
|
|
107
107
|
TBLPROPERTIES('mv_const_tables'='dim_user_profile')
|
|
108
108
|
```
|
|
109
109
|
|
|
110
|
-
3.
|
|
111
|
-
-
|
|
112
|
-
-
|
|
110
|
+
3. **Configuration table JOINs**
|
|
111
|
+
- E.g., business rule configs, threshold configs, weight configs
|
|
112
|
+
- Very low change frequency; after changes, a manual full refresh can correct data
|
|
113
113
|
```sql
|
|
114
114
|
TBLPROPERTIES('mv_const_tables'='config_rules')
|
|
115
115
|
```
|
|
116
116
|
|
|
117
|
-
4.
|
|
118
|
-
-
|
|
119
|
-
-
|
|
117
|
+
4. **Large fact table JOIN small dimension table, with low real-time requirements for dimension table changes**
|
|
118
|
+
- Core goal is incremental performance on the fact table
|
|
119
|
+
- Brief inconsistency after occasional dimension table changes is acceptable
|
|
120
120
|
```sql
|
|
121
|
-
--
|
|
121
|
+
-- Product info table occasionally updates; order table writes continuously
|
|
122
122
|
TBLPROPERTIES('mv_const_tables'='dim_product')
|
|
123
123
|
```
|
|
124
124
|
|
|
125
|
-
5.
|
|
126
|
-
-
|
|
127
|
-
-
|
|
125
|
+
5. **External tables that don't support time travel as the right side of a JOIN**
|
|
126
|
+
- External tables cannot provide change data; marking as dimension table enables normal incremental computation
|
|
127
|
+
- The incremental engine reads the latest snapshot of the external table
|
|
128
128
|
```sql
|
|
129
|
-
--
|
|
129
|
+
-- External MySQL table doesn't support time travel
|
|
130
130
|
TBLPROPERTIES('mv_const_tables'='external_mysql_table')
|
|
131
131
|
```
|
|
132
132
|
|
|
133
|
-
### ❌
|
|
133
|
+
### ❌ Not Recommended Scenarios
|
|
134
134
|
|
|
135
|
-
1.
|
|
136
|
-
-
|
|
137
|
-
-
|
|
135
|
+
1. **Dimension table updates frequently and real-time consistency is required**
|
|
136
|
+
- E.g., user status table updates every minute, and downstream reports require real-time reflection of the latest status
|
|
137
|
+
- In this case, do not mark it as a dimension table; let both sides participate in incremental computation
|
|
138
138
|
|
|
139
|
-
2.
|
|
140
|
-
-
|
|
141
|
-
-
|
|
142
|
-
-
|
|
139
|
+
2. **Dimension table changes affect the correctness of aggregation results**
|
|
140
|
+
- E.g., after a price table update, historical order amounts should use the old price
|
|
141
|
+
- But with dimension table marking, new fact rows will JOIN to the new price, while old fact rows keep the old price
|
|
142
|
+
- If the business requires all rows to use the latest price uniformly, do not mark the table as a dimension table
|
|
143
143
|
|
|
144
|
-
3.
|
|
145
|
-
-
|
|
146
|
-
-
|
|
144
|
+
3. **Dimension table has large data volume and changes frequently**
|
|
145
|
+
- The optimization benefit of dimension table marking comes from skipping change data computation
|
|
146
|
+
- If the dimension table itself is large and changes frequently, consider letting it participate in incremental computation normally
|
|
147
147
|
|
|
148
|
-
##
|
|
148
|
+
## Data Correction After Dimension Table Changes
|
|
149
149
|
|
|
150
|
-
|
|
150
|
+
Since dimension table changes do not trigger incremental computation, when a dimension table undergoes an important change (e.g., incorrect data was corrected, mapping relationships were updated), existing results in the DT will not be automatically updated. **If data correction is needed, a full refresh must be executed.**
|
|
151
151
|
|
|
152
152
|
```sql
|
|
153
|
-
--
|
|
153
|
+
-- Force full refresh (recommended)
|
|
154
154
|
set cz.optimizer.incremental.force.full.refresh=true
|
|
155
155
|
REFRESH DYNAMIC TABLE my_dt;
|
|
156
|
-
--
|
|
156
|
+
-- Remember to turn it off after refresh; otherwise every subsequent refresh will be full
|
|
157
157
|
set cz.optimizer.incremental.force.full.refresh=false
|
|
158
158
|
|
|
159
|
-
--
|
|
159
|
+
-- For partitioned tables, you can also do a full refresh of only a specific partition
|
|
160
160
|
set cz.optimizer.incremental.force.full.refresh=true
|
|
161
161
|
set dt.args.ds=2025-01-01
|
|
162
162
|
REFRESH DYNAMIC TABLE my_dt PARTITION(ds = '2025-01-01');
|
|
163
163
|
set cz.optimizer.incremental.force.full.refresh=false
|
|
164
164
|
```
|
|
165
165
|
|
|
166
|
-
|
|
167
|
-
- `cz.optimizer.incremental.force.full.refresh
|
|
168
|
-
-
|
|
169
|
-
-
|
|
166
|
+
Configuration notes:
|
|
167
|
+
- `cz.optimizer.incremental.force.full.refresh`: default `false`. When set to `true`, the next REFRESH ignores incremental logic and does a full scan and recomputation of all source tables.
|
|
168
|
+
- This config is session-level; after the refresh completes, it must be manually reset to `false`; otherwise all subsequent REFRESHes in the same session will use full mode.
|
|
169
|
+
- Backfill mode (`cz.optimizer.incremental.backfill.enabled=TRUE`) also automatically enables full refresh.
|
|
170
170
|
|
|
171
|
-
##
|
|
171
|
+
## Performance Benefits
|
|
172
172
|
|
|
173
|
-
|
|
174
|
-
-
|
|
175
|
-
-
|
|
173
|
+
Optimization effects after marking dimension tables:
|
|
174
|
+
- **Skip dimension table change data scanning**: no need to read dimension table change logs
|
|
175
|
+
- **Simplify incremental plan**: only need to JOIN fact table change data with dimension table full data; no reverse computation needed
|
|
176
176
|
|
|
177
|
-
## ⚠️
|
|
177
|
+
## ⚠️ Potential Data Inconsistency and Duplication After Enabling Dimension Tables
|
|
178
178
|
|
|
179
|
-
|
|
179
|
+
Marking dimension tables is a **tradeoff of consistency for performance**. The following are specific scenarios where problems will occur — evaluate whether the business can accept these before using.
|
|
180
180
|
|
|
181
|
-
###
|
|
181
|
+
### Scenario 1: LEFT JOIN — Dimension Table Update Causes NULL Not to Be Corrected
|
|
182
182
|
|
|
183
183
|
```sql
|
|
184
|
-
-- DT
|
|
184
|
+
-- DT definition
|
|
185
185
|
SELECT order.*, product.name
|
|
186
186
|
FROM order LEFT JOIN product ON order.pid = product.id;
|
|
187
|
-
-- product
|
|
187
|
+
-- product marked as dimension table
|
|
188
188
|
```
|
|
189
189
|
|
|
190
|
-
|
|
|
190
|
+
| Time | Event | Result in DT | Expected result from full recomputation |
|
|
191
191
|
|------|------|------------|------------------|
|
|
192
|
-
| T1 | order
|
|
193
|
-
| T2 | product
|
|
192
|
+
| T1 | order inserts (pid=100); product has no id=100 | (pid=100, name=NULL) | (pid=100, name=NULL) |
|
|
193
|
+
| T2 | product inserts id=100, name='Phone' | (pid=100, name=NULL) **unchanged** | (pid=100, name='Phone') |
|
|
194
194
|
|
|
195
|
-
|
|
195
|
+
**Reason**: product's changes don't trigger incremental computation; the NULL row output at T1 will never be corrected.
|
|
196
196
|
|
|
197
|
-
###
|
|
197
|
+
### Scenario 2: INNER JOIN — Dimension Table New Data Causes Missing Results
|
|
198
198
|
|
|
199
199
|
```sql
|
|
200
200
|
SELECT order.*, product.name
|
|
201
201
|
FROM order INNER JOIN product ON order.pid = product.id;
|
|
202
|
-
-- product
|
|
202
|
+
-- product marked as dimension table
|
|
203
203
|
```
|
|
204
204
|
|
|
205
|
-
|
|
|
205
|
+
| Time | Event | Result in DT | Expected result from full recomputation |
|
|
206
206
|
|------|------|------------|------------------|
|
|
207
|
-
| T1 | order
|
|
208
|
-
| T2 | product
|
|
207
|
+
| T1 | order inserts (pid=200); product has no id=200 | No output (INNER JOIN no match) | No output |
|
|
208
|
+
| T2 | product inserts id=200, name='Computer' | **Still no output** | (pid=200, name='Computer') |
|
|
209
209
|
|
|
210
|
-
|
|
210
|
+
**Reason**: product's new data doesn't trigger incremental computation; existing order rows are not re-JOINed.
|
|
211
211
|
|
|
212
|
-
###
|
|
212
|
+
### Scenario 3: Dimension Table Delete/Update Causes Stale Data to Remain
|
|
213
213
|
|
|
214
214
|
```sql
|
|
215
215
|
SELECT order.*, product.name, product.price
|
|
216
216
|
FROM order LEFT JOIN product ON order.pid = product.id;
|
|
217
|
-
-- product
|
|
217
|
+
-- product marked as dimension table
|
|
218
218
|
```
|
|
219
219
|
|
|
220
|
-
|
|
|
220
|
+
| Time | Event | Result in DT | Expected result from full recomputation |
|
|
221
221
|
|------|------|------------|------------------|
|
|
222
|
-
| T1 | order
|
|
223
|
-
| T2 | product
|
|
224
|
-
| T3 | product
|
|
222
|
+
| T1 | order inserts (pid=100); product id=100 price=99 | (pid=100, price=99) | (pid=100, price=99) |
|
|
223
|
+
| T2 | product updates id=100 price=**199** | (pid=100, price=**99**) old value remains | (pid=100, price=199) |
|
|
224
|
+
| T3 | product deletes id=100 | (pid=100, price=**99**) still remains | (pid=100, price=NULL) |
|
|
225
225
|
|
|
226
|
-
|
|
226
|
+
**Reason**: dimension table UPDATE/DELETE are both ignored; already-output rows keep old values.
|
|
227
227
|
|
|
228
|
-
###
|
|
228
|
+
### Scenario 4: Dimension Table + Aggregation Causes Inconsistent Aggregation Results
|
|
229
229
|
|
|
230
230
|
```sql
|
|
231
231
|
SELECT product.category, SUM(order.amount) as total
|
|
232
232
|
FROM order LEFT JOIN product ON order.pid = product.id
|
|
233
233
|
GROUP BY product.category;
|
|
234
|
-
-- product
|
|
234
|
+
-- product marked as dimension table
|
|
235
235
|
```
|
|
236
236
|
|
|
237
|
-
|
|
|
237
|
+
| Time | Event | Result in DT | Expected result from full recomputation |
|
|
238
238
|
|------|------|------------|------------------|
|
|
239
|
-
| T1 | order (pid=1, amount=100)
|
|
240
|
-
| T2 | product
|
|
241
|
-
| T3 | order
|
|
239
|
+
| T1 | order (pid=1, amount=100); product (id=1, category='A') | category='A', total=100 | Same |
|
|
240
|
+
| T2 | product updates id=1 category from 'A' to 'B' | category='A', total=100 **unchanged** | category='B', total=100 |
|
|
241
|
+
| T3 | order adds (pid=1, amount=200) | category='A', total=100 **and** category='B', total=200 (new row JOINs to new category; old row stays under old category) | category='B', total=300 |
|
|
242
242
|
|
|
243
|
-
|
|
243
|
+
**Reason**: T2's category change doesn't trigger recomputation; T1's old data is still aggregated under the old category. T3's new data is aggregated under the new category. The final result has data for the same pid split across different categories, causing incorrect aggregation.
|
|
244
244
|
|
|
245
|
-
###
|
|
245
|
+
### Summary: When Results Will Be Inconsistent
|
|
246
246
|
|
|
247
|
-
|
|
|
247
|
+
| Dimension table change type | LEFT JOIN | INNER JOIN |
|
|
248
248
|
|--------------|-----------|------------|
|
|
249
|
-
|
|
|
250
|
-
|
|
|
251
|
-
|
|
|
249
|
+
| New matching row added | Old fact rows' NULL is not corrected | Old fact rows don't produce new results |
|
|
250
|
+
| Existing row updated | Old fact rows keep old values | Old fact rows keep old values |
|
|
251
|
+
| Existing row deleted | Old fact rows keep old values (won't become NULL) | Old fact rows are not retracted |
|
|
252
252
|
|
|
253
|
-
|
|
253
|
+
**Core principle**: any change to a dimension table does not affect already-output result rows. Only new fact table increments will JOIN to the dimension table's latest snapshot.
|
|
@@ -1,20 +1,20 @@
|
|
|
1
|
-
# Medallion
|
|
1
|
+
# Medallion Architecture and Table Stream Combination Patterns
|
|
2
2
|
|
|
3
|
-
## Medallion
|
|
3
|
+
## Medallion Three-layer Pipeline
|
|
4
4
|
|
|
5
5
|
```
|
|
6
|
-
Bronze
|
|
7
|
-
↓ Dynamic Table
|
|
8
|
-
Silver
|
|
9
|
-
↓ Dynamic Table
|
|
10
|
-
Gold
|
|
11
|
-
↓ BI
|
|
6
|
+
Bronze (raw data)
|
|
7
|
+
↓ Dynamic Table (cleansing, INCREMENTAL)
|
|
8
|
+
Silver (cleansed data)
|
|
9
|
+
↓ Dynamic Table (aggregation, FULL)
|
|
10
|
+
Gold (metric data)
|
|
11
|
+
↓ BI tools query directly
|
|
12
12
|
```
|
|
13
13
|
|
|
14
|
-
### Bronze → Silver
|
|
14
|
+
### Bronze → Silver (Incremental Cleansing)
|
|
15
15
|
|
|
16
16
|
```sql
|
|
17
|
-
--
|
|
17
|
+
-- Prerequisite: enable change tracking on source table
|
|
18
18
|
ALTER TABLE bronze.raw_orders SET PROPERTIES ('change_tracking' = 'true');
|
|
19
19
|
|
|
20
20
|
CREATE DYNAMIC TABLE IF NOT EXISTS silver.orders_cleaned
|
|
@@ -30,7 +30,7 @@ FROM bronze.raw_orders
|
|
|
30
30
|
WHERE order_id IS NOT NULL AND amount > 0;
|
|
31
31
|
```
|
|
32
32
|
|
|
33
|
-
### Silver → Gold
|
|
33
|
+
### Silver → Gold (Aggregated Metrics, typically FULL)
|
|
34
34
|
|
|
35
35
|
```sql
|
|
36
36
|
CREATE DYNAMIC TABLE IF NOT EXISTS gold.orders_daily_summary
|
|
@@ -48,23 +48,23 @@ GROUP BY 1, 2;
|
|
|
48
48
|
|
|
49
49
|
---
|
|
50
50
|
|
|
51
|
-
##
|
|
51
|
+
## Combined with Table Stream (Event-driven)
|
|
52
52
|
|
|
53
|
-
Table Stream
|
|
53
|
+
Table Stream captures source table changes; Dynamic Table consumes the Stream for incremental processing.
|
|
54
54
|
|
|
55
|
-
###
|
|
55
|
+
### Basic Pattern
|
|
56
56
|
|
|
57
57
|
```sql
|
|
58
|
-
-- 1.
|
|
58
|
+
-- 1. Enable change tracking on source table
|
|
59
59
|
ALTER TABLE bronze.raw_orders SET PROPERTIES ('change_tracking' = 'true');
|
|
60
60
|
|
|
61
|
-
-- 2.
|
|
61
|
+
-- 2. Create Table Stream
|
|
62
62
|
CREATE TABLE STREAM bronze.orders_stream
|
|
63
63
|
ON TABLE bronze.raw_orders
|
|
64
64
|
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
|
|
65
65
|
|
|
66
|
-
-- 3. Dynamic Table
|
|
67
|
-
--
|
|
66
|
+
-- 3. Dynamic Table consumes Stream
|
|
67
|
+
-- Note: when Stream is used as DT source, each refresh consumes the offset
|
|
68
68
|
CREATE DYNAMIC TABLE IF NOT EXISTS silver.orders_incremental
|
|
69
69
|
REFRESH INTERVAL 5 MINUTE vcluster default
|
|
70
70
|
AS
|
|
@@ -73,16 +73,16 @@ FROM bronze.orders_stream
|
|
|
73
73
|
WHERE __change_type IN ('INSERT', 'UPDATE_AFTER');
|
|
74
74
|
```
|
|
75
75
|
|
|
76
|
-
### MERGE INTO + Table Stream
|
|
76
|
+
### MERGE INTO + Table Stream (Alternative to Non-partitioned DT Deduplication)
|
|
77
77
|
|
|
78
|
-
|
|
78
|
+
When deduplication by primary key is needed and the source table has continuous writes, MERGE INTO is recommended over Dynamic Table:
|
|
79
79
|
|
|
80
80
|
```sql
|
|
81
|
-
-- 1.
|
|
81
|
+
-- 1. Create Table Stream
|
|
82
82
|
CREATE TABLE STREAM source_stream ON TABLE source_table
|
|
83
83
|
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD', 'SHOW_INITIAL_ROWS' = 'TRUE');
|
|
84
84
|
|
|
85
|
-
-- 2.
|
|
85
|
+
-- 2. Create target table
|
|
86
86
|
CREATE TABLE target_table (
|
|
87
87
|
id BIGINT,
|
|
88
88
|
col1 STRING,
|
|
@@ -90,7 +90,7 @@ CREATE TABLE target_table (
|
|
|
90
90
|
event_time TIMESTAMP
|
|
91
91
|
);
|
|
92
92
|
|
|
93
|
-
-- 3.
|
|
93
|
+
-- 3. Scheduled MERGE INTO to consume Stream
|
|
94
94
|
MERGE INTO target_table t
|
|
95
95
|
USING (
|
|
96
96
|
SELECT id, col1, col2, event_time,
|
|
@@ -105,10 +105,10 @@ WHEN NOT MATCHED AND s.op = 'UPSERT' THEN INSERT
|
|
|
105
105
|
|
|
106
106
|
---
|
|
107
107
|
|
|
108
|
-
##
|
|
108
|
+
## Real-time Report Materialization
|
|
109
109
|
|
|
110
110
|
```sql
|
|
111
|
-
--
|
|
111
|
+
-- Refresh hourly sales summary for direct BI tool queries
|
|
112
112
|
CREATE DYNAMIC TABLE IF NOT EXISTS rpt.sales_hourly
|
|
113
113
|
REFRESH INTERVAL 60 MINUTE vcluster default
|
|
114
114
|
AS
|