@clickzetta/cz-cli-darwin-x64 0.3.87-dev.20260528223948 → 0.3.88
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +169 -169
- package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +126 -126
- package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +25 -25
- package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +48 -48
- package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +51 -51
- package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +59 -59
- package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +8 -7
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +99 -99
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +188 -188
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +117 -117
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +29 -29
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +80 -79
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/SKILL.md +15 -15
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-column-validation-rules.md +61 -61
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-conversion-rules.md +100 -100
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-placeholder-rules.md +64 -64
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-refresh-rules.md +32 -32
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-self-reference-rules.md +21 -21
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-workflow.md +71 -71
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +203 -202
- package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +62 -62
- package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +34 -34
- package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +61 -61
- package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +41 -41
- package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +103 -101
- package/package.json +1 -1
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
# Table Stream
|
|
1
|
+
# Table Stream SQL Reference
|
|
2
2
|
|
|
3
|
-
> **⚠️ ClickZetta
|
|
4
|
-
> -
|
|
5
|
-
> -
|
|
6
|
-
> - UPDATE
|
|
7
|
-
> -
|
|
3
|
+
> **⚠️ ClickZetta-specific syntax**
|
|
4
|
+
> - Creation syntax is `CREATE TABLE STREAM`, with parameters inside `WITH PROPERTIES (...)`
|
|
5
|
+
> - Metadata field is `__change_type` (double underscore), values: `INSERT` / `UPDATE_BEFORE` / `UPDATE_AFTER` / `DELETE`
|
|
6
|
+
> - UPDATE produces two records: `UPDATE_BEFORE` (before update) and `UPDATE_AFTER` (after update)
|
|
7
|
+
> - Typically only `UPDATE_AFTER` and `INSERT` are needed; `UPDATE_BEFORE` can be ignored
|
|
8
8
|
|
|
9
|
-
Table Stream
|
|
9
|
+
Table Stream captures change data (INSERT / UPDATE / DELETE) from a source table and is the core object for building CDC pipelines. It is typically consumed by Dynamic Tables or SQL tasks.
|
|
10
10
|
|
|
11
11
|
## CREATE TABLE STREAM
|
|
12
12
|
|
|
@@ -21,48 +21,48 @@ CREATE [ OR REPLACE ] TABLE STREAM [ IF NOT EXISTS ] <stream_name>
|
|
|
21
21
|
);
|
|
22
22
|
```
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
- `TABLE_STREAM_MODE = STANDARD
|
|
26
|
-
- `TABLE_STREAM_MODE = APPEND_ONLY
|
|
27
|
-
- `SHOW_INITIAL_ROWS = TRUE
|
|
28
|
-
- `TIMESTAMP AS OF
|
|
24
|
+
**Key parameters:**
|
|
25
|
+
- `TABLE_STREAM_MODE = STANDARD` (default): captures all changes — INSERT, UPDATE, DELETE — each row includes a `__change_type` field (`INSERT` / `UPDATE_BEFORE` / `UPDATE_AFTER` / `DELETE`)
|
|
26
|
+
- `TABLE_STREAM_MODE = APPEND_ONLY`: captures INSERT only; better performance, suitable for append-only source tables
|
|
27
|
+
- `SHOW_INITIAL_ROWS = TRUE`: first consumption returns rows already in the table when the Stream was created; `FALSE` (default) returns only new changes after Stream creation
|
|
28
|
+
- `TIMESTAMP AS OF`: specifies the point in time from which the Stream starts capturing changes
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
**Examples:**
|
|
31
31
|
```sql
|
|
32
|
-
--
|
|
32
|
+
-- Create a standard stream on a regular table (captures all changes; change_tracking must be enabled first)
|
|
33
33
|
ALTER TABLE ods.orders SET PROPERTIES ('change_tracking' = 'true');
|
|
34
34
|
|
|
35
35
|
CREATE TABLE STREAM orders_stream
|
|
36
36
|
ON TABLE ods.orders
|
|
37
37
|
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
|
|
38
38
|
|
|
39
|
-
--
|
|
39
|
+
-- Append-only stream
|
|
40
40
|
CREATE TABLE STREAM events_stream
|
|
41
41
|
ON TABLE dw.events
|
|
42
|
-
COMMENT '
|
|
42
|
+
COMMENT 'Event stream, append only'
|
|
43
43
|
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'APPEND_ONLY');
|
|
44
44
|
|
|
45
|
-
--
|
|
45
|
+
-- Start capturing from a specific timestamp
|
|
46
46
|
CREATE TABLE STREAM orders_stream_from_ts
|
|
47
47
|
ON TABLE ods.orders
|
|
48
48
|
TIMESTAMP AS OF '2024-01-01 00:00:00'
|
|
49
49
|
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD', 'SHOW_INITIAL_ROWS' = 'TRUE');
|
|
50
50
|
```
|
|
51
51
|
|
|
52
|
-
##
|
|
52
|
+
## Consuming a Table Stream
|
|
53
53
|
|
|
54
|
-
Table Stream
|
|
54
|
+
The Table Stream offset advances through DML operations. **SELECT alone does not advance the offset** — you can query repeatedly for preview. Executing DML (INSERT INTO / MERGE INTO / UPDATE / DELETE) consumes the data and advances the offset.
|
|
55
55
|
|
|
56
56
|
```sql
|
|
57
|
-
--
|
|
57
|
+
-- View current unconsumed change data (does not advance offset)
|
|
58
58
|
SELECT * FROM orders_stream;
|
|
59
59
|
|
|
60
|
-
--
|
|
60
|
+
-- System fields included in change data:
|
|
61
61
|
-- __change_type: INSERT | UPDATE_BEFORE | UPDATE_AFTER | DELETE
|
|
62
|
-
-- __commit_version:
|
|
63
|
-
-- __commit_timestamp:
|
|
62
|
+
-- __commit_version: change version number
|
|
63
|
+
-- __commit_timestamp: time the change occurred
|
|
64
64
|
|
|
65
|
-
--
|
|
65
|
+
-- Typical usage: MERGE change data into target table (filter out UPDATE_BEFORE)
|
|
66
66
|
MERGE INTO dw.orders_dim AS target
|
|
67
67
|
USING (
|
|
68
68
|
SELECT * FROM orders_stream
|
|
@@ -73,7 +73,7 @@ WHEN MATCHED AND src.__change_type = 'UPDATE_AFTER' THEN UPDATE SET target.statu
|
|
|
73
73
|
WHEN MATCHED AND src.__change_type = 'DELETE' THEN DELETE
|
|
74
74
|
WHEN NOT MATCHED AND src.__change_type IN ('INSERT', 'UPDATE_AFTER') THEN INSERT (order_id, status, amount) VALUES (src.order_id, src.status, src.amount);
|
|
75
75
|
|
|
76
|
-
--
|
|
76
|
+
-- Consume automatically with a Dynamic Table (recommended)
|
|
77
77
|
CREATE OR REPLACE DYNAMIC TABLE dw.orders_processed
|
|
78
78
|
REFRESH INTERVAL 1 MINUTE vcluster default
|
|
79
79
|
AS
|
|
@@ -91,35 +91,35 @@ DROP TABLE STREAM [ IF EXISTS ] <stream_name>;
|
|
|
91
91
|
## SHOW / DESC
|
|
92
92
|
|
|
93
93
|
```sql
|
|
94
|
-
--
|
|
94
|
+
-- List all Table Streams in the current schema
|
|
95
95
|
SHOW TABLE STREAMS;
|
|
96
96
|
|
|
97
|
-
--
|
|
97
|
+
-- List Table Streams in a specific schema
|
|
98
98
|
SHOW TABLE STREAMS IN <schema_name>;
|
|
99
99
|
|
|
100
|
-
--
|
|
100
|
+
-- Filter by name
|
|
101
101
|
SHOW TABLE STREAMS LIKE 'orders%';
|
|
102
102
|
|
|
103
|
-
--
|
|
103
|
+
-- View Table Stream details (source table, mode, creation time)
|
|
104
104
|
DESC TABLE STREAM <stream_name>;
|
|
105
105
|
```
|
|
106
106
|
|
|
107
|
-
##
|
|
107
|
+
## Notes
|
|
108
108
|
|
|
109
|
-
-
|
|
110
|
-
- DML
|
|
111
|
-
- ⚠️
|
|
112
|
-
-
|
|
113
|
-
- `STANDARD`
|
|
114
|
-
-
|
|
115
|
-
-
|
|
109
|
+
- SELECT alone does not advance the offset; you can query repeatedly for preview
|
|
110
|
+
- DML operations (INSERT INTO / MERGE INTO / UPDATE / DELETE) advance the offset
|
|
111
|
+
- ⚠️ Even if a DML has a WHERE clause that filters some rows, **the offset advances for all rows**
|
|
112
|
+
- If the Stream is not consumed for a long time, unconsumed change data is lost once it exceeds the source table's `data_retention_days`
|
|
113
|
+
- In `STANDARD` mode, UPDATE produces two records: `UPDATE_BEFORE` (before update) and `UPDATE_AFTER` (after update)
|
|
114
|
+
- When consuming, typically filter `__change_type != 'UPDATE_BEFORE'` to ignore old values
|
|
115
|
+
- The source table must have `change_tracking` enabled first: `ALTER TABLE name SET PROPERTIES ('change_tracking' = 'true')`
|
|
116
116
|
|
|
117
|
-
##
|
|
117
|
+
## Reference Documentation
|
|
118
118
|
|
|
119
119
|
- [CREATE TABLE STREAM](https://www.yunqi.tech/documents/create-table-stream)
|
|
120
120
|
- [DESC TABLE STREAM](https://www.yunqi.tech/documents/desc-table-stream)
|
|
121
121
|
- [SHOW TABLE STREAMS](https://www.yunqi.tech/documents/show-table-streams)
|
|
122
122
|
- [DROP TABLE STREAM](https://www.yunqi.tech/documents/drop-table-stream)
|
|
123
|
-
- [
|
|
124
|
-
- [Table Stream
|
|
125
|
-
- [Table Stream
|
|
123
|
+
- [Table Stream Overview](https://www.yunqi.tech/documents/tablestream_summary)
|
|
124
|
+
- [Table Stream Change Data Capture](https://www.yunqi.tech/documents/table_stream)
|
|
125
|
+
- [Table Stream Best Practices](https://www.yunqi.tech/documents/lakehouse-table-stream-best-practices)
|
|
@@ -1,89 +1,91 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: clickzetta-table-stream-pipeline
|
|
3
3
|
description: |
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
"Table Stream
|
|
7
|
-
|
|
4
|
+
Build and manage ClickZetta Table Stream change data capture pipelines, covering the
|
|
5
|
+
end-to-end workflow from source table configuration, Stream creation, and data consumption
|
|
6
|
+
to incremental ETL. Trigger when the user says "create Table Stream", "Table Stream CDC",
|
|
7
|
+
"Table Stream pipeline", "Table Stream incremental consumption", or "Stream consumption".
|
|
8
|
+
Includes change tracking enablement, mode selection, offset management, metadata field usage,
|
|
9
|
+
and idempotent consumption — all ClickZetta-specific logic.
|
|
8
10
|
Keywords: table stream, CDC, change capture, incremental ETL, stream
|
|
9
11
|
---
|
|
10
12
|
|
|
11
|
-
# Table Stream
|
|
13
|
+
# Table Stream Change Data Capture Workflow
|
|
12
14
|
|
|
13
|
-
##
|
|
15
|
+
## Instructions
|
|
14
16
|
|
|
15
|
-
###
|
|
16
|
-
|
|
17
|
+
### Step 1: Enable Change Tracking on the Source Table (Required Prerequisite)
|
|
18
|
+
Execute SQL to enable `change_tracking` on the source table:
|
|
17
19
|
```sql
|
|
18
20
|
ALTER TABLE <source_table> SET PROPERTIES ('change_tracking' = 'true');
|
|
19
21
|
```
|
|
20
|
-
-
|
|
21
|
-
-
|
|
22
|
+
- This is a mandatory prerequisite. Without it, the Stream cannot correctly capture changes.
|
|
23
|
+
- Verify the property took effect (two methods):
|
|
22
24
|
```sql
|
|
23
|
-
--
|
|
25
|
+
-- Method 1: DESC EXTENDED to view properties
|
|
24
26
|
DESC EXTENDED <source_table>;
|
|
25
27
|
|
|
26
|
-
--
|
|
28
|
+
-- Method 2: Query information_schema
|
|
27
29
|
SELECT table_name, properties FROM information_schema.tables WHERE table_name = '<source_table>';
|
|
28
30
|
```
|
|
29
31
|
|
|
30
|
-
###
|
|
31
|
-
|
|
32
|
+
### Step 2: Create a Table Stream
|
|
33
|
+
Execute SQL to create the Stream:
|
|
32
34
|
```sql
|
|
33
35
|
CREATE [ OR REPLACE ] TABLE STREAM <stream_name>
|
|
34
36
|
ON TABLE <source_table>
|
|
35
37
|
[ TIMESTAMP AS OF '<timestamp>' ]
|
|
36
|
-
[ COMMENT '
|
|
38
|
+
[ COMMENT '<description>' ]
|
|
37
39
|
WITH PROPERTIES (
|
|
38
40
|
'TABLE_STREAM_MODE' = 'STANDARD | APPEND_ONLY',
|
|
39
41
|
'SHOW_INITIAL_ROWS' = 'TRUE | FALSE'
|
|
40
42
|
);
|
|
41
43
|
```
|
|
42
|
-
|
|
43
|
-
- **STANDARD
|
|
44
|
-
-
|
|
45
|
-
- **APPEND_ONLY
|
|
46
|
-
-
|
|
47
|
-
- **SHOW_INITIAL_ROWS = TRUE
|
|
48
|
-
- **SHOW_INITIAL_ROWS = FALSE
|
|
49
|
-
-
|
|
44
|
+
Key parameter selection:
|
|
45
|
+
- **STANDARD mode**: captures INSERT/UPDATE/DELETE, reflecting the current state of the table (delta changes) → suitable for data sync, incremental ETL
|
|
46
|
+
- Delta changes refer to the net change between two transaction timestamps. For example: INSERT then DELETE the same row → delta is empty; INSERT then UPDATE → delta is one new row (final state)
|
|
47
|
+
- **APPEND_ONLY mode**: captures INSERT only, retaining all historical insert records → suitable for auditing, historical record retention
|
|
48
|
+
- Even if a row is later DELETEd, APPEND_ONLY mode retains the INSERT record for that row
|
|
49
|
+
- **SHOW_INITIAL_ROWS = TRUE**: first consumption returns rows already in the table when the Stream was created
|
|
50
|
+
- **SHOW_INITIAL_ROWS = FALSE** (default): first consumption returns only new changes after Stream creation
|
|
51
|
+
- Optional: specify a starting timestamp
|
|
50
52
|
```sql
|
|
51
|
-
-- TIMESTAMP AS OF
|
|
52
|
-
--
|
|
53
|
+
-- TIMESTAMP AS OF specifies the starting read offset for the Stream
|
|
54
|
+
-- Note: this feature may be unstable in some scenarios; prefer the default behavior (start from creation time)
|
|
53
55
|
CREATE TABLE STREAM <stream_name>
|
|
54
56
|
ON TABLE <source_table>
|
|
55
57
|
TIMESTAMP AS OF '<timestamp>'
|
|
56
58
|
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
|
|
57
59
|
```
|
|
58
60
|
|
|
59
|
-
###
|
|
60
|
-
|
|
61
|
-
-
|
|
62
|
-
-
|
|
61
|
+
### Step 3: Prepare the Target Table
|
|
62
|
+
Create a target table with a structure compatible with the source table:
|
|
63
|
+
- The target table column definitions must include the business columns from the source table
|
|
64
|
+
- Recommended: add extra metadata columns (e.g., sync_version, sync_timestamp) for tracking
|
|
63
65
|
|
|
64
|
-
###
|
|
65
|
-
|
|
66
|
+
### Step 4: Query Stream Data (Preview — Does Not Advance Offset)
|
|
67
|
+
Execute SELECT to preview change data in the Stream:
|
|
66
68
|
```sql
|
|
67
69
|
SELECT *, __change_type, __commit_version, __commit_timestamp
|
|
68
70
|
FROM <stream_name>;
|
|
69
71
|
```
|
|
70
|
-
-
|
|
71
|
-
-
|
|
72
|
-
- **UPDATE
|
|
73
|
-
- `UPDATE_BEFORE
|
|
74
|
-
- `UPDATE_AFTER
|
|
75
|
-
-
|
|
72
|
+
- SELECT alone does not advance the offset
|
|
73
|
+
- Metadata fields: `__change_type` (values: `INSERT` / `UPDATE_BEFORE` / `UPDATE_AFTER` / `DELETE`), `__commit_version`, `__commit_timestamp`
|
|
74
|
+
- **UPDATE handling**: an UPDATE operation produces two records:
|
|
75
|
+
- `UPDATE_BEFORE`: the old value before the update (typically ignored during consumption)
|
|
76
|
+
- `UPDATE_AFTER`: the new value after the update (used when writing to the target table)
|
|
77
|
+
- Always filter on `__change_type` during consumption to avoid writing `UPDATE_BEFORE` old values into the target table
|
|
76
78
|
|
|
77
|
-
###
|
|
78
|
-
|
|
79
|
+
### Step 5: Consume Stream Data (Advances Offset)
|
|
80
|
+
Execute a DML operation to consume data:
|
|
79
81
|
|
|
80
|
-
####
|
|
82
|
+
#### Method A: Full Consumption (INSERT INTO)
|
|
81
83
|
```sql
|
|
82
84
|
INSERT INTO <target_table>
|
|
83
85
|
SELECT <columns> FROM <stream_name>;
|
|
84
86
|
```
|
|
85
87
|
|
|
86
|
-
####
|
|
88
|
+
#### Method B: Idempotent Consumption (MERGE — recommended)
|
|
87
89
|
```sql
|
|
88
90
|
MERGE INTO <target_table> t
|
|
89
91
|
USING (SELECT * FROM <stream_name> WHERE __change_type != 'UPDATE_BEFORE') s
|
|
@@ -92,68 +94,68 @@ WHEN MATCHED AND s.__change_type IN ('INSERT', 'UPDATE_AFTER') THEN UPDATE SET t
|
|
|
92
94
|
WHEN MATCHED AND s.__change_type = 'DELETE' THEN DELETE
|
|
93
95
|
WHEN NOT MATCHED AND s.__change_type = 'INSERT' THEN INSERT (<columns>) VALUES (s.<columns>);
|
|
94
96
|
```
|
|
95
|
-
- DML
|
|
96
|
-
- ⚠️
|
|
97
|
-
-
|
|
98
|
-
-
|
|
99
|
-
- ⚠️ **MERGE
|
|
100
|
-
|
|
101
|
-
###
|
|
102
|
-
|
|
97
|
+
- DML operations (INSERT INTO / MERGE INTO / UPDATE / DELETE) advance the offset
|
|
98
|
+
- ⚠️ Even with a WHERE clause that filters some rows, **the offset advances for all rows** (not just the matched ones)
|
|
99
|
+
- Use MERGE for idempotency to avoid duplicate data from repeated consumption
|
|
100
|
+
- Filter out `UPDATE_BEFORE` in the USING subquery to prevent old values from interfering with MERGE logic
|
|
101
|
+
- ⚠️ **MERGE clause ordering requirement**: when multiple `WHEN MATCHED` clauses are present, **UPDATE must come before DELETE**, otherwise an error occurs (error message: `update statement must be before delete statement`)
|
|
102
|
+
|
|
103
|
+
### Step 6: Verify Consumption Status
|
|
104
|
+
Execute a query to confirm consumption is complete:
|
|
103
105
|
```sql
|
|
104
106
|
SELECT COUNT(*) FROM <stream_name>;
|
|
105
107
|
```
|
|
106
|
-
-
|
|
107
|
-
-
|
|
108
|
+
- After successful consumption, COUNT should return 0, or count only new changes that arrived after the consuming DML ran
|
|
109
|
+
- Record the last consumed `__commit_version` for failure recovery
|
|
108
110
|
|
|
109
|
-
## Offset
|
|
111
|
+
## Offset Advancement Rules
|
|
110
112
|
|
|
111
|
-
|
|
|
113
|
+
| Operation | Advances offset? | Notes |
|
|
112
114
|
|------|----------------|------|
|
|
113
|
-
| `SELECT * FROM stream` |
|
|
114
|
-
| `INSERT INTO target SELECT ... FROM stream` |
|
|
115
|
-
| `MERGE INTO target USING stream ...` |
|
|
116
|
-
| `UPDATE target SET ... FROM stream` |
|
|
117
|
-
| `DELETE FROM target USING stream` |
|
|
118
|
-
|
|
|
115
|
+
| `SELECT * FROM stream` | No | Preview only; can be queried repeatedly |
|
|
116
|
+
| `INSERT INTO target SELECT ... FROM stream` | Yes | Consumes data |
|
|
117
|
+
| `MERGE INTO target USING stream ...` | Yes | Consumes data (recommended) |
|
|
118
|
+
| `UPDATE target SET ... FROM stream` | Yes | Consumes data |
|
|
119
|
+
| `DELETE FROM target USING stream` | Yes | Consumes data |
|
|
120
|
+
| DML with WHERE clause | Yes (all rows) | Even if WHERE filters some rows, offset advances for all rows |
|
|
119
121
|
|
|
120
|
-
> ⚠️
|
|
122
|
+
> ⚠️ **Key note**: offset advancement is all-or-nothing. Once a DML consumes the Stream, the offset advances for all change records — partial consumption is not possible. If the DML fails (e.g., target table does not exist), the offset does not advance.
|
|
121
123
|
|
|
122
|
-
##
|
|
124
|
+
## Mode Selection Quick Reference
|
|
123
125
|
|
|
124
|
-
|
|
|
126
|
+
| Requirement | Recommended mode |
|
|
125
127
|
|------|---------|
|
|
126
|
-
|
|
|
127
|
-
|
|
|
128
|
-
|
|
|
129
|
-
|
|
|
128
|
+
| Data sync (keep target consistent with source) | STANDARD |
|
|
129
|
+
| Incremental ETL pipeline | STANDARD |
|
|
130
|
+
| Audit all insert records | APPEND_ONLY |
|
|
131
|
+
| Historical record retention | APPEND_ONLY |
|
|
130
132
|
|
|
131
|
-
##
|
|
133
|
+
## Performance Optimization Tips
|
|
132
134
|
|
|
133
|
-
-
|
|
134
|
-
-
|
|
135
|
-
-
|
|
136
|
-
-
|
|
137
|
-
-
|
|
135
|
+
- Select only necessary columns; avoid `SELECT *`
|
|
136
|
+
- Consume the Stream regularly to prevent data accumulation
|
|
137
|
+
- High-change-rate tables: consume more frequently; low-change-rate tables: reduce frequency
|
|
138
|
+
- Large Streams can be split by primary key range for parallel processing
|
|
139
|
+
- Set an appropriate data retention period on the source table
|
|
138
140
|
|
|
139
|
-
##
|
|
141
|
+
## Examples
|
|
140
142
|
|
|
141
|
-
###
|
|
143
|
+
### Example 1: Real-time Order Table Sync
|
|
142
144
|
```sql
|
|
143
|
-
-- 1.
|
|
145
|
+
-- 1. Enable change tracking on source table
|
|
144
146
|
ALTER TABLE orders SET PROPERTIES ('change_tracking' = 'true');
|
|
145
147
|
|
|
146
|
-
-- 2.
|
|
148
|
+
-- 2. Create Table Stream
|
|
147
149
|
CREATE TABLE STREAM orders_stream ON TABLE orders
|
|
148
150
|
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD', 'SHOW_INITIAL_ROWS' = 'FALSE');
|
|
149
151
|
|
|
150
|
-
-- 3.
|
|
152
|
+
-- 3. Create target table (compatible structure with source)
|
|
151
153
|
CREATE TABLE orders_sync (order_id INT, status STRING, amount DOUBLE);
|
|
152
154
|
|
|
153
|
-
-- 4.
|
|
155
|
+
-- 4. Preview Stream data (does not advance offset)
|
|
154
156
|
SELECT *, __commit_version, __commit_timestamp FROM orders_stream;
|
|
155
157
|
|
|
156
|
-
-- 5.
|
|
158
|
+
-- 5. Consume Stream data (advances offset)
|
|
157
159
|
MERGE INTO orders_sync t
|
|
158
160
|
USING (SELECT * FROM orders_stream WHERE __change_type != 'UPDATE_BEFORE') s
|
|
159
161
|
ON t.order_id = s.order_id
|
|
@@ -161,46 +163,46 @@ WHEN MATCHED AND s.__change_type IN ('INSERT', 'UPDATE_AFTER') THEN UPDATE SET t
|
|
|
161
163
|
WHEN MATCHED AND s.__change_type = 'DELETE' THEN DELETE
|
|
162
164
|
WHEN NOT MATCHED AND s.__change_type = 'INSERT' THEN INSERT (order_id, status, amount) VALUES (s.order_id, s.status, s.amount);
|
|
163
165
|
|
|
164
|
-
-- 6.
|
|
166
|
+
-- 6. Verify consumption is complete
|
|
165
167
|
SELECT COUNT(*) FROM orders_stream;
|
|
166
168
|
```
|
|
167
169
|
|
|
168
|
-
###
|
|
170
|
+
### Example 2: User Behavior Audit (Retain Full Insert History)
|
|
169
171
|
```sql
|
|
170
|
-
-- 1.
|
|
172
|
+
-- 1. Enable change tracking on source table
|
|
171
173
|
ALTER TABLE user_actions SET PROPERTIES ('change_tracking' = 'true');
|
|
172
174
|
|
|
173
|
-
-- 2.
|
|
175
|
+
-- 2. Create Table Stream (APPEND_ONLY mode)
|
|
174
176
|
CREATE TABLE STREAM user_actions_audit_stream ON TABLE user_actions
|
|
175
177
|
WITH PROPERTIES ('TABLE_STREAM_MODE' = 'APPEND_ONLY', 'SHOW_INITIAL_ROWS' = 'TRUE');
|
|
176
178
|
|
|
177
|
-
-- 3.
|
|
179
|
+
-- 3. Preview Stream data
|
|
178
180
|
SELECT *, __commit_version, __commit_timestamp FROM user_actions_audit_stream;
|
|
179
181
|
|
|
180
|
-
-- 4.
|
|
182
|
+
-- 4. Consume Stream data (INSERT INTO advances offset)
|
|
181
183
|
INSERT INTO user_actions_audit
|
|
182
184
|
SELECT *, __commit_version AS audit_version, __commit_timestamp AS audit_time
|
|
183
185
|
FROM user_actions_audit_stream;
|
|
184
186
|
```
|
|
185
187
|
|
|
186
|
-
##
|
|
188
|
+
## Troubleshooting
|
|
187
189
|
|
|
188
|
-
Stream
|
|
189
|
-
|
|
190
|
-
|
|
190
|
+
Stream not capturing changes:
|
|
191
|
+
Cause: `change_tracking` is not enabled on the source table, or no DML has been executed on the source table since the Stream was created
|
|
192
|
+
Solution: Execute `ALTER TABLE <table> SET PROPERTIES ('change_tracking' = 'true')`; confirm that DML was executed after the Stream was created
|
|
191
193
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
194
|
+
Cannot distinguish change types:
|
|
195
|
+
Cause: `__change_type` not filtered in MERGE/INSERT, causing `UPDATE_BEFORE` old values to be written to the target table
|
|
196
|
+
Solution: Filter out `UPDATE_BEFORE` records with `__change_type != 'UPDATE_BEFORE'` in the MERGE source, so that only `INSERT`, `UPDATE_AFTER`, and `DELETE` records are applied
|
|
195
197
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
198
|
+
Offset not advancing after consumption:
|
|
199
|
+
Cause: Only SELECT was used; no DML was executed
|
|
200
|
+
Solution: Data must be consumed via DML operations such as INSERT INTO / MERGE INTO / UPDATE
|
|
199
201
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
202
|
+
Duplicate data in target table from repeated consumption:
|
|
203
|
+
Cause: Using INSERT INTO instead of MERGE, or non-idempotent consumption logic
|
|
204
|
+
Solution: Switch to MERGE statements; record the last consumed `__commit_version` and `__commit_timestamp` for checkpoint recovery
|
|
203
205
|
|
|
204
|
-
COMMENT
|
|
205
|
-
|
|
206
|
-
|
|
206
|
+
COMMENT syntax error:
|
|
207
|
+
Cause: Used `COMMENT = '...'` (with equals sign) instead of `COMMENT '...'`
|
|
208
|
+
Solution: Correct syntax is `COMMENT 'description'` — no equals sign
|