@clickzetta/cz-cli-darwin-x64 0.3.87-dev.20260528223948 → 0.3.88
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-dynamic-table/SKILL.md +169 -169
- package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +126 -126
- package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +25 -25
- package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +48 -48
- package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +51 -51
- package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +59 -59
- package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +8 -7
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +99 -99
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +188 -188
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +117 -117
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +29 -29
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +80 -79
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/SKILL.md +15 -15
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-column-validation-rules.md +61 -61
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-conversion-rules.md +100 -100
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-placeholder-rules.md +64 -64
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-refresh-rules.md +32 -32
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-self-reference-rules.md +21 -21
- package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-workflow.md +71 -71
- package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +203 -202
- package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +62 -62
- package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +34 -34
- package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +61 -61
- package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +41 -41
- package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +103 -101
- package/package.json +1 -1
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
# Dynamic Table
|
|
1
|
+
# Dynamic Table SQL Reference
|
|
2
2
|
|
|
3
|
-
> **⚠️ ClickZetta
|
|
4
|
-
> -
|
|
5
|
-
> -
|
|
6
|
-
> - `ALTER DYNAMIC TABLE`
|
|
7
|
-
> -
|
|
8
|
-
> -
|
|
9
|
-
> -
|
|
3
|
+
> **⚠️ ClickZetta-specific syntax**
|
|
4
|
+
> - Refresh schedule syntax: `REFRESH INTERVAL 5 MINUTE vcluster default` (not `TARGET_LAG`)
|
|
5
|
+
> - Modifying the schedule interval or compute cluster requires `CREATE OR REPLACE`; `ALTER` does not support this
|
|
6
|
+
> - `ALTER DYNAMIC TABLE` only supports: SUSPEND / RESUME / SET COMMENT / RENAME COLUMN / CHANGE COLUMN COMMENT / SET/UNSET PROPERTIES
|
|
7
|
+
> - Drop with `DROP DYNAMIC TABLE` (not `DROP TABLE`)
|
|
8
|
+
> - Restore with `UNDROP TABLE` (not `UNDROP DYNAMIC TABLE`)
|
|
9
|
+
> - Describe with `DESC TABLE name` (does not support `DESC DYNAMIC TABLE name EXTENDED`)
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
Dynamic Tables are the core incremental computation objects in ClickZetta Lakehouse. Defined by a SQL query, they refresh automatically and incrementally without manual scheduling.
|
|
12
12
|
|
|
13
13
|
## CREATE DYNAMIC TABLE
|
|
14
14
|
|
|
@@ -25,15 +25,15 @@ AS
|
|
|
25
25
|
<query>;
|
|
26
26
|
```
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
- `REFRESH INTERVAL <n> MINUTE
|
|
30
|
-
- `vcluster
|
|
31
|
-
- `OR REPLACE
|
|
32
|
-
-
|
|
28
|
+
**Key parameters:**
|
|
29
|
+
- `REFRESH INTERVAL <n> MINUTE`: refresh interval, minimum 1 minute
|
|
30
|
+
- `vcluster`: name of the compute cluster to run refresh jobs (name directly, no equals sign or quotes)
|
|
31
|
+
- `OR REPLACE`: replaces an existing Dynamic Table with the same name (required when modifying SQL logic or scheduling config)
|
|
32
|
+
- Recommended: use a GP-type cluster (e.g., `default`); AP-type clusters do not support small file compaction
|
|
33
33
|
|
|
34
|
-
|
|
34
|
+
**Examples:**
|
|
35
35
|
```sql
|
|
36
|
-
--
|
|
36
|
+
-- Basic example: refresh order summary every 5 minutes
|
|
37
37
|
CREATE OR REPLACE DYNAMIC TABLE dw.order_summary
|
|
38
38
|
REFRESH INTERVAL 5 MINUTE vcluster default
|
|
39
39
|
AS
|
|
@@ -45,7 +45,7 @@ SELECT
|
|
|
45
45
|
FROM ods.orders
|
|
46
46
|
GROUP BY 1, 2;
|
|
47
47
|
|
|
48
|
-
--
|
|
48
|
+
-- Modify refresh interval (must use CREATE OR REPLACE)
|
|
49
49
|
CREATE OR REPLACE DYNAMIC TABLE dw.order_summary
|
|
50
50
|
REFRESH INTERVAL 10 MINUTE vcluster default
|
|
51
51
|
AS
|
|
@@ -61,82 +61,82 @@ GROUP BY 1, 2;
|
|
|
61
61
|
## ALTER DYNAMIC TABLE
|
|
62
62
|
|
|
63
63
|
```sql
|
|
64
|
-
--
|
|
64
|
+
-- Suspend refresh
|
|
65
65
|
ALTER DYNAMIC TABLE <name> SUSPEND;
|
|
66
66
|
|
|
67
|
-
--
|
|
67
|
+
-- Resume refresh
|
|
68
68
|
ALTER DYNAMIC TABLE <name> RESUME;
|
|
69
69
|
|
|
70
|
-
--
|
|
70
|
+
-- Modify comment
|
|
71
71
|
ALTER DYNAMIC TABLE <name> SET COMMENT '<comment>';
|
|
72
72
|
|
|
73
|
-
--
|
|
73
|
+
-- Rename column
|
|
74
74
|
ALTER DYNAMIC TABLE <name> RENAME COLUMN <old_col> TO <new_col>;
|
|
75
75
|
|
|
76
|
-
--
|
|
76
|
+
-- Modify column comment (note: use CHANGE COLUMN)
|
|
77
77
|
ALTER DYNAMIC TABLE <name> CHANGE COLUMN <col_name> COMMENT '<comment>';
|
|
78
78
|
|
|
79
|
-
--
|
|
79
|
+
-- Modify properties
|
|
80
80
|
ALTER DYNAMIC TABLE <name> SET PROPERTIES ('key' = 'value');
|
|
81
81
|
ALTER DYNAMIC TABLE <name> UNSET PROPERTIES ('key');
|
|
82
82
|
```
|
|
83
83
|
|
|
84
|
-
>
|
|
84
|
+
> Note: To modify the refresh interval, compute cluster, or SQL query logic, use `CREATE OR REPLACE DYNAMIC TABLE`. ALTER does not support these operations.
|
|
85
85
|
|
|
86
|
-
## REFRESH DYNAMIC TABLE
|
|
86
|
+
## REFRESH DYNAMIC TABLE (Manual Trigger)
|
|
87
87
|
|
|
88
88
|
```sql
|
|
89
|
-
--
|
|
89
|
+
-- Manually trigger a single refresh
|
|
90
90
|
REFRESH DYNAMIC TABLE <name>;
|
|
91
91
|
```
|
|
92
92
|
|
|
93
93
|
## DROP DYNAMIC TABLE
|
|
94
94
|
|
|
95
95
|
```sql
|
|
96
|
-
-- ⚠️
|
|
96
|
+
-- ⚠️ Must use DROP DYNAMIC TABLE, not DROP TABLE
|
|
97
97
|
DROP DYNAMIC TABLE [ IF EXISTS ] <name>;
|
|
98
98
|
|
|
99
|
-
--
|
|
99
|
+
-- Restore a dropped Dynamic Table (⚠️ use UNDROP TABLE, not UNDROP DYNAMIC TABLE)
|
|
100
100
|
UNDROP TABLE <name>;
|
|
101
101
|
```
|
|
102
102
|
|
|
103
103
|
## SHOW / DESC
|
|
104
104
|
|
|
105
105
|
```sql
|
|
106
|
-
--
|
|
106
|
+
-- List all Dynamic Tables in the current schema
|
|
107
107
|
SHOW TABLES WHERE is_dynamic = true;
|
|
108
108
|
|
|
109
|
-
--
|
|
109
|
+
-- List Dynamic Tables in a specific schema
|
|
110
110
|
SHOW TABLES IN <schema_name> WHERE is_dynamic = true;
|
|
111
111
|
|
|
112
|
-
--
|
|
112
|
+
-- View Dynamic Table structure
|
|
113
113
|
DESC TABLE <name>;
|
|
114
114
|
|
|
115
|
-
--
|
|
115
|
+
-- View full CREATE statement
|
|
116
116
|
SHOW CREATE TABLE <name>;
|
|
117
117
|
|
|
118
|
-
--
|
|
118
|
+
-- View refresh history (status, duration, trigger type, incremental row count)
|
|
119
119
|
SHOW DYNAMIC TABLE REFRESH HISTORY WHERE name = '<dt_name>' LIMIT 20;
|
|
120
120
|
```
|
|
121
121
|
|
|
122
|
-
> ⚠️ **DESC
|
|
122
|
+
> ⚠️ **DESC note**: Use `DESC TABLE name` for Dynamic Tables. `DESC DYNAMIC TABLE name EXTENDED` is not supported (EXTENDED will cause an error).
|
|
123
123
|
|
|
124
|
-
##
|
|
124
|
+
## Notes
|
|
125
125
|
|
|
126
|
-
-
|
|
127
|
-
-
|
|
128
|
-
-
|
|
129
|
-
-
|
|
130
|
-
-
|
|
131
|
-
-
|
|
132
|
-
-
|
|
126
|
+
- To modify SQL logic, refresh interval, or compute cluster → use `CREATE OR REPLACE`; `ALTER` is not supported for these
|
|
127
|
+
- Minimum refresh interval is 1 minute
|
|
128
|
+
- Drop with `DROP DYNAMIC TABLE` (not `DROP TABLE`)
|
|
129
|
+
- Restore with `UNDROP TABLE` (not `UNDROP DYNAMIC TABLE`)
|
|
130
|
+
- Refresh failures do not affect queryability (returns data from the last successful version)
|
|
131
|
+
- A `CREATE OR REPLACE` that changes anything other than simply adding or dropping columns will trigger a full refresh
|
|
132
|
+
- Recommended: use a GP-type cluster (e.g., `default`); AP-type clusters do not support small file compaction
|
|
133
133
|
|
|
134
|
-
##
|
|
134
|
+
## Parameterized Dynamic Table (SESSION_CONFIGS)
|
|
135
135
|
|
|
136
|
-
|
|
136
|
+
Use the `SESSION_CONFIGS()` function to define parameterized queries, passing partition values at refresh time to control the refresh scope:
|
|
137
137
|
|
|
138
138
|
```sql
|
|
139
|
-
--
|
|
139
|
+
-- Create a parameterized Dynamic Table
|
|
140
140
|
CREATE OR REPLACE DYNAMIC TABLE dwd.orders_partitioned
|
|
141
141
|
REFRESH INTERVAL 30 MINUTE vcluster default
|
|
142
142
|
AS
|
|
@@ -144,42 +144,42 @@ SELECT order_id, user_id, amount, dt
|
|
|
144
144
|
FROM ods.orders
|
|
145
145
|
WHERE dt = SESSION_CONFIGS('target_date', CAST(CURRENT_DATE() AS STRING));
|
|
146
146
|
|
|
147
|
-
--
|
|
147
|
+
-- Manually trigger refresh with parameters
|
|
148
148
|
REFRESH DYNAMIC TABLE dwd.orders_partitioned
|
|
149
149
|
WITH PROPERTIES ('target_date' = '2024-06-15');
|
|
150
150
|
```
|
|
151
151
|
|
|
152
|
-
|
|
152
|
+
Use case: migrating traditional daily full ETL jobs to incremental jobs, replacing scheduling variables with SESSION_CONFIGS.
|
|
153
153
|
|
|
154
|
-
##
|
|
154
|
+
## Dynamic Table DML Operations
|
|
155
155
|
|
|
156
|
-
|
|
156
|
+
Dynamic Tables do not support DML by default. You must first enable the `cz.sql.dt.allow.dml` parameter (it must be set before each DML operation):
|
|
157
157
|
|
|
158
158
|
```sql
|
|
159
|
-
-- ⚠️
|
|
159
|
+
-- ⚠️ Must execute SET in the same session/batch before the DML
|
|
160
160
|
SET cz.sql.dt.allow.dml = true;
|
|
161
161
|
INSERT INTO <name> VALUES (...);
|
|
162
162
|
|
|
163
|
-
--
|
|
163
|
+
-- Delete
|
|
164
164
|
SET cz.sql.dt.allow.dml = true;
|
|
165
165
|
DELETE FROM <name> WHERE ...;
|
|
166
166
|
```
|
|
167
167
|
|
|
168
|
-
> ⚠️ **DML
|
|
169
|
-
> - `SET cz.sql.dt.allow.dml = true`
|
|
170
|
-
> -
|
|
171
|
-
> - UPDATE
|
|
172
|
-
> -
|
|
168
|
+
> ⚠️ **DML notes**:
|
|
169
|
+
> - `SET cz.sql.dt.allow.dml = true` must be in the same execution batch as the DML statement
|
|
170
|
+
> - After a DML operation, the next automatic refresh will trigger a **full refresh** (not incremental), which may take longer
|
|
171
|
+
> - UPDATE may fail due to internal hidden columns (`MV__KEY`); use DELETE + INSERT instead
|
|
172
|
+
> - Use DML only for special cases such as data correction
|
|
173
173
|
|
|
174
|
-
##
|
|
174
|
+
## Reference Documentation
|
|
175
175
|
|
|
176
176
|
- [CREATE DYNAMIC TABLE](https://www.yunqi.tech/documents/create-dynamic-table)
|
|
177
177
|
- [ALTER DYNAMIC TABLE](https://www.yunqi.tech/documents/alter-dynamic-table)
|
|
178
178
|
- [DROP DYNAMIC TABLE](https://www.yunqi.tech/documents/drop-dynamic-table)
|
|
179
179
|
- [SHOW DYNAMIC TABLES](https://www.yunqi.tech/documents/show-dynamic-table)
|
|
180
180
|
- [SHOW DYNAMIC TABLE REFRESH HISTORY](https://www.yunqi.tech/documents/refresh-history)
|
|
181
|
-
- [
|
|
182
|
-
- [
|
|
183
|
-
- [
|
|
184
|
-
- [
|
|
185
|
-
- [
|
|
181
|
+
- [Dynamic Table Overview](https://www.yunqi.tech/documents/dynamic_table_summary)
|
|
182
|
+
- [View Dynamic Table Refresh Mode](https://www.yunqi.tech/documents/dynamic-table-incre)
|
|
183
|
+
- [Migrating Traditional Offline Jobs to Incremental](https://www.yunqi.tech/documents/transformt-dt)
|
|
184
|
+
- [Parameterized Dynamic Table](https://www.yunqi.tech/documents/dynamicTable-parmaters)
|
|
185
|
+
- [Dynamic Table DML Support](https://www.yunqi.tech/documents/dynamicTable-dml)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
# Materialized View
|
|
1
|
+
# Materialized View SQL Reference
|
|
2
2
|
|
|
3
|
-
> **⚠️ ClickZetta
|
|
4
|
-
> -
|
|
5
|
-
> -
|
|
6
|
-
> -
|
|
3
|
+
> **⚠️ ClickZetta-specific syntax**
|
|
4
|
+
> - Scheduled refresh: `REFRESH INTERVAL 10 MINUTE vcluster default` (same syntax as Dynamic Table)
|
|
5
|
+
> - Manual refresh: `REFRESH MATERIALIZED VIEW <name>;`
|
|
6
|
+
> - Modify comments with `ALTER TABLE`, not `ALTER MATERIALIZED VIEW`
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
Materialized Views pre-compute and physically store query results, making them ideal for fixed-dimension aggregation acceleration. Like Dynamic Tables, Materialized Views can be refreshed manually or on a schedule; unlike Dynamic Tables, they do not support incremental refresh.
|
|
9
9
|
|
|
10
10
|
## CREATE MATERIALIZED VIEW
|
|
11
11
|
|
|
@@ -19,15 +19,15 @@ AS
|
|
|
19
19
|
<query>;
|
|
20
20
|
```
|
|
21
21
|
|
|
22
|
-
|
|
23
|
-
- `REFRESH INTERVAL 10 MINUTE vcluster default
|
|
24
|
-
-
|
|
25
|
-
- `BUILD DEFERRED
|
|
26
|
-
- `DISABLE QUERY REWRITE
|
|
22
|
+
**Key parameters:**
|
|
23
|
+
- `REFRESH INTERVAL 10 MINUTE vcluster default`: scheduled automatic refresh (same syntax as Dynamic Table)
|
|
24
|
+
- Omitting the REFRESH clause: only manual refresh via `REFRESH MATERIALIZED VIEW <name>;`
|
|
25
|
+
- `BUILD DEFERRED`: deferred build — does not compute results immediately at creation time
|
|
26
|
+
- `DISABLE QUERY REWRITE`: disables query rewrite (MV will not automatically accelerate queries)
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
**Examples:**
|
|
29
29
|
```sql
|
|
30
|
-
--
|
|
30
|
+
-- Materialized View with scheduled auto-refresh (every 10 minutes)
|
|
31
31
|
CREATE MATERIALIZED VIEW mv_dept_stats
|
|
32
32
|
REFRESH INTERVAL 10 MINUTE vcluster default
|
|
33
33
|
AS
|
|
@@ -40,7 +40,7 @@ FROM departments d
|
|
|
40
40
|
JOIN employees e ON d.dept_id = e.dept_id
|
|
41
41
|
GROUP BY d.dept_id, d.dept_name;
|
|
42
42
|
|
|
43
|
-
--
|
|
43
|
+
-- Modify refresh interval (requires CREATE OR REPLACE)
|
|
44
44
|
CREATE OR REPLACE MATERIALIZED VIEW mv_dept_stats
|
|
45
45
|
BUILD DEFERRED
|
|
46
46
|
REFRESH INTERVAL 20 MINUTE vcluster default
|
|
@@ -57,32 +57,32 @@ FROM departments d
|
|
|
57
57
|
JOIN employees e ON d.dept_id = e.dept_id
|
|
58
58
|
GROUP BY d.dept_id, d.dept_name, d.location;
|
|
59
59
|
|
|
60
|
-
--
|
|
60
|
+
-- Manual refresh
|
|
61
61
|
REFRESH MATERIALIZED VIEW mv_dept_stats;
|
|
62
62
|
```
|
|
63
63
|
|
|
64
64
|
## ALTER MATERIALIZED VIEW
|
|
65
65
|
|
|
66
66
|
```sql
|
|
67
|
-
--
|
|
67
|
+
-- Suspend automatic refresh
|
|
68
68
|
ALTER MATERIALIZED VIEW <name> SUSPEND;
|
|
69
69
|
|
|
70
|
-
--
|
|
70
|
+
-- Resume automatic refresh
|
|
71
71
|
ALTER MATERIALIZED VIEW <name> RESUME;
|
|
72
72
|
|
|
73
|
-
--
|
|
73
|
+
-- Modify comment
|
|
74
74
|
ALTER TABLE <mv_name> SET COMMENT '<comment>';
|
|
75
75
|
|
|
76
|
-
--
|
|
76
|
+
-- Modify column comment (Materialized Views use ALTER TABLE syntax)
|
|
77
77
|
ALTER TABLE <mv_name> CHANGE COLUMN <col_name> COMMENT '<comment>';
|
|
78
78
|
```
|
|
79
79
|
|
|
80
|
-
>
|
|
80
|
+
> Note: Use `ALTER TABLE` (not `ALTER MATERIALIZED VIEW`) to modify comments on a Materialized View.
|
|
81
81
|
|
|
82
82
|
## REFRESH MATERIALIZED VIEW
|
|
83
83
|
|
|
84
84
|
```sql
|
|
85
|
-
--
|
|
85
|
+
-- Manually trigger a full refresh
|
|
86
86
|
REFRESH MATERIALIZED VIEW <name>;
|
|
87
87
|
```
|
|
88
88
|
|
|
@@ -95,35 +95,35 @@ DROP MATERIALIZED VIEW [ IF EXISTS ] <name>;
|
|
|
95
95
|
## SHOW / DESC
|
|
96
96
|
|
|
97
97
|
```sql
|
|
98
|
-
--
|
|
98
|
+
-- List all Materialized Views in the current schema
|
|
99
99
|
SHOW TABLES WHERE is_materialized_view = true;
|
|
100
100
|
|
|
101
|
-
--
|
|
101
|
+
-- Filter by name
|
|
102
102
|
SHOW TABLES LIKE 'mv_%' WHERE is_materialized_view = true;
|
|
103
103
|
|
|
104
|
-
--
|
|
104
|
+
-- View Materialized View structure
|
|
105
105
|
DESC MATERIALIZED VIEW <name>;
|
|
106
106
|
DESCRIBE MATERIALIZED VIEW <name> EXTENDED;
|
|
107
107
|
|
|
108
|
-
--
|
|
108
|
+
-- View full CREATE statement
|
|
109
109
|
SHOW CREATE TABLE <name>;
|
|
110
110
|
```
|
|
111
111
|
|
|
112
|
-
##
|
|
112
|
+
## Dynamic Table vs Materialized View — Selection Guide
|
|
113
113
|
|
|
114
|
-
|
|
|
114
|
+
| Scenario | Recommended |
|
|
115
115
|
|---|---|
|
|
116
|
-
|
|
|
117
|
-
|
|
|
118
|
-
|
|
|
119
|
-
|
|
|
116
|
+
| Need minute-level automatic incremental refresh | Dynamic Table |
|
|
117
|
+
| Fixed aggregation, manual or low-frequency refresh | Materialized View |
|
|
118
|
+
| Need CDC change detection | Dynamic Table + Table Stream |
|
|
119
|
+
| Accelerate BI queries, real-time data not required | Materialized View |
|
|
120
120
|
|
|
121
|
-
##
|
|
121
|
+
## Reference Documentation
|
|
122
122
|
|
|
123
123
|
- [CREATE MATERIALIZED VIEW](https://www.yunqi.tech/documents/CREATEMATERIALIZEDVIEW)
|
|
124
124
|
- [ALTER MATERIALIZED VIEW](https://www.yunqi.tech/documents/alter-materialzied-view)
|
|
125
125
|
- [REFRESH MATERIALIZED VIEW](https://www.yunqi.tech/documents/REFRESH)
|
|
126
126
|
- [DROP MATERIALIZED VIEW](https://www.yunqi.tech/documents/DROPMATERIALIZEDVIEW)
|
|
127
127
|
- [SHOW MATERIALIZED VIEWS](https://www.yunqi.tech/documents/show-materialized-view)
|
|
128
|
-
- [
|
|
129
|
-
- [
|
|
128
|
+
- [Materialized View Concepts and Use Cases](https://www.yunqi.tech/documents/MATERIALIZEDVIEW)
|
|
129
|
+
- [Materialized View DDL Summary](https://www.yunqi.tech/documents/materialized_ddl)
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
# Pipe SQL
|
|
1
|
+
# Pipe SQL Reference
|
|
2
2
|
|
|
3
|
-
> **⚠️ ClickZetta
|
|
4
|
-
> - Kafka
|
|
5
|
-
> - JSON
|
|
6
|
-
> - Pipe
|
|
7
|
-
> - OSS
|
|
3
|
+
> **⚠️ ClickZetta-specific syntax**
|
|
4
|
+
> - The Kafka read function is `read_kafka(...)`, using **positional parameters** (not named parameters with `=>`)
|
|
5
|
+
> - JSON field extraction uses `parse_json(value::string)['field']::TYPE` syntax
|
|
6
|
+
> - A Pipe starts automatically after creation; no manual RESUME is needed
|
|
7
|
+
> - For OSS Pipes, `PURGE=true` goes at the end of the `USING <format>` clause, after any `OPTIONS (...)` (e.g., `USING CSV PURGE=true` or `USING CSV OPTIONS (...) PURGE=true`)
|
|
8
8
|
|
|
9
|
-
Pipe
|
|
9
|
+
Pipe is the continuous data ingestion object in ClickZetta Lakehouse. Defined by SQL, it automatically and continuously imports data from Kafka or object storage (OSS/S3/COS) into a target table without external scheduling.
|
|
10
10
|
|
|
11
|
-
## CREATE PIPE —
|
|
11
|
+
## CREATE PIPE — Ingest from Kafka
|
|
12
12
|
|
|
13
13
|
```sql
|
|
14
14
|
CREATE [ OR REPLACE ] PIPE <pipe_name>
|
|
@@ -21,22 +21,22 @@ AS
|
|
|
21
21
|
COPY INTO <target_table> FROM (
|
|
22
22
|
SELECT <expr> [, ...]
|
|
23
23
|
FROM read_kafka(
|
|
24
|
-
'<bootstrap_servers>', --
|
|
25
|
-
'<topic>', --
|
|
26
|
-
'', --
|
|
27
|
-
'<group_id>', --
|
|
28
|
-
'', '', '', '', --
|
|
29
|
-
'raw', -- key
|
|
30
|
-
'raw', -- value
|
|
24
|
+
'<bootstrap_servers>', -- required: Kafka cluster address
|
|
25
|
+
'<topic>', -- required: topic name
|
|
26
|
+
'', -- reserved (leave empty string)
|
|
27
|
+
'<group_id>', -- required: persistent consumer group ID
|
|
28
|
+
'', '', '', '', -- positional params left empty, managed by Pipe automatically
|
|
29
|
+
'raw', -- key format (only 'raw' supported currently)
|
|
30
|
+
'raw', -- value format (only 'raw' supported currently)
|
|
31
31
|
0, -- max_errors
|
|
32
|
-
MAP(<kafka_config>) -- Kafka
|
|
32
|
+
MAP(<kafka_config>) -- Kafka configuration parameters
|
|
33
33
|
)
|
|
34
34
|
);
|
|
35
35
|
```
|
|
36
36
|
|
|
37
|
-
|
|
37
|
+
**Examples:**
|
|
38
38
|
```sql
|
|
39
|
-
--
|
|
39
|
+
-- Continuously ingest JSON data from Kafka
|
|
40
40
|
CREATE OR REPLACE PIPE kafka_orders_pipe
|
|
41
41
|
VIRTUAL_CLUSTER = 'default'
|
|
42
42
|
BATCH_INTERVAL_IN_SECONDS = '60'
|
|
@@ -62,7 +62,7 @@ COPY INTO ods.orders FROM (
|
|
|
62
62
|
)
|
|
63
63
|
);
|
|
64
64
|
|
|
65
|
-
-- SASL
|
|
65
|
+
-- SASL authentication
|
|
66
66
|
CREATE PIPE kafka_secure_pipe
|
|
67
67
|
VIRTUAL_CLUSTER = 'pipe_vc'
|
|
68
68
|
BATCH_INTERVAL_IN_SECONDS = '60'
|
|
@@ -83,12 +83,12 @@ COPY INTO ods.secure_events FROM (
|
|
|
83
83
|
);
|
|
84
84
|
```
|
|
85
85
|
|
|
86
|
-
##
|
|
86
|
+
## Verify Kafka Connection (Before Creating a Pipe)
|
|
87
87
|
|
|
88
|
-
|
|
88
|
+
When using `read_kafka` standalone to explore data, you can set `kafka.auto.offset.reset` in the MAP:
|
|
89
89
|
|
|
90
90
|
```sql
|
|
91
|
-
--
|
|
91
|
+
-- Verify connection and data format
|
|
92
92
|
SELECT value::string
|
|
93
93
|
FROM read_kafka(
|
|
94
94
|
'kafka.example.com:9092',
|
|
@@ -102,11 +102,11 @@ FROM read_kafka(
|
|
|
102
102
|
LIMIT 10;
|
|
103
103
|
```
|
|
104
104
|
|
|
105
|
-
> ⚠️
|
|
106
|
-
> -
|
|
107
|
-
> - Pipe
|
|
105
|
+
> ⚠️ **Standalone exploration vs inside a Pipe**:
|
|
106
|
+
> - Standalone exploration: you can set `kafka.auto.offset.reset` to `earliest` in the MAP to read historical data
|
|
107
|
+
> - Inside a Pipe: the positional parameters shown as empty strings (`''`) in the `CREATE PIPE` template must be left empty; the consumer offset is controlled by the Pipe's `RESET_KAFKA_GROUP_OFFSETS` parameter
|
|
108
108
|
|
|
109
|
-
## CREATE PIPE —
|
|
109
|
+
## CREATE PIPE — Ingest from Object Storage
|
|
110
110
|
|
|
111
111
|
```sql
|
|
112
112
|
CREATE [ OR REPLACE ] PIPE [ IF NOT EXISTS ] <pipe_name>
|
|
@@ -120,17 +120,17 @@ FROM VOLUME <volume_name>
|
|
|
120
120
|
USING <csv | parquet | orc | json> [OPTIONS ('<key>' = '<value>', ...)] PURGE=true;
|
|
121
121
|
```
|
|
122
122
|
|
|
123
|
-
|
|
124
|
-
- `VIRTUAL_CLUSTER
|
|
125
|
-
- `INGEST_MODE = 'LIST_PURGE'
|
|
126
|
-
- `INGEST_MODE = 'EVENT_NOTIFICATION'
|
|
127
|
-
- `COMMENT 'text'
|
|
128
|
-
- `PURGE=true
|
|
129
|
-
- PIPE
|
|
123
|
+
**Key parameters:**
|
|
124
|
+
- `VIRTUAL_CLUSTER`: specifies the virtual cluster name (required for OSS Pipes)
|
|
125
|
+
- `INGEST_MODE = 'LIST_PURGE'`: general mode, periodically scans the file list; `PURGE=true` must be set
|
|
126
|
+
- `INGEST_MODE = 'EVENT_NOTIFICATION'`: event notification mode, low latency (Alibaba Cloud OSS + AWS S3 only); `PURGE=true` is not required
|
|
127
|
+
- `COMMENT 'text'`: no equals sign (`COMMENT = 'text'` will cause an error)
|
|
128
|
+
- `PURGE=true`: placed at the end, after OPTIONS: `USING CSV OPTIONS (...) PURGE=true`
|
|
129
|
+
- COPY statements inside a PIPE do not support `files`, `regexp`, or `subdirectory` parameters
|
|
130
130
|
|
|
131
|
-
|
|
131
|
+
**Examples:**
|
|
132
132
|
```sql
|
|
133
|
-
-- LIST_PURGE
|
|
133
|
+
-- LIST_PURGE mode (no OPTIONS)
|
|
134
134
|
CREATE OR REPLACE PIPE oss_events_pipe
|
|
135
135
|
VIRTUAL_CLUSTER = 'default'
|
|
136
136
|
INGEST_MODE = 'LIST_PURGE'
|
|
@@ -140,7 +140,7 @@ COPY INTO ods.events
|
|
|
140
140
|
FROM VOLUME my_oss_volume
|
|
141
141
|
USING PARQUET PURGE=true;
|
|
142
142
|
|
|
143
|
-
-- CSV
|
|
143
|
+
-- CSV format with OPTIONS (OPTIONS before PURGE)
|
|
144
144
|
CREATE PIPE oss_csv_pipe
|
|
145
145
|
VIRTUAL_CLUSTER = 'default'
|
|
146
146
|
INGEST_MODE = 'LIST_PURGE'
|
|
@@ -149,7 +149,7 @@ COPY INTO ods.csv_data
|
|
|
149
149
|
FROM VOLUME my_csv_volume
|
|
150
150
|
USING CSV OPTIONS ('header' = 'true', 'sep' = ',') PURGE=true;
|
|
151
151
|
|
|
152
|
-
-- EVENT_NOTIFICATION
|
|
152
|
+
-- EVENT_NOTIFICATION mode (PURGE not required)
|
|
153
153
|
CREATE PIPE oss_event_pipe
|
|
154
154
|
VIRTUAL_CLUSTER = 'default'
|
|
155
155
|
INGEST_MODE = 'EVENT_NOTIFICATION'
|
|
@@ -160,33 +160,33 @@ FROM VOLUME my_oss_event_volume
|
|
|
160
160
|
USING PARQUET;
|
|
161
161
|
```
|
|
162
162
|
|
|
163
|
-
##
|
|
163
|
+
## Start / Stop a Pipe
|
|
164
164
|
|
|
165
165
|
```sql
|
|
166
|
-
--
|
|
166
|
+
-- Pause Pipe
|
|
167
167
|
ALTER PIPE <pipe_name> SET PIPE_EXECUTION_PAUSED = true;
|
|
168
168
|
|
|
169
|
-
--
|
|
169
|
+
-- Resume Pipe
|
|
170
170
|
ALTER PIPE <pipe_name> SET PIPE_EXECUTION_PAUSED = false;
|
|
171
171
|
```
|
|
172
172
|
|
|
173
|
-
##
|
|
173
|
+
## Modify Pipe Properties
|
|
174
174
|
|
|
175
175
|
```sql
|
|
176
|
-
--
|
|
176
|
+
-- Only one property can be modified at a time
|
|
177
177
|
ALTER PIPE <pipe_name> SET VIRTUAL_CLUSTER = 'new_vc';
|
|
178
178
|
ALTER PIPE <pipe_name> SET COPY_JOB_HINT = '{"cz.sql.split.kafka.strategy":"size","cz.mapper.kafka.message.size":"200000"}';
|
|
179
179
|
```
|
|
180
180
|
|
|
181
|
-
> ⚠️ **ALTER PIPE
|
|
181
|
+
> ⚠️ **Supported ALTER PIPE properties**:
|
|
182
182
|
> - ✅ `PIPE_EXECUTION_PAUSED`
|
|
183
183
|
> - ✅ `VIRTUAL_CLUSTER`
|
|
184
184
|
> - ✅ `COPY_JOB_HINT`
|
|
185
|
-
> - ❌ `BATCH_INTERVAL_IN_SECONDS
|
|
186
|
-
> - ❌ `BATCH_SIZE_PER_KAFKA_PARTITION
|
|
185
|
+
> - ❌ `BATCH_INTERVAL_IN_SECONDS` (not supported; must drop and recreate)
|
|
186
|
+
> - ❌ `BATCH_SIZE_PER_KAFKA_PARTITION` (not supported; must drop and recreate)
|
|
187
187
|
>
|
|
188
|
-
>
|
|
189
|
-
> `COPY_JOB_HINT`
|
|
188
|
+
> Modifying the COPY/INSERT statement logic is not supported; drop the Pipe and recreate it.
|
|
189
|
+
> Modifying `COPY_JOB_HINT` overwrites all existing hints; all parameters must be set at once.
|
|
190
190
|
|
|
191
191
|
## DROP PIPE
|
|
192
192
|
|
|
@@ -197,26 +197,26 @@ DROP PIPE [ IF EXISTS ] <pipe_name>;
|
|
|
197
197
|
## SHOW PIPE
|
|
198
198
|
|
|
199
199
|
```sql
|
|
200
|
-
--
|
|
200
|
+
-- List all Pipes in the current schema
|
|
201
201
|
SHOW PIPES;
|
|
202
202
|
|
|
203
|
-
--
|
|
203
|
+
-- View Pipe details (status, latency, definition)
|
|
204
204
|
DESC PIPE <pipe_name>;
|
|
205
205
|
DESC PIPE EXTENDED <pipe_name>;
|
|
206
206
|
```
|
|
207
207
|
|
|
208
|
-
##
|
|
208
|
+
## Notes
|
|
209
209
|
|
|
210
|
-
- Pipe
|
|
211
|
-
- Kafka
|
|
212
|
-
-
|
|
213
|
-
-
|
|
214
|
-
- Kafka
|
|
210
|
+
- A Pipe starts automatically after creation; no manual RESUME is needed
|
|
211
|
+
- Kafka Pipes use a consumer group to manage offsets; keeping the same group_id when recreating a Pipe allows resuming from the last offset
|
|
212
|
+
- Object storage Pipes detect new files via file list scanning or event notifications; `load_history` deduplication records are retained for 7 days
|
|
213
|
+
- Pipes do not support modifying the AS clause; drop and recreate (not `CREATE OR REPLACE`)
|
|
214
|
+
- Kafka Pipes only support PLAINTEXT and SASL_PLAINTEXT security protocols; SSL is not supported
|
|
215
215
|
|
|
216
|
-
##
|
|
216
|
+
## Reference Documentation
|
|
217
217
|
|
|
218
|
-
- [Pipe
|
|
219
|
-
- [
|
|
220
|
-
- [
|
|
221
|
-
- [
|
|
222
|
-
- [
|
|
218
|
+
- [Pipe Overview](https://www.yunqi.tech/documents/pipe-summary)
|
|
219
|
+
- [Continuous Ingestion with read_kafka](https://www.yunqi.tech/documents/pipe-kafka)
|
|
220
|
+
- [Continuous Ingestion with Kafka External Table Stream](https://www.yunqi.tech/documents/pipe-kafka-table-stream)
|
|
221
|
+
- [Best Practices: Efficient Kafka Ingestion with Pipe](https://www.yunqi.tech/documents/pipe-kafka-bestpractice-1)
|
|
222
|
+
- [Continuous Ingestion from Object Storage with Pipe](https://www.yunqi.tech/documents/pipe-storage-object)
|