@clickzetta/cz-cli-darwin-x64 0.3.92 → 0.3.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
- package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
- package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
- package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
- package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
- package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
- package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
- package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
- package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
- package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
- package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
- package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
- package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
- package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
- package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
- package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
- package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
- package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
- package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
- package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
- package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
- package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
- package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
- package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
- package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
- package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
- package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
- package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
- package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
- package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
- package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
- package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
- package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
- package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
- package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
- package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
- package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
- package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
- package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
- package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
- package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
- package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
- package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
- package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
- package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
- package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
- package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
- package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
- package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
- package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
- package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
- package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
- package/package.json +1 -1
- package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
- package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
- package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
- package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
- package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
- package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
- package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
- package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
- package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
- package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
- /package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
# DQL
|
|
1
|
+
# DQL Complete Syntax Reference
|
|
2
2
|
|
|
3
|
-
>
|
|
3
|
+
> Based on the ClickZetta Lakehouse product documentation, annotated with differences from Snowflake and Spark SQL
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
## SELECT
|
|
7
|
+
## SELECT Basic Syntax
|
|
8
8
|
|
|
9
9
|
```sql
|
|
10
10
|
[WITH cte_name AS (SELECT ...) [, ...]]
|
|
@@ -23,12 +23,12 @@ FROM table_reference
|
|
|
23
23
|
|
|
24
24
|
---
|
|
25
25
|
|
|
26
|
-
## SELECT
|
|
26
|
+
## SELECT Extensions
|
|
27
27
|
|
|
28
|
-
### EXCEPT
|
|
28
|
+
### EXCEPT (Exclude Columns)
|
|
29
29
|
|
|
30
30
|
```sql
|
|
31
|
-
--
|
|
31
|
+
-- Exclude specified columns from SELECT * (Snowflake and Spark also offer column-exclusion syntax)
|
|
32
32
|
SELECT * EXCEPT(password, secret_key) FROM users;
|
|
33
33
|
SELECT * EXCEPT(meta, tags) FROM orders;
|
|
34
34
|
```
|
|
@@ -37,23 +37,23 @@ SELECT * EXCEPT(meta, tags) FROM orders;
|
|
|
37
37
|
|
|
38
38
|
```sql
|
|
39
39
|
SELECT DISTINCT customer_id FROM orders;
|
|
40
|
-
SELECT ALL customer_id FROM orders; --
|
|
40
|
+
SELECT ALL customer_id FROM orders; -- default, keeps duplicates
|
|
41
41
|
```
|
|
42
42
|
|
|
43
43
|
### LIMIT / OFFSET
|
|
44
44
|
|
|
45
45
|
```sql
|
|
46
46
|
SELECT * FROM orders LIMIT 100;
|
|
47
|
-
SELECT * FROM orders LIMIT 100 OFFSET 200; --
|
|
47
|
+
SELECT * FROM orders LIMIT 100 OFFSET 200; -- skip first 200 rows
|
|
48
48
|
|
|
49
|
-
-- ⚠️ ClickZetta
|
|
49
|
+
-- ⚠️ ClickZetta does not support Snowflake's TOP N syntax
|
|
50
50
|
-- Snowflake: SELECT TOP 10 * FROM orders;
|
|
51
51
|
-- ClickZetta: SELECT * FROM orders LIMIT 10;
|
|
52
52
|
```
|
|
53
53
|
|
|
54
54
|
---
|
|
55
55
|
|
|
56
|
-
## FROM
|
|
56
|
+
## FROM Clause
|
|
57
57
|
|
|
58
58
|
### JOIN
|
|
59
59
|
|
|
@@ -72,97 +72,97 @@ SELECT * FROM a CROSS JOIN b;
|
|
|
72
72
|
-- SELF JOIN
|
|
73
73
|
SELECT a.id, b.id FROM orders a JOIN orders b ON a.customer_id = b.customer_id;
|
|
74
74
|
|
|
75
|
-
-- USING
|
|
75
|
+
-- USING syntax
|
|
76
76
|
SELECT * FROM orders JOIN customers USING (customer_id);
|
|
77
77
|
|
|
78
78
|
-- NATURAL JOIN
|
|
79
79
|
SELECT * FROM orders NATURAL JOIN customers;
|
|
80
80
|
|
|
81
|
-
-- SEMI JOIN
|
|
81
|
+
-- SEMI JOIN (implemented with EXISTS or IN)
|
|
82
82
|
SELECT * FROM orders WHERE EXISTS (
|
|
83
83
|
SELECT 1 FROM customers WHERE customers.id = orders.customer_id
|
|
84
84
|
);
|
|
85
85
|
|
|
86
|
-
-- ANTI JOIN
|
|
86
|
+
-- ANTI JOIN (implemented with NOT EXISTS or NOT IN)
|
|
87
87
|
SELECT * FROM orders WHERE NOT EXISTS (
|
|
88
88
|
SELECT 1 FROM customers WHERE customers.id = orders.customer_id
|
|
89
89
|
);
|
|
90
90
|
```
|
|
91
91
|
|
|
92
|
-
|
|
93
|
-
- Snowflake
|
|
94
|
-
- Snowflake
|
|
92
|
+
**Differences from Snowflake:**
|
|
93
|
+
- Snowflake supports `ASOF JOIN` (time-series join); ClickZetta does not
|
|
94
|
+
- Snowflake supports `MATCH_RECOGNIZE`; ClickZetta does not
|
|
95
95
|
|
|
96
|
-
### LATERAL VIEW
|
|
96
|
+
### LATERAL VIEW (Expand Arrays/MAPs)
|
|
97
97
|
|
|
98
98
|
```sql
|
|
99
|
-
-- EXPLODE
|
|
99
|
+
-- EXPLODE to expand array
|
|
100
100
|
SELECT e.id, s.skill
|
|
101
101
|
FROM employees e
|
|
102
102
|
LATERAL VIEW EXPLODE(e.skills) s AS skill;
|
|
103
103
|
|
|
104
|
-
-- POSEXPLODE
|
|
104
|
+
-- POSEXPLODE with position index
|
|
105
105
|
SELECT e.id, ps.pos, ps.skill
|
|
106
106
|
FROM employees e
|
|
107
107
|
LATERAL VIEW POSEXPLODE(e.skills) ps AS pos, skill;
|
|
108
108
|
|
|
109
|
-
-- OUTER
|
|
109
|
+
-- OUTER (preserves rows even for empty arrays)
|
|
110
110
|
SELECT e.id, s.skill
|
|
111
111
|
FROM employees e
|
|
112
112
|
LATERAL VIEW OUTER EXPLODE(e.skills) s AS skill;
|
|
113
113
|
|
|
114
|
-
--
|
|
114
|
+
-- Expand MAP
|
|
115
115
|
SELECT id, k, v
|
|
116
116
|
FROM t
|
|
117
117
|
LATERAL VIEW EXPLODE(meta_map) m AS k, v;
|
|
118
118
|
```
|
|
119
119
|
|
|
120
|
-
|
|
121
|
-
- Snowflake
|
|
122
|
-
- Snowflake `f.value::STRING
|
|
120
|
+
**Differences from Snowflake:**
|
|
121
|
+
- Snowflake uses `LATERAL FLATTEN(input => arr)`; ClickZetta uses `LATERAL VIEW EXPLODE(arr)`
|
|
122
|
+
- Snowflake `f.value::STRING`; ClickZetta uses column alias directly
|
|
123
123
|
|
|
124
|
-
|
|
125
|
-
-
|
|
124
|
+
**Differences from Spark SQL:**
|
|
125
|
+
- Syntax is identical (ClickZetta is compatible with Hive/Spark style)
|
|
126
126
|
|
|
127
127
|
### TABLESAMPLE
|
|
128
128
|
|
|
129
129
|
```sql
|
|
130
|
-
-- SYSTEM
|
|
130
|
+
-- SYSTEM mode: sample by percentage (file-level)
|
|
131
131
|
SELECT * FROM orders TABLESAMPLE (10 PERCENT);
|
|
132
132
|
|
|
133
|
-
-- ROW
|
|
133
|
+
-- ROW mode: sample by row count
|
|
134
134
|
SELECT * FROM orders TABLESAMPLE (100 ROWS);
|
|
135
135
|
```
|
|
136
136
|
|
|
137
|
-
### SEQUENCE
|
|
137
|
+
### SEQUENCE (Generate Sequences)
|
|
138
138
|
|
|
139
139
|
```sql
|
|
140
|
-
--
|
|
140
|
+
-- Generate integer sequence (returns ARRAY)
|
|
141
141
|
SELECT SEQUENCE(1, 5); -- [1,2,3,4,5]
|
|
142
142
|
SELECT SEQUENCE(0, 10, 2); -- [0,2,4,6,8,10]
|
|
143
143
|
|
|
144
|
-
--
|
|
145
|
-
SELECT EXPLODE(SEQUENCE(1, 5)) AS n; -- 5
|
|
144
|
+
-- Expand to rows (ClickZetta uses EXPLODE(SEQUENCE(...)), no GENERATE_SERIES)
|
|
145
|
+
SELECT EXPLODE(SEQUENCE(1, 5)) AS n; -- 5 rows: 1,2,3,4,5
|
|
146
146
|
```
|
|
147
147
|
|
|
148
|
-
### EXPLODE
|
|
148
|
+
### EXPLODE Directly in SELECT
|
|
149
149
|
|
|
150
150
|
```sql
|
|
151
|
-
-- Spark
|
|
151
|
+
-- Spark style: EXPLODE directly in SELECT
|
|
152
152
|
SELECT EXPLODE(ARRAY(1, 2, 3)) AS val;
|
|
153
153
|
SELECT POSEXPLODE(ARRAY('a', 'b', 'c')) AS (pos, val);
|
|
154
154
|
|
|
155
|
-
--
|
|
155
|
+
-- Equivalent LATERAL VIEW syntax
|
|
156
156
|
SELECT val FROM (SELECT ARRAY(1,2,3) AS arr) t
|
|
157
157
|
LATERAL VIEW EXPLODE(arr) lv AS val;
|
|
158
158
|
```
|
|
159
159
|
|
|
160
160
|
|
|
161
161
|
|
|
162
|
-
## WHERE
|
|
162
|
+
## WHERE Clause
|
|
163
163
|
|
|
164
164
|
```sql
|
|
165
|
-
--
|
|
165
|
+
-- Basic conditions
|
|
166
166
|
WHERE amount > 100 AND status = 'completed'
|
|
167
167
|
WHERE status IN ('pending', 'processing')
|
|
168
168
|
WHERE status NOT IN ('cancelled', 'refunded')
|
|
@@ -172,43 +172,43 @@ WHERE name NOT LIKE '%test%'
|
|
|
172
172
|
WHERE tags IS NULL
|
|
173
173
|
WHERE tags IS NOT NULL
|
|
174
174
|
|
|
175
|
-
--
|
|
175
|
+
-- Regex matching
|
|
176
176
|
WHERE name RLIKE '^[A-Z].*'
|
|
177
|
-
WHERE name REGEXP '^[A-Z].*' --
|
|
177
|
+
WHERE name REGEXP '^[A-Z].*' -- same as RLIKE
|
|
178
178
|
|
|
179
|
-
--
|
|
179
|
+
-- Subquery
|
|
180
180
|
WHERE customer_id IN (SELECT id FROM customers WHERE tier = 'VIP')
|
|
181
181
|
WHERE EXISTS (SELECT 1 FROM orders WHERE orders.customer_id = customers.id)
|
|
182
182
|
```
|
|
183
183
|
|
|
184
|
-
|
|
185
|
-
- Snowflake `ILIKE
|
|
186
|
-
- Snowflake `RLIKE` → ClickZetta
|
|
184
|
+
**Differences from Snowflake:**
|
|
185
|
+
- Snowflake `ILIKE` (case-insensitive LIKE) → ClickZetta `ILIKE` ✅ also supported
|
|
186
|
+
- Snowflake `RLIKE` → ClickZetta also supports `RLIKE` / `REGEXP`
|
|
187
187
|
|
|
188
188
|
---
|
|
189
189
|
|
|
190
|
-
## GROUP BY
|
|
190
|
+
## GROUP BY Extensions
|
|
191
191
|
|
|
192
192
|
```sql
|
|
193
|
-
--
|
|
193
|
+
-- Basic grouping
|
|
194
194
|
SELECT region, SUM(amount) FROM orders GROUP BY region;
|
|
195
|
-
SELECT region, SUM(amount) FROM orders GROUP BY 1; --
|
|
195
|
+
SELECT region, SUM(amount) FROM orders GROUP BY 1; -- by position
|
|
196
196
|
|
|
197
|
-
-- GROUP BY ALL
|
|
197
|
+
-- GROUP BY ALL (auto-infer all non-aggregate columns)
|
|
198
198
|
SELECT year, month, region, SUM(amount) FROM orders GROUP BY ALL;
|
|
199
199
|
|
|
200
|
-
-- GROUPING SETS
|
|
200
|
+
-- GROUPING SETS (multi-dimensional grouping)
|
|
201
201
|
SELECT region, product, SUM(sales)
|
|
202
202
|
FROM orders
|
|
203
203
|
GROUP BY GROUPING SETS ((region, product), (region), (product), ());
|
|
204
204
|
|
|
205
|
-
-- ROLLUP
|
|
205
|
+
-- ROLLUP (hierarchical subtotals)
|
|
206
206
|
SELECT region, city, SUM(amount)
|
|
207
207
|
FROM orders
|
|
208
208
|
GROUP BY ROLLUP (region, city);
|
|
209
|
-
--
|
|
209
|
+
-- equivalent to GROUPING SETS ((region, city), (region), ())
|
|
210
210
|
|
|
211
|
-
-- CUBE
|
|
211
|
+
-- CUBE (all-combination subtotals)
|
|
212
212
|
SELECT region, product, channel, SUM(amount)
|
|
213
213
|
FROM orders
|
|
214
214
|
GROUP BY CUBE (region, product, channel);
|
|
@@ -220,9 +220,9 @@ GROUP BY customer_id
|
|
|
220
220
|
HAVING total > 10000;
|
|
221
221
|
```
|
|
222
222
|
|
|
223
|
-
|
|
224
|
-
- `GROUP BY ALL`
|
|
225
|
-
- `GROUPING SETS / ROLLUP / CUBE`
|
|
223
|
+
**Differences from Snowflake:**
|
|
224
|
+
- `GROUP BY ALL`: supported by both Snowflake and ClickZetta
|
|
225
|
+
- `GROUPING SETS / ROLLUP / CUBE`: supported by both Snowflake and ClickZetta
|
|
226
226
|
|
|
227
227
|
---
|
|
228
228
|
|
|
@@ -232,15 +232,15 @@ HAVING total > 10000;
|
|
|
232
232
|
SELECT * FROM orders ORDER BY amount DESC;
|
|
233
233
|
SELECT * FROM orders ORDER BY amount DESC NULLS LAST;
|
|
234
234
|
SELECT * FROM orders ORDER BY amount ASC NULLS FIRST;
|
|
235
|
-
SELECT * FROM orders ORDER BY 1 DESC, 2 ASC; --
|
|
235
|
+
SELECT * FROM orders ORDER BY 1 DESC, 2 ASC; -- by position
|
|
236
236
|
```
|
|
237
237
|
|
|
238
238
|
---
|
|
239
239
|
|
|
240
|
-
## CTE
|
|
240
|
+
## CTE (Common Table Expressions)
|
|
241
241
|
|
|
242
242
|
```sql
|
|
243
|
-
--
|
|
243
|
+
-- Basic CTE
|
|
244
244
|
WITH
|
|
245
245
|
monthly AS (
|
|
246
246
|
SELECT DATE_TRUNC('month', created_at) AS month, SUM(amount) AS total
|
|
@@ -251,8 +251,8 @@ WITH
|
|
|
251
251
|
)
|
|
252
252
|
SELECT * FROM ranked WHERE rnk <= 5;
|
|
253
253
|
|
|
254
|
-
-- ⚠️
|
|
255
|
-
-- Snowflake/Databricks/Spark SQL
|
|
254
|
+
-- ⚠️ Recursive CTE (NOT supported by ClickZetta)
|
|
255
|
+
-- Snowflake/Databricks/Spark SQL support:
|
|
256
256
|
WITH RECURSIVE org_tree AS (
|
|
257
257
|
SELECT id, name, parent_id, 0 AS level
|
|
258
258
|
FROM employees WHERE parent_id IS NULL
|
|
@@ -262,8 +262,8 @@ WITH RECURSIVE org_tree AS (
|
|
|
262
262
|
)
|
|
263
263
|
SELECT * FROM org_tree ORDER BY level, id;
|
|
264
264
|
|
|
265
|
-
-- ClickZetta
|
|
266
|
-
--
|
|
265
|
+
-- ClickZetta alternative: use Python/ZettaPark for iteration
|
|
266
|
+
-- Or use multi-level CTEs to simulate limited-depth recursion
|
|
267
267
|
WITH
|
|
268
268
|
level0 AS (SELECT id, name, parent_id, 0 AS level FROM employees WHERE parent_id IS NULL),
|
|
269
269
|
level1 AS (SELECT e.id, e.name, e.parent_id, 1 AS level FROM employees e JOIN level0 t ON e.parent_id = t.id),
|
|
@@ -271,24 +271,24 @@ WITH
|
|
|
271
271
|
SELECT * FROM level0 UNION ALL SELECT * FROM level1 UNION ALL SELECT * FROM level2;
|
|
272
272
|
```
|
|
273
273
|
|
|
274
|
-
|
|
275
|
-
- Snowflake
|
|
276
|
-
- ClickZetta
|
|
277
|
-
-
|
|
274
|
+
**Differences from Snowflake:**
|
|
275
|
+
- Snowflake supports `WITH RECURSIVE`; ClickZetta ❌ does not support recursive CTE
|
|
276
|
+
- ClickZetta only supports non-recursive CTE (regular WITH clause)
|
|
277
|
+
- For recursive scenarios, use Python/ZettaPark iteration, or multi-level CTEs to simulate limited depth
|
|
278
278
|
|
|
279
279
|
---
|
|
280
280
|
|
|
281
|
-
##
|
|
281
|
+
## Window Functions
|
|
282
282
|
|
|
283
283
|
```sql
|
|
284
|
-
--
|
|
284
|
+
-- Basic syntax
|
|
285
285
|
function_name() OVER (
|
|
286
286
|
[PARTITION BY col1, col2]
|
|
287
287
|
[ORDER BY col3 [ASC|DESC]]
|
|
288
288
|
[ROWS|RANGE BETWEEN start AND end]
|
|
289
289
|
)
|
|
290
290
|
|
|
291
|
-
--
|
|
291
|
+
-- Ranking functions
|
|
292
292
|
ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC)
|
|
293
293
|
RANK() OVER (ORDER BY score DESC)
|
|
294
294
|
DENSE_RANK() OVER (ORDER BY score DESC)
|
|
@@ -296,34 +296,34 @@ NTILE(4) OVER (ORDER BY amount)
|
|
|
296
296
|
PERCENT_RANK() OVER (ORDER BY amount)
|
|
297
297
|
CUME_DIST() OVER (ORDER BY amount)
|
|
298
298
|
|
|
299
|
-
--
|
|
299
|
+
-- Aggregate windows
|
|
300
300
|
SUM(amount) OVER (PARTITION BY customer_id)
|
|
301
301
|
AVG(amount) OVER (PARTITION BY dept ORDER BY date
|
|
302
302
|
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW)
|
|
303
303
|
COUNT(*) OVER (PARTITION BY region)
|
|
304
304
|
MAX(amount) OVER (ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
|
|
305
305
|
|
|
306
|
-
--
|
|
307
|
-
LAG(amount, 1, 0) OVER (ORDER BY date) --
|
|
308
|
-
LEAD(amount, 1) OVER (ORDER BY date) --
|
|
306
|
+
-- Analytic functions
|
|
307
|
+
LAG(amount, 1, 0) OVER (ORDER BY date) -- 1 row before, default 0
|
|
308
|
+
LEAD(amount, 1) OVER (ORDER BY date) -- 1 row after
|
|
309
309
|
FIRST_VALUE(amount) OVER (ORDER BY date)
|
|
310
310
|
LAST_VALUE(amount) OVER (ORDER BY date
|
|
311
311
|
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
|
|
312
312
|
NTH_VALUE(amount, 3) OVER (ORDER BY date)
|
|
313
313
|
|
|
314
314
|
-- Window Frame
|
|
315
|
-
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW --
|
|
316
|
-
ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING --
|
|
317
|
-
RANGE BETWEEN INTERVAL 7 DAY PRECEDING AND CURRENT ROW -- 7
|
|
318
|
-
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING --
|
|
315
|
+
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -- from start to current row
|
|
316
|
+
ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING -- 3 rows before and after
|
|
317
|
+
RANGE BETWEEN INTERVAL 7 DAY PRECEDING AND CURRENT ROW -- within 7 days
|
|
318
|
+
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -- current row to end
|
|
319
319
|
```
|
|
320
320
|
|
|
321
|
-
|
|
322
|
-
- ClickZetta
|
|
321
|
+
**Differences from Snowflake:**
|
|
322
|
+
- ClickZetta also supports `QUALIFY` to directly filter window function results:
|
|
323
323
|
```sql
|
|
324
|
-
--
|
|
324
|
+
-- QUALIFY: supported by both Snowflake and ClickZetta
|
|
325
325
|
SELECT * FROM orders QUALIFY ROW_NUMBER() OVER (PARTITION BY cust ORDER BY dt DESC) = 1;
|
|
326
|
-
--
|
|
326
|
+
-- Subquery approach also works
|
|
327
327
|
SELECT * FROM (
|
|
328
328
|
SELECT *, ROW_NUMBER() OVER (PARTITION BY cust ORDER BY dt DESC) AS rn FROM orders
|
|
329
329
|
) t WHERE rn = 1;
|
|
@@ -331,24 +331,24 @@ ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -- 当前行到末尾
|
|
|
331
331
|
|
|
332
332
|
---
|
|
333
333
|
|
|
334
|
-
##
|
|
334
|
+
## Subqueries
|
|
335
335
|
|
|
336
336
|
```sql
|
|
337
|
-
--
|
|
337
|
+
-- Scalar subquery
|
|
338
338
|
SELECT id, (SELECT MAX(amount) FROM orders) AS max_amount FROM orders;
|
|
339
339
|
|
|
340
|
-
-- IN
|
|
340
|
+
-- IN subquery
|
|
341
341
|
SELECT * FROM orders WHERE customer_id IN (SELECT id FROM customers WHERE tier = 'VIP');
|
|
342
342
|
|
|
343
|
-
-- EXISTS
|
|
343
|
+
-- EXISTS subquery
|
|
344
344
|
SELECT * FROM customers c
|
|
345
345
|
WHERE EXISTS (SELECT 1 FROM orders o WHERE o.customer_id = c.id);
|
|
346
346
|
|
|
347
|
-
--
|
|
347
|
+
-- Correlated subquery
|
|
348
348
|
SELECT * FROM orders o
|
|
349
349
|
WHERE amount > (SELECT AVG(amount) FROM orders WHERE customer_id = o.customer_id);
|
|
350
350
|
|
|
351
|
-
-- FROM
|
|
351
|
+
-- FROM subquery (derived table)
|
|
352
352
|
SELECT t.region, t.total FROM (
|
|
353
353
|
SELECT region, SUM(amount) AS total FROM orders GROUP BY region
|
|
354
354
|
) t WHERE t.total > 100000;
|
|
@@ -356,75 +356,75 @@ SELECT t.region, t.total FROM (
|
|
|
356
356
|
|
|
357
357
|
---
|
|
358
358
|
|
|
359
|
-
## JSON
|
|
359
|
+
## JSON Queries
|
|
360
360
|
|
|
361
361
|
```sql
|
|
362
|
-
--
|
|
362
|
+
-- Access JSON fields (use [] instead of Snowflake's :)
|
|
363
363
|
SELECT data['address']['city'] AS city FROM users;
|
|
364
364
|
SELECT data['phoneNumbers'][0]['number'] AS phone FROM users;
|
|
365
365
|
SELECT data['scores'][2] AS third_score FROM users;
|
|
366
366
|
|
|
367
|
-
--
|
|
367
|
+
-- Build JSON
|
|
368
368
|
SELECT PARSE_JSON('{"name":"Alice","age":30}') AS info;
|
|
369
369
|
SELECT TO_JSON(STRUCT(name, age)) AS json_str FROM users;
|
|
370
370
|
|
|
371
|
-
--
|
|
371
|
+
-- Type conversion
|
|
372
372
|
SELECT CAST(data['age'] AS INT) AS age FROM users;
|
|
373
|
-
SELECT data['amount']::DOUBLE AS amount FROM orders; -- ::
|
|
373
|
+
SELECT data['amount']::DOUBLE AS amount FROM orders; -- :: syntax also supported
|
|
374
374
|
|
|
375
|
-
-- JSON
|
|
375
|
+
-- JSON aggregation
|
|
376
376
|
SELECT customer_id, TO_JSON(COLLECT_LIST(STRUCT(id, amount))) AS orders_json
|
|
377
377
|
FROM orders GROUP BY customer_id;
|
|
378
378
|
```
|
|
379
379
|
|
|
380
|
-
|
|
381
|
-
- Snowflake `data:key`
|
|
382
|
-
- Snowflake `data:key::STRING` → ClickZetta `CAST(data['key'] AS STRING)`
|
|
383
|
-
- Snowflake `OBJECT_CONSTRUCT(k, v)` → ClickZetta `MAP_AGG(k, v)`
|
|
384
|
-
- Snowflake `PARSE_JSON` → ClickZetta
|
|
380
|
+
**Differences from Snowflake:**
|
|
381
|
+
- Snowflake `data:key` colon syntax → ClickZetta `data['key']` bracket syntax
|
|
382
|
+
- Snowflake `data:key::STRING` → ClickZetta `CAST(data['key'] AS STRING)` or `data['key']::STRING`
|
|
383
|
+
- Snowflake `OBJECT_CONSTRUCT(k, v)` → ClickZetta `MAP_AGG(k, v)` or `STRUCT(...)` + `TO_JSON`
|
|
384
|
+
- Snowflake `PARSE_JSON` → ClickZetta `PARSE_JSON` (same function name)
|
|
385
385
|
|
|
386
386
|
---
|
|
387
387
|
|
|
388
|
-
## STRUCT / ARRAY / MAP
|
|
388
|
+
## STRUCT / ARRAY / MAP Operations
|
|
389
389
|
|
|
390
390
|
```sql
|
|
391
|
-
--
|
|
392
|
-
SELECT STRUCT(name, age, email) AS user_info FROM users; -- ✅
|
|
393
|
-
SELECT named_struct('name', name, 'age', age, 'email', email) AS user_info FROM users; -- ✅
|
|
394
|
-
-- ⚠️ SELECT STRUCT(name AS n, age AS a)
|
|
391
|
+
-- Build STRUCT
|
|
392
|
+
SELECT STRUCT(name, age, email) AS user_info FROM users; -- ✅ supported (no field names, defaults to col1, col2...)
|
|
393
|
+
SELECT named_struct('name', name, 'age', age, 'email', email) AS user_info FROM users; -- ✅ supported (with field names)
|
|
394
|
+
-- ⚠️ SELECT STRUCT(name AS n, age AS a) does not support AS syntax (Snowflake/Spark support it)
|
|
395
395
|
|
|
396
|
-
--
|
|
396
|
+
-- Build ARRAY / MAP
|
|
397
397
|
SELECT ARRAY(1, 2, 3) AS nums;
|
|
398
398
|
SELECT MAP('k1', 1, 'k2', 2) AS m;
|
|
399
399
|
|
|
400
|
-
--
|
|
401
|
-
SELECT address.city FROM users; -- STRUCT
|
|
402
|
-
SELECT skills[0] FROM employees; -- ARRAY
|
|
403
|
-
SELECT meta_map['key'] FROM t; -- MAP
|
|
400
|
+
-- Access
|
|
401
|
+
SELECT address.city FROM users; -- STRUCT field
|
|
402
|
+
SELECT skills[0] FROM employees; -- ARRAY index (0-based)
|
|
403
|
+
SELECT meta_map['key'] FROM t; -- MAP access
|
|
404
404
|
|
|
405
|
-
--
|
|
405
|
+
-- Array functions
|
|
406
406
|
SELECT SIZE(skills) AS cnt FROM employees;
|
|
407
407
|
SELECT ARRAY_CONTAINS(skills, 'Python') FROM employees;
|
|
408
408
|
SELECT ARRAY_AGG(order_id) FROM orders GROUP BY customer_id;
|
|
409
|
-
SELECT COLLECT_LIST(order_id) FROM orders GROUP BY customer_id; --
|
|
410
|
-
SELECT COLLECT_SET(status) FROM orders GROUP BY customer_id; --
|
|
409
|
+
SELECT COLLECT_LIST(order_id) FROM orders GROUP BY customer_id; -- same as ARRAY_AGG
|
|
410
|
+
SELECT COLLECT_SET(status) FROM orders GROUP BY customer_id; -- deduplicated
|
|
411
411
|
SELECT SORT_ARRAY(skills) FROM employees;
|
|
412
412
|
SELECT ARRAY_DISTINCT(tags) FROM articles;
|
|
413
413
|
SELECT ARRAY_UNION(a, b) FROM t;
|
|
414
414
|
SELECT ARRAY_INTERSECT(a, b) FROM t;
|
|
415
415
|
SELECT ARRAY_EXCEPT(a, b) FROM t;
|
|
416
|
-
SELECT FLATTEN(nested_array) FROM t; --
|
|
416
|
+
SELECT FLATTEN(nested_array) FROM t; -- flatten nested array
|
|
417
417
|
|
|
418
|
-
--
|
|
418
|
+
-- Higher-order functions
|
|
419
419
|
SELECT TRANSFORM(skills, x -> UPPER(x)) FROM employees;
|
|
420
420
|
SELECT FILTER(scores, x -> x > 90) FROM students;
|
|
421
|
-
-- ⚠️ AGGREGATE(arr, init, (acc,x)->...)
|
|
422
|
-
-- ⚠️ REDUCE(arr, init, (acc,x)->...)
|
|
421
|
+
-- ⚠️ AGGREGATE(arr, init, (acc,x)->...) not supported, use ARRAY_AGG + SUM instead
|
|
422
|
+
-- ⚠️ REDUCE(arr, init, (acc,x)->...) not supported (Spark name)
|
|
423
423
|
SELECT EXISTS(scores, x -> x > 100) FROM students;
|
|
424
424
|
SELECT FORALL(scores, x -> x >= 0) FROM students;
|
|
425
425
|
SELECT ZIP_WITH(a, b, (x, y) -> x + y) FROM t;
|
|
426
426
|
|
|
427
|
-
-- MAP
|
|
427
|
+
-- MAP functions
|
|
428
428
|
SELECT MAP_KEYS(meta) FROM t;
|
|
429
429
|
SELECT MAP_VALUES(meta) FROM t;
|
|
430
430
|
SELECT MAP_ENTRIES(meta) FROM t;
|
|
@@ -433,19 +433,19 @@ SELECT MAP_FILTER(meta, (k, v) -> v > 0) FROM t;
|
|
|
433
433
|
SELECT MAP_TRANSFORM_VALUES(meta, (k, v) -> v * 2) FROM t;
|
|
434
434
|
```
|
|
435
435
|
|
|
436
|
-
|
|
436
|
+
**Differences from Snowflake:**
|
|
437
437
|
- Snowflake `ARRAY_SIZE` → ClickZetta `SIZE`
|
|
438
|
-
- Snowflake `ARRAY_CONTAINS(val, arr)`
|
|
438
|
+
- Snowflake `ARRAY_CONTAINS(val, arr)` takes the value first; ClickZetta reverses the parameter order: `ARRAY_CONTAINS(arr, val)`
|
|
439
439
|
- Snowflake `OBJECT_KEYS(obj)` → ClickZetta `MAP_KEYS(map)`
|
|
440
|
-
- Snowflake
|
|
440
|
+
- Snowflake has no higher-order functions (TRANSFORM/FILTER); ClickZetta supports them
|
|
441
441
|
|
|
442
442
|
---
|
|
443
443
|
|
|
444
444
|
## PIVOT / UNPIVOT
|
|
445
445
|
|
|
446
446
|
```sql
|
|
447
|
-
-- ClickZetta
|
|
448
|
-
--
|
|
447
|
+
-- ClickZetta does not support native PIVOT syntax
|
|
448
|
+
-- Use CASE WHEN for row-to-column transformation
|
|
449
449
|
SELECT
|
|
450
450
|
product,
|
|
451
451
|
SUM(CASE WHEN month = 'Jan' THEN amount ELSE 0 END) AS Jan,
|
|
@@ -454,7 +454,7 @@ SELECT
|
|
|
454
454
|
FROM sales
|
|
455
455
|
GROUP BY product;
|
|
456
456
|
|
|
457
|
-
-- UNPIVOT
|
|
457
|
+
-- UNPIVOT implemented with LATERAL VIEW + STACK
|
|
458
458
|
SELECT id, month, amount
|
|
459
459
|
FROM sales
|
|
460
460
|
LATERAL VIEW STACK(3,
|
|
@@ -464,15 +464,15 @@ LATERAL VIEW STACK(3,
|
|
|
464
464
|
) t AS month, amount;
|
|
465
465
|
```
|
|
466
466
|
|
|
467
|
-
|
|
468
|
-
- Snowflake
|
|
467
|
+
**Differences from Snowflake:**
|
|
468
|
+
- Snowflake natively supports `PIVOT` / `UNPIVOT` syntax; ClickZetta does not, so both must be implemented manually (`CASE WHEN` for PIVOT, `LATERAL VIEW STACK` for UNPIVOT)
|
|
469
469
|
|
|
470
470
|
---
|
|
471
471
|
|
|
472
|
-
## SET
|
|
472
|
+
## SET Operations
|
|
473
473
|
|
|
474
474
|
```sql
|
|
475
|
-
-- ClickZetta
|
|
475
|
+
-- ClickZetta supports UNION / UNION ALL / INTERSECT / EXCEPT set operations
|
|
476
476
|
SELECT id FROM orders_2023
|
|
477
477
|
UNION ALL
|
|
478
478
|
SELECT id FROM orders_2024;
|
|
@@ -492,13 +492,13 @@ SELECT id FROM orders_2024;
|
|
|
492
492
|
|
|
493
493
|
---
|
|
494
494
|
|
|
495
|
-
## HINTS
|
|
495
|
+
## HINTS (Query Hints)
|
|
496
496
|
|
|
497
497
|
```sql
|
|
498
|
-
-- MAPJOIN
|
|
498
|
+
-- MAPJOIN (force broadcast small table)
|
|
499
499
|
SELECT /*+ MAPJOIN(small_table) */ *
|
|
500
500
|
FROM large_table l JOIN small_table s ON l.id = s.id;
|
|
501
501
|
|
|
502
|
-
--
|
|
502
|
+
-- Vector index search factor
|
|
503
503
|
SET cz.vector.index.search.ef = 128;
|
|
504
504
|
```
|