@clickzetta/cz-cli-darwin-x64 0.3.92 → 0.3.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
  3. package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
  4. package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
  5. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
  6. package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
  7. package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
  8. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
  9. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
  10. package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
  11. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
  12. package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
  13. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
  14. package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
  15. package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
  16. package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
  17. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
  18. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
  19. package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
  20. package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
  21. package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
  22. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
  23. package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
  24. package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
  25. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
  26. package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
  27. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
  28. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
  29. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
  30. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
  31. package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
  32. package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
  33. package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
  34. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
  35. package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
  36. package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
  37. package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
  38. package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
  39. package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
  40. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
  41. package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
  42. package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
  43. package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
  44. package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
  45. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
  46. package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
  47. package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
  48. package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
  49. package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
  50. package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
  51. package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
  52. package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
  53. package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
  54. package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
  55. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
  56. package/package.json +1 -1
  57. package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
  58. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  59. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
  60. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
  61. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
  62. package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
  63. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  64. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  65. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  66. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  67. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  68. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  69. /package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
@@ -0,0 +1,143 @@
1
+ # Implicit Type Conversion: Migration Pitfall
2
+
3
+ > **Why this matters for migration**: Snowflake, Databricks, and Spark all allow implicit string conversion in INSERT/UPDATE. ClickZetta does not. This is the #1 cause of unexpected errors when copying SQL from those systems.
4
+
5
+ ---
6
+
7
+ ## The Rule
8
+
9
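+ ClickZetta prohibits implicit conversion of string values (and of integers to `BOOLEAN`) in **write operations** (INSERT/UPDATE) — an explicit `CAST` or typed literal is required. Only numeric-to-numeric conversions such as `INT` → `BIGINT` or `FLOAT` → `DOUBLE` are applied implicitly on write (see the table below).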
10
+ However, implicit conversion **is allowed** in SELECT/WHERE expressions.
11
+
12
+ ---
13
+
14
+ ## Behavior Comparison Table
15
+
16
+ | Target Column Type | Written Value | Snowflake | Databricks | Spark | ClickZetta INSERT/UPDATE | ClickZetta WHERE |
17
+ |---|---|---|---|---|---|---|
18
+ | `DATE` | `'2024-01-15'` (string) | ✅ implicit | ✅ implicit | ✅ implicit | ❌ Error | ✅ Allowed |
19
+ | `TIMESTAMP` | `'2024-01-15 12:00:00'` (string) | ✅ implicit | ✅ implicit | ✅ implicit | ❌ Error | ✅ Allowed |
20
+ | `BOOLEAN` | `'true'` / `'false'` (string) | ✅ implicit | ✅ implicit | ✅ implicit | ❌ Error | ✅ Allowed |
21
+ | `BOOLEAN` | `1` / `0` (integer) | ✅ implicit | ❌ | ❌ | ❌ Error | ✅ Allowed |
22
+ | `JSON` / `VARIANT` | `'{"k":1}'` (string) | ✅ implicit | N/A | N/A | ❌ Error | ✅ Allowed |
23
+ | `INT` / `BIGINT` | `'123'` (string) | ✅ implicit | ✅ implicit | ✅ implicit | ❌ Error | ✅ Allowed |
24
+ | `BIGINT` | `100` (INT) | ✅ | ✅ | ✅ | ✅ Allowed | ✅ Allowed |
25
+ | `DOUBLE` | `1.5` (FLOAT) | ✅ | ✅ | ✅ | ✅ Allowed | ✅ Allowed |
26
+ | `BIGINT` | `1.5` (FLOAT) | ✅ truncated | ✅ truncated | ✅ truncated | ✅ Allowed (truncated) | ✅ Allowed |
27
+
28
+ ---
29
+
30
+ ## Migration Pattern: How to Rewrite
31
+
32
+ ```sql
33
+ -- ❌ Snowflake / Databricks / Spark style (errors in ClickZetta)
34
+ INSERT INTO orders VALUES (1, '2024-01-15', 'true', '{"k":1}', '123');
35
+
36
+ -- ✅ ClickZetta-compatible
37
+ INSERT INTO orders VALUES (
38
+ 1,
39
+ DATE '2024-01-15', -- or CAST('2024-01-15' AS DATE)
40
+ TRUE, -- or CAST('true' AS BOOLEAN)
41
+ PARSE_JSON('{"k":1}'), -- or CAST(... AS JSON)
42
+ CAST('123' AS INT)
43
+ );
44
+ ```
45
+
46
+ ### DATE Column
47
+
48
+ ```sql
49
+ -- All equivalent and correct
50
+ INSERT INTO t VALUES (CAST('2024-01-15' AS DATE));
51
+ INSERT INTO t VALUES (DATE '2024-01-15');
52
+ INSERT INTO t VALUES (TO_DATE('2024-01-15'));
53
+ INSERT INTO t VALUES (DATE('2024-01-15')); -- function form
54
+ ```
55
+
56
+ ### TIMESTAMP Column
57
+
58
+ ```sql
59
+ -- All correct; the first four forms are equivalent, the last two compute the value at run time
60
+ INSERT INTO t VALUES (CAST('2024-01-15 12:00:00' AS TIMESTAMP));
61
+ INSERT INTO t VALUES (TIMESTAMP '2024-01-15 12:00:00');
62
+ INSERT INTO t VALUES (TO_TIMESTAMP('2024-01-15 12:00:00'));
63
+ INSERT INTO t VALUES (TIMESTAMP('2024-01-15 12:00:00')); -- function form
64
+ INSERT INTO t VALUES (CURRENT_TIMESTAMP());
65
+ INSERT INTO t VALUES (CURRENT_DATE() - INTERVAL 7 DAY);
66
+ ```
67
+
68
+ ### BOOLEAN Column
69
+
70
+ ```sql
71
+ -- Only TRUE/FALSE literals or explicit CAST
72
+ INSERT INTO t VALUES (TRUE);
73
+ INSERT INTO t VALUES (FALSE);
74
+ INSERT INTO t VALUES (CAST(1 AS BOOLEAN));
75
+ INSERT INTO t VALUES (CAST('true' AS BOOLEAN));
76
+ ```
77
+
78
+ ### JSON Column
79
+
80
+ ```sql
81
+ -- Must use PARSE_JSON or CAST
82
+ INSERT INTO t VALUES (PARSE_JSON('{"key":"value"}'));
83
+ INSERT INTO t VALUES (CAST('{"key":"value"}' AS JSON));
84
+ ```
85
+
86
+ ### INT/BIGINT Column
87
+
88
+ ```sql
89
+ -- Strings must be CAST
90
+ INSERT INTO t VALUES (CAST('123' AS INT));
91
+ INSERT INTO t VALUES (CAST('456' AS BIGINT));
92
+ ```
93
+
94
+ ---
95
+
96
+ ## UPDATE Has the Same Restrictions
97
+
98
+ ```sql
99
+ -- ❌ UPDATE also rejects implicit conversion (string → DATE, integer → BOOLEAN)
100
+ UPDATE orders SET dt = '2024-06-01' WHERE id = 1; -- Error
101
+ UPDATE orders SET flag = 0 WHERE id = 1; -- Error (BOOLEAN column)
102
+
103
+ -- ✅ Must explicitly convert
104
+ UPDATE orders SET dt = CAST('2024-06-01' AS DATE) WHERE id = 1;
105
+ UPDATE orders SET flag = CAST(0 AS BOOLEAN) WHERE id = 1;
106
+ ```
107
+
108
+ ---
109
+
110
+ ## WHERE Clause Allows Implicit Comparison
111
+
112
+ This is **not** a write operation, so implicit conversion still works:
113
+
114
+ ```sql
115
+ -- ✅ All allowed in WHERE
116
+ SELECT * FROM orders WHERE dt = '2024-01-15';
117
+ SELECT * FROM orders WHERE dt >= '2024-01-01' AND dt < '2025-01-01';
118
+ SELECT * FROM orders WHERE id = '123';
119
+ ```
120
+
121
+ ---
122
+
123
+ ## Also Applies to RESTORE TABLE
124
+
125
+ ```sql
126
+ -- ❌ Errors
127
+ RESTORE TABLE t TO TIMESTAMP AS OF '2024-01-15';
128
+
129
+ -- ✅ Use explicit cast
130
+ RESTORE TABLE t TO TIMESTAMP AS OF CAST('2024-01-15 10:00:00' AS TIMESTAMP);
131
+ ```
132
+
133
+ ---
134
+
135
+ ## Quick Migration Checklist
136
+
137
+ When porting INSERT/UPDATE statements from Snowflake/Databricks/Spark, search for and fix:
138
+
139
+ 1. String literals being inserted into DATE columns → wrap with `DATE '...'` or `CAST(... AS DATE)`
140
+ 2. String literals being inserted into TIMESTAMP columns → wrap with `TIMESTAMP '...'` or `CAST(... AS TIMESTAMP)`
141
+ 3. String `'true'` / `'false'` or integer `1` / `0` for BOOLEAN columns → use `TRUE` / `FALSE` literals
142
+ 4. String JSON for VARIANT/JSON columns → wrap with `PARSE_JSON(...)`
143
+ 5. String numerics for INT/BIGINT columns → wrap with `CAST(... AS INT)` or `CAST(... AS BIGINT)` to match the column type
@@ -0,0 +1,260 @@
1
+ # Databricks → ClickZetta Migration Guide
2
+
3
+ > Covers SQL compatibility issues when migrating from Databricks (Delta Lake) to ClickZetta Lakehouse. All conclusions verified on a real Lakehouse instance.
4
+
5
+ ---
6
+
7
+ ## Object Concept Mapping
8
+
9
+ | Databricks | ClickZetta | Description |
10
+ |---|---|---|
11
+ | Catalog (internal data) | WORKSPACE | Top-level namespace, Catalog.Schema.Table ≈ Workspace.Schema.Table |
12
+ | Catalog (external data sources) | EXTERNAL CATALOG | Top-level three-layer namespace for federated queries (catalog.schema.table) |
13
+ | Database / Schema | SCHEMA | Same |
14
+ | Cluster / SQL Warehouse | VCLUSTER | Compute cluster |
15
+ | Delta Table (regular) | TABLE | ClickZetta defaults to Parquet storage, supports Iceberg format |
16
+ | Delta Table (incremental) | DYNAMIC TABLE | Auto-incremental refresh, replaces DLT Pipeline |
17
+ | External Location | STORAGE CONNECTION + EXTERNAL VOLUME | STORAGE CONNECTION handles auth, EXTERNAL VOLUME mounts the path |
18
+ | Unity Catalog (metadata governance) | No full equivalent | ClickZetta uses RBAC + SCHEMA permissions for partial governance |
19
+ | Unity Catalog (external data federation) | EXTERNAL CATALOG | Supports Hive, Iceberg REST, Databricks Unity Catalog federation |
20
+ | Structured Streaming | PIPE + TABLE STREAM | PIPE handles continuous ingestion, TABLE STREAM handles CDC |
21
+ | APPLY CHANGES INTO (DLT CDC) | TABLE STREAM + MERGE INTO | Create Stream to capture changes, then consume with MERGE |
22
+ | Auto Loader | PIPE (EVENT_NOTIFICATION mode) | File upload triggers loading, only supports OSS/S3 |
23
+
24
+ ---
25
+
26
+ ## DDL Differences
27
+
28
+ ### CREATE TABLE
29
+
30
+ ```sql
31
+ -- Databricks Delta Lake
32
+ CREATE TABLE orders (
33
+ id BIGINT GENERATED ALWAYS AS IDENTITY,
34
+ customer_id INT,
35
+ amount DECIMAL(18,2),
36
+ status STRING DEFAULT 'pending',
37
+ created_at TIMESTAMP DEFAULT current_timestamp(),
38
+ meta STRUCT<city: STRING, zip: STRING>,
39
+ tags ARRAY<STRING>
40
+ )
41
+ USING DELTA
42
+ PARTITIONED BY (DATE(created_at))
43
+ TBLPROPERTIES ('delta.enableChangeDataFeed' = 'true');
44
+
45
+ -- ClickZetta equivalent
46
+ CREATE TABLE IF NOT EXISTS orders (
47
+ id BIGINT IDENTITY(1), -- GENERATED ALWAYS AS IDENTITY → IDENTITY
48
+ customer_id INT,
49
+ amount DECIMAL(18,2),
50
+ status STRING DEFAULT 'pending',
51
+ created_at TIMESTAMP DEFAULT current_timestamp(),
52
+ meta STRUCT<city:STRING, zip:STRING>,
53
+ tags ARRAY<STRING>
54
+ )
55
+ -- No need for USING DELTA (default is Parquet)
56
+ PARTITIONED BY (days(created_at)); -- DATE() → days() transform function
57
+ -- TBLPROPERTIES → PROPERTIES
58
+ -- CDC is implemented via TABLE STREAM, no need for enableChangeDataFeed
59
+ ```
60
+
61
+ ### Unsupported DDL
62
+
63
+ ```sql
64
+ -- ❌ USING DELTA / USING PARQUET (ClickZetta defaults to Parquet, no need to specify)
65
+ CREATE TABLE t (...) USING DELTA;
66
+ CREATE TABLE t (...) USING PARQUET;
67
+
68
+ -- ❌ TBLPROPERTIES (use PROPERTIES)
69
+ CREATE TABLE t (...) TBLPROPERTIES ('key' = 'value');
70
+ -- ✅ ClickZetta
71
+ CREATE TABLE t (...) PROPERTIES ('data_lifecycle' = '30');
72
+
73
+ -- ❌ GENERATED ALWAYS AS IDENTITY (use IDENTITY)
74
+ id BIGINT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1)
75
+ -- ✅ ClickZetta
76
+ id BIGINT IDENTITY(1)
77
+
78
+ -- ❌ OPTIMIZE ... ZORDER BY (ClickZetta has OPTIMIZE but no ZORDER)
79
+ OPTIMIZE orders ZORDER BY (customer_id, created_at);
80
+ -- ✅ ClickZetta (small file compaction only, no ZORDER)
81
+ OPTIMIZE orders;
82
+
83
+ -- ❌ VACUUM (ClickZetta manages storage automatically)
84
+ VACUUM orders RETAIN 168 HOURS;
85
+ ```
86
+
87
+ ---
88
+
89
+ ## ⚠️ Type Conversion on Write (Important Difference)
90
+
91
+ Databricks allows implicit string conversion; ClickZetta **does not**:
92
+
93
+ ```sql
94
+ -- ❌ Works in Databricks, errors in ClickZetta
95
+ INSERT INTO t VALUES ('2024-01-15', 'true', '123');
96
+
97
+ -- ✅ ClickZetta requires explicit conversion
98
+ INSERT INTO t VALUES (DATE '2024-01-15', TRUE, CAST('123' AS INT));
99
+ ```
100
+
101
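+ See [migration-snowflake.md](migration-snowflake.md) for the type conversion table (same rules apply); [implicit-type-conversion.md](implicit-type-conversion.md) has the full per-type behavior comparison and rewrite patterns.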
102
+
103
+ ---
104
+
105
+ ## DML Differences
106
+
107
+ ### MERGE INTO (WHEN NOT MATCHED BY SOURCE)
108
+
109
+ ```sql
110
+ -- Databricks: supports WHEN NOT MATCHED BY SOURCE
111
+ MERGE INTO target t USING source s ON t.id = s.id
112
+ WHEN MATCHED THEN UPDATE SET t.val = s.val
113
+ WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
114
+ WHEN NOT MATCHED BY SOURCE THEN DELETE; -- ❌ ClickZetta does not support
115
+
116
+ -- ClickZetta alternative: two-step operation
117
+ -- Step 1: MERGE handles matched and new rows
118
+ MERGE INTO target t USING source s ON t.id = s.id
119
+ WHEN MATCHED THEN UPDATE SET t.val = s.val
120
+ WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val);
121
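+ -- Step 2: DELETE rows not in source (caution: if source.id can be NULL, NOT IN matches no rows — add WHERE id IS NOT NULL to the subquery)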
122
+ DELETE FROM target WHERE id NOT IN (SELECT id FROM source);
123
+ ```
124
+
125
+ ### APPLY CHANGES INTO (CDC)
126
+
127
+ ```sql
128
+ -- Databricks: APPLY CHANGES INTO (DLT-specific)
129
+ APPLY CHANGES INTO target
130
+ FROM source
131
+ KEYS (id)
132
+ SEQUENCE BY ts
133
+ APPLY AS DELETE WHEN operation = 'DELETE';
134
+
135
+ -- ClickZetta: use TABLE STREAM + MERGE INTO
136
+ CREATE TABLE STREAM source_stream ON TABLE source
137
+ WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
138
+
139
+ MERGE INTO target t
140
+ USING source_stream s ON t.id = s.id
141
+ WHEN MATCHED AND s.__change_type = 'UPDATE_AFTER' THEN UPDATE SET t.val = s.val
142
+ WHEN MATCHED AND s.__change_type = 'DELETE' THEN DELETE
143
+ WHEN NOT MATCHED AND s.__change_type = 'INSERT' THEN INSERT (id, val) VALUES (s.id, s.val);
144
+ ```
145
+
146
+ ### Transactions
147
+
148
+ ```sql
149
+ -- ❌ ClickZetta does not support transaction syntax
150
+ BEGIN;
151
+ COMMIT;
152
+ ROLLBACK;
153
+ ```
154
+
155
+ ---
156
+
157
+ ## DQL Differences
158
+
159
+ ### QUALIFY (Window Function Filtering)
160
+
161
+ ```sql
162
+ -- Both support QUALIFY
163
+ SELECT * FROM orders
164
+ QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY created_at DESC) = 1;
165
+ ```
166
+
167
+ ### RECURSIVE CTE
168
+
169
+ ```sql
170
+ -- Databricks: supports WITH RECURSIVE
171
+ WITH RECURSIVE nums AS (
172
+ SELECT 1 AS n
173
+ UNION ALL
174
+ SELECT n + 1 FROM nums WHERE n < 5
175
+ )
176
+ SELECT * FROM nums;
177
+
178
+ -- ❌ ClickZetta: does not support WITH RECURSIVE (verified)
179
+ -- Alternative: use Python/ZettaPark to generate sequences, or pre-build helper tables
180
+ ```
181
+
182
+ ### STRUCT Named Fields
183
+
184
+ ```sql
185
+ -- Databricks: supports named fields
186
+ SELECT STRUCT(1 AS id, 'Alice' AS name) AS person;
187
+
188
+ -- ClickZetta: use named_struct for named fields
189
+ SELECT named_struct('id', 1, 'name', 'Alice') AS person; -- ✅ recommended
190
+ SELECT STRUCT(1, 'Alice') AS person; -- positional parameter syntax, access via person.col1, person.col2
191
+ ```
192
+
193
+ ---
194
+
195
+ ## Partition Differences
196
+
197
+ ### Partition Functions
198
+
199
+ ```sql
200
+ -- Databricks: use column names directly
201
+ CREATE TABLE t (...) PARTITIONED BY (year, month);
202
+
203
+ -- ClickZetta: Iceberg hidden partitions with transform functions
204
+ CREATE TABLE t (...) PARTITIONED BY (years(created_at)); -- by year
205
+ CREATE TABLE t (...) PARTITIONED BY (months(created_at)); -- by month
206
+ CREATE TABLE t (...) PARTITIONED BY (days(created_at)); -- by day
207
+ CREATE TABLE t (...) PARTITIONED BY (bucket(16, user_id)); -- by bucket
208
+ ```
209
+
210
+ ### Partition Pruning
211
+
212
+ ```sql
213
+ -- ✅ ClickZetta's YEAR() function in WHERE can trigger partition pruning (engine auto-converts)
214
+ SELECT * FROM t WHERE YEAR(dt) = 2024; -- actually converts to range filter
215
+
216
+ -- ✅ Preferred approach (explicit range)
217
+ SELECT * FROM t WHERE dt >= DATE '2024-01-01' AND dt < DATE '2025-01-01';
218
+ ```
219
+
220
+ ---
221
+
222
+ ## Delta Lake Feature Comparison
223
+
224
+ | Delta Lake Feature | ClickZetta Equivalent | Description |
225
+ |---|---|---|
226
+ | `OPTIMIZE ... ZORDER BY` | `OPTIMIZE table` (no ZORDER) | Only does small file compaction |
227
+ | `VACUUM` | Automatic management | No manual VACUUM needed |
228
+ | `DESCRIBE HISTORY` | `DESC HISTORY table` | Same functionality |
229
+ | `RESTORE TABLE ... VERSION AS OF` | `RESTORE TABLE ... TIMESTAMP AS OF` | Restore by timestamp |
230
+ | `Time Travel VERSION AS OF n` | `TIMESTAMP AS OF '...'` | ClickZetta uses timestamp, not version number |
231
+ | `enableChangeDataFeed` | TABLE STREAM | Different implementation |
232
+ | `MERGE ... WHEN NOT MATCHED BY SOURCE` | Not supported, requires two-step operation | |
233
+ | `APPLY CHANGES INTO` | TABLE STREAM + MERGE INTO | |
234
+ | `GENERATED ALWAYS AS IDENTITY` | `IDENTITY(seed)` | |
235
+ | `TBLPROPERTIES` | `PROPERTIES` | |
236
+ | `USING DELTA` | Not needed (default Parquet) | |
237
+
238
+ ---
239
+
240
+ ## Verified Compatibility (Databricks has it, ClickZetta also has it)
241
+
242
+ - `SEMI JOIN` / `ANTI JOIN` ✅
243
+ - `LATERAL VIEW EXPLODE` / `POSEXPLODE` ✅
244
+ - `QUALIFY` ✅
245
+ - `MERGE INTO` (basic syntax) ✅
246
+ - `GROUPING SETS` / `ROLLUP` / `CUBE` ✅
247
+ - `WITH CTE` (non-recursive) ✅
248
+ - `STRUCT` / `ARRAY` / `MAP` types ✅
249
+ - `TRANSFORM` / `FILTER` / `AGGREGATE` higher-order functions ✅
250
+ - `ARRAY_AGG` / `COLLECT_LIST` / `COLLECT_SET` ✅
251
+ - `REGEXP_EXTRACT` / `REGEXP_REPLACE` ✅
252
+ - `DATE_TRUNC` / `DATE_FORMAT` ✅
253
+ - `TRY_CAST` ✅
254
+ - `IDENTITY` column ✅
255
+ - `GENERATED ALWAYS AS (expr)` generated columns ✅
256
+ - `DEFAULT` values ✅
257
+ - `OPTIMIZE` (small file compaction) ✅
258
+ - `DESC HISTORY` ✅
259
+ - `RESTORE TABLE ... TIMESTAMP AS OF` ✅
260
+ - `UNDROP TABLE` ✅