@clickzetta/cz-cli-darwin-x64 0.3.92 → 0.3.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
  3. package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
  4. package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
  5. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
  6. package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
  7. package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
  8. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
  9. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
  10. package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
  11. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
  12. package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
  13. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
  14. package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
  15. package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
  16. package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
  17. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
  18. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
  19. package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
  20. package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
  21. package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
  22. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
  23. package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
  24. package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
  25. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
  26. package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
  27. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
  28. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
  29. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
  30. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
  31. package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
  32. package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
  33. package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
  34. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
  35. package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
  36. package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
  37. package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
  38. package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
  39. package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
  40. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
  41. package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
  42. package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
  43. package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
  44. package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
  45. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
  46. package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
  47. package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
  48. package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
  49. package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
  50. package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
  51. package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
  52. package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
  53. package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
  54. package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
  55. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
  56. package/package.json +1 -1
  57. package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
  58. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  59. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
  60. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
  61. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
  62. package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
  63. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  64. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  65. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  66. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  67. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  68. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  69. /package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
@@ -0,0 +1,346 @@
1
+ # ClickZetta Lakehouse vs Snowflake SQL Differences
2
+
3
+ > Source: Product documentation + migration practice
4
+
5
+ ## Object Concept Mapping
6
+
7
+ | ClickZetta Lakehouse | Snowflake | Description |
8
+ |---|---|---|
9
+ | WORKSPACE | DATABASE | Workspace ≈ Database |
10
+ | SCHEMA | SCHEMA | Same |
11
+ | VCLUSTER | WAREHOUSE | Compute cluster |
12
+ | STORAGE CONNECTION | STORAGE INTEGRATION | Object storage authentication |
13
+ | VOLUME | STAGE | File storage area |
14
+ | TABLE | TABLE | Same |
15
+ | PIPE | SNOWPIPE | Continuous ingestion pipeline |
16
+ | TABLE STREAM | STREAM | Change data capture |
17
+ | DYNAMIC TABLE | DYNAMIC TABLE | Incremental computation table (different syntax) |
18
+ | Studio Task | TASK | Scheduled tasks |
19
+
20
+ ---
21
+
22
+ ## DDL Differences
23
+
24
+ ### CREATE OR REPLACE vs IF NOT EXISTS
25
+
26
+ ```sql
27
+ -- Snowflake: supports CREATE OR REPLACE
28
+ CREATE OR REPLACE TABLE orders (id INT, amount DECIMAL);
29
+
30
+ -- ClickZetta: does not support CREATE OR REPLACE, use IF NOT EXISTS
31
+ CREATE TABLE IF NOT EXISTS orders (id INT, amount DECIMAL);
32
+ -- Use ALTER TABLE to modify existing tables
33
+ ```
34
+
35
+ ### Comment Syntax
36
+
37
+ ```sql
38
+ -- Snowflake: supports // and ///
39
+ // This is a comment
40
+ /// This is also a comment
41
+
42
+ -- ClickZetta: only supports -- and /* */
43
+ -- This is a comment
44
+ /* This is also a comment */
45
+ ```
46
+
47
+ ### Data Type Differences
48
+
49
+ | ClickZetta | Snowflake | Description |
50
+ |---|---|---|
51
+ | `STRING` | `VARCHAR` / `TEXT` | ClickZetta recommends STRING |
52
+ | `TIMESTAMP` | `TIMESTAMP_LTZ` | Local timezone timestamp |
53
+ | `TIMESTAMP_NTZ` | `TIMESTAMP_NTZ` | Without timezone timestamp |
54
+ | `JSON` | `VARIANT` | Semi-structured data |
55
+ | `ARRAY<T>` | `ARRAY` | ClickZetta requires element type |
56
+ | `MAP<K,V>` | `OBJECT` | Key-value pairs |
57
+ | `STRUCT<f:T,...>` | `OBJECT` | Struct type |
58
+ | `VECTOR(FLOAT, N)` | `VECTOR(FLOAT, N)` | Vector type (both support a fixed-dimension vector type) |
59
+ | `TINYINT` | `NUMBER(3,0)` | 1-byte integer |
60
+ | `SMALLINT` | `NUMBER(5,0)` | 2-byte integer |
61
+ | No `NUMBER` | `NUMBER(p,s)` | ClickZetta uses `DECIMAL(p,s)` |
62
+
63
+ ### ⚠️ Implicit Type Conversion on Write (Important Difference)
64
+
65
+ Snowflake allows implicit conversion of strings to date/timestamp, boolean, integer, and JSON types on write; ClickZetta **does not**:
66
+
67
+ | Operation | Snowflake | ClickZetta |
68
+ |---|---|---|
69
+ | INSERT string→DATE | ✅ Allowed | ❌ Error, requires `CAST` or `DATE '...'` |
70
+ | INSERT string→TIMESTAMP | ✅ Allowed | ❌ Error, requires `CAST` or `TIMESTAMP '...'` |
71
+ | INSERT string→BOOLEAN | ✅ Allowed | ❌ Error, requires `TRUE`/`FALSE` or `CAST` |
72
+ | INSERT string→INT | ✅ Allowed | ❌ Error, requires `CAST('123' AS INT)` |
73
+ | INSERT string→JSON | ✅ Allowed | ❌ Error, requires `PARSE_JSON(...)` or `CAST` |
74
+ | UPDATE string→DATE | ✅ Allowed | ❌ Error, requires `CAST` |
75
+ | WHERE string=DATE | ✅ Allowed | ✅ Allowed (implicit comparison in queries) |
76
+
77
+ ### Table Creation Syntax Differences
78
+
79
+ ```sql
80
+ -- Snowflake: CLUSTER BY
81
+ CREATE TABLE orders (id INT, dt DATE)
82
+ CLUSTER BY (dt);
83
+
84
+ -- ClickZetta: CLUSTERED BY + PARTITIONED BY
85
+ CREATE TABLE orders (
86
+ id INT,
87
+ dt DATE
88
+ )
89
+ PARTITIONED BY (dt)
90
+ CLUSTERED BY (id) INTO 8 BUCKETS;
91
+
92
+ -- ClickZetta-specific: inline index definition (Bloom filter index shown here)
93
+ CREATE TABLE orders (
94
+ id INT,
95
+ amount DECIMAL,
96
+ INDEX amount_bf (amount) USING BLOOM_FILTER
97
+ );
98
+ ```
99
+
100
+ ---
101
+
102
+ ## DML Differences
103
+
104
+ ### INSERT
105
+
106
+ ```sql
107
+ -- Both are basically the same; ClickZetta additionally supports:
108
+ INSERT OVERWRITE TABLE orders SELECT * FROM staging; -- overwrite (Hive style)
109
+ INSERT INTO orders PARTITION (dt='2024-01-01') VALUES (1, 100); -- static partition
110
+ ```
111
+
112
+ ### UPDATE
113
+
114
+ ```sql
115
+ -- Snowflake
116
+ UPDATE orders SET amount = amount * 1.1 WHERE status = 'VIP';
117
+
118
+ -- ClickZetta: same syntax, additionally supports ORDER BY + LIMIT
119
+ UPDATE orders SET amount = amount * 1.1
120
+ WHERE status = 'VIP'
121
+ ORDER BY created_at DESC
122
+ LIMIT 1000;
123
+ ```
124
+
125
+ ### MERGE INTO
126
+
127
+ ```sql
128
+ -- ClickZetta limitation: WHEN NOT MATCHED can only appear once
129
+ -- Snowflake supports multiple WHEN NOT MATCHED
130
+
131
+ -- ClickZetta MERGE example (⚠️ UPDATE must come before DELETE)
132
+ MERGE INTO target t
133
+ USING source s ON t.id = s.id
134
+ WHEN MATCHED AND s.action <> 'DELETE' THEN UPDATE SET t.amount = s.amount
135
+ WHEN MATCHED AND s.action = 'DELETE' THEN DELETE
136
+ WHEN NOT MATCHED THEN INSERT (id, amount) VALUES (s.id, s.amount);
137
+ ```
138
+
139
+ ---
140
+
141
+ ## Query Syntax Differences
142
+
143
+ ### SELECT Extensions
144
+
145
+ ```sql
146
+ -- ClickZetta: SELECT * EXCEPT(col) (Snowflake's equivalent is SELECT * EXCLUDE (col))
147
+ SELECT * EXCEPT(sensitive_col) FROM users;
148
+
149
+ -- GROUP BY ALL (auto-infer grouping columns; also supported by Snowflake)
150
+ SELECT year, month, SUM(amount)
151
+ FROM orders
152
+ GROUP BY ALL;
153
+
154
+ -- Both support: GROUPING SETS / ROLLUP / CUBE
155
+ SELECT region, product, SUM(sales)
156
+ FROM orders
157
+ GROUP BY GROUPING SETS ((region), (product), ());
158
+ ```
159
+
160
+ ### JSON Queries
161
+
162
+ ```sql
163
+ -- Snowflake: VARIANT type, access with :
164
+ SELECT data:address:city FROM users;
165
+ SELECT data[0]:name FROM users;
166
+
167
+ -- ClickZetta: JSON type, access with []
168
+ SELECT data['address']['city'] FROM users;
169
+ SELECT data['phoneNumbers'][0]['number'] FROM users;
170
+
171
+ -- Both support PARSE_JSON
172
+ SELECT parse_json('{"name":"Alice"}')['name'];
173
+ ```
174
+
175
+ ### LATERAL VIEW (Array Expansion)
176
+
177
+ ```sql
178
+ -- ClickZetta (Hive style)
179
+ SELECT e.id, s.skill
180
+ FROM employees e
181
+ LATERAL VIEW EXPLODE(e.skills) s AS skill;
182
+
183
+ -- Snowflake (uses FLATTEN)
184
+ SELECT e.id, f.value::STRING AS skill
185
+ FROM employees e,
186
+ LATERAL FLATTEN(input => e.skills) f;
187
+ ```
188
+
189
+ ### QUALIFY (Window Function Filtering)
190
+
191
+ ```sql
192
+ -- Both support QUALIFY
193
+ SELECT * FROM orders
194
+ QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY created_at DESC) = 1;
195
+ ```
196
+
197
+ ### PIVOT / UNPIVOT
198
+
199
+ ```sql
200
+ -- Snowflake natively supports PIVOT
201
+ SELECT * FROM sales
202
+ PIVOT (SUM(amount) FOR month IN ('Jan', 'Feb', 'Mar'));
203
+
204
+ -- ClickZetta: use CASE WHEN
205
+ SELECT
206
+ product,
207
+ SUM(CASE WHEN month = 'Jan' THEN amount END) AS Jan,
208
+ SUM(CASE WHEN month = 'Feb' THEN amount END) AS Feb
209
+ FROM sales GROUP BY product;
210
+ ```
211
+
212
+ ---
213
+
214
+ ## Stream Differences
215
+
216
+ ```sql
217
+ -- Snowflake Stream metadata fields
218
+ METADATA$ACTION -- 'INSERT' / 'DELETE'
219
+ METADATA$ISUPDATE -- TRUE/FALSE
220
+ METADATA$ROW_ID -- row unique identifier
221
+
222
+ -- ClickZetta Table Stream metadata fields
223
+ __change_type -- 'INSERT' / 'UPDATE_BEFORE' / 'UPDATE_AFTER' / 'DELETE'
224
+ __commit_version -- commit version number
225
+ __commit_timestamp -- commit timestamp
226
+ ```
227
+
228
+ ---
229
+
230
+ ## Dynamic Table Differences
231
+
232
+ ```sql
233
+ -- Snowflake Dynamic Table
234
+ CREATE DYNAMIC TABLE product_sales
235
+ TARGET_LAG = '1 minutes'
236
+ WAREHOUSE = my_warehouse
237
+ AS SELECT ...;
238
+
239
+ -- ClickZetta Dynamic Table (does not support TARGET_LAG)
240
+ CREATE DYNAMIC TABLE product_sales
241
+ REFRESH INTERVAL 1 MINUTE VCLUSTER default_ap
242
+ AS SELECT ...;
243
+ ```
244
+
245
+ ---
246
+
247
+ ## Object Storage (Stage vs Volume)
248
+
249
+ ```sql
250
+ -- Snowflake: Stage
251
+ CREATE STAGE my_stage
252
+ URL = 's3://bucket/path'
253
+ STORAGE_INTEGRATION = my_integration;
254
+
255
+ COPY INTO orders FROM @my_stage/data.csv;
256
+
257
+ -- ClickZetta: Volume
258
+ CREATE EXTERNAL VOLUME my_volume
259
+ LOCATION = 'oss://bucket/path'
260
+ USING CONNECTION my_oss_conn;
261
+
262
+ COPY INTO orders FROM VOLUME my_volume USING CSV;
263
+ ```
264
+
265
+ ---
266
+
267
+ ## Function Differences
268
+
269
+ ### Date Functions
270
+
271
+ ```sql
272
+ -- Snowflake
273
+ DATEADD(day, 7, order_date)
274
+ DATEDIFF(day, start_date, end_date)
275
+ DATE_TRUNC('month', order_date)
276
+ TO_DATE('2024-01-01')
277
+ CURRENT_TIMESTAMP()
278
+
279
+ -- ClickZetta (compatible with Hive/Spark style, also supports Snowflake style)
280
+ DATEADD(day, 7, order_date) -- ✅ same Snowflake syntax also supported
281
+ DATE_ADD(order_date, 7) -- or Hive style
282
+ DATEDIFF(end_date, start_date) -- note: parameter order reversed!
283
+ DATE_TRUNC('month', order_date) -- same
284
+ TO_DATE('2024-01-01') -- same
285
+ CURRENT_TIMESTAMP() -- same, also supports NOW()
286
+ ```
287
+
288
+ ### String Functions
289
+
290
+ ```sql
291
+ -- Snowflake
292
+ CHARINDEX('sub', str) -- find substring position
293
+ EDITDISTANCE(s1, s2) -- edit distance
294
+ SOUNDEX(str) -- phonetic similarity
295
+ INITCAP(str) -- capitalize the first letter of each word
296
+
297
+ -- ClickZetta
298
+ INSTR(str, 'sub') -- find substring position (Hive style)
299
+ LOCATE('sub', str) -- also supported
300
+ LEVENSHTEIN(s1, s2) -- edit distance
301
+ INITCAP(str) -- same
302
+ ```
303
+
304
+ ### Conditional Functions
305
+
306
+ ```sql
307
+ -- Snowflake
308
+ IFF(condition, true_val, false_val)
309
+ ZEROIFNULL(expr)
310
+ NULLIFZERO(expr)
311
+ DECODE(expr, val1, res1, val2, res2, default)
312
+
313
+ -- ClickZetta
314
+ IF(condition, true_val, false_val) -- or CASE WHEN
315
+ COALESCE(expr, 0) -- replaces ZEROIFNULL
316
+ NULLIF(expr, 0) -- replaces NULLIFZERO
317
+ DECODE(expr, val1, res1, ...) -- supported (compatible)
318
+ ```
319
+
320
+ ### Aggregate Functions
321
+
322
+ ```sql
323
+ -- Snowflake
324
+ LISTAGG(col, ',') WITHIN GROUP (ORDER BY col)
325
+ ARRAY_AGG(col)
326
+ OBJECT_AGG(key, value)
327
+ APPROX_COUNT_DISTINCT(col)
328
+
329
+ -- ClickZetta
330
+ GROUP_CONCAT(col ORDER BY col SEPARATOR ',') -- replaces LISTAGG
331
+ ARRAY_AGG(col) -- same
332
+ MAP_AGG(key, value) -- replaces OBJECT_AGG
333
+ APPROX_COUNT_DISTINCT(col) -- same
334
+ ```
335
+
336
+ ---
337
+
338
+ ## Permission System Differences
339
+
340
+ | Concept | ClickZetta | Snowflake |
341
+ |---|---|---|
342
+ | Top-level container | WORKSPACE | DATABASE |
343
+ | Permission objects | VCLUSTER / SCHEMA / TABLE / VIEW | WAREHOUSE / DATABASE / SCHEMA / TABLE |
344
+ | Role grant | `GRANT ROLE r TO USER u` | `GRANT ROLE r TO USER u` |
345
+ | Show granted privileges | `SHOW GRANTS TO USER u` | `SHOW GRANTS TO USER u` |
346
+ | System roles | instance_admin / workspace_admin / workspace_dev / workspace_analyst | ACCOUNTADMIN / SYSADMIN / USERADMIN |
@@ -0,0 +1,229 @@
1
+ # ClickZetta Lakehouse vs Spark SQL Differences
2
+
3
+ > Source: Product documentation + Spark Connector documentation
4
+
5
+ ## Data Type Mapping
6
+
7
+ | ClickZetta | Spark SQL | Description |
8
+ |---|---|---|
9
+ | `BOOLEAN` | `BooleanType` | Same |
10
+ | `TINYINT` | `ByteType` | 1 byte |
11
+ | `SMALLINT` | `ShortType` | 2 bytes |
12
+ | `INT` | `IntegerType` | 4 bytes |
13
+ | `BIGINT` | `LongType` | 8 bytes |
14
+ | `FLOAT` | `FloatType` | 4-byte float |
15
+ | `DOUBLE` | `DoubleType` | 8-byte float |
16
+ | `DECIMAL(p,s)` | `DecimalType(p,s)` | Exact numeric |
17
+ | `STRING` / `VARCHAR` | `StringType` | String |
18
+ | `BINARY` | `BinaryType` | Binary |
19
+ | `DATE` | `DateType` | Date |
20
+ | `TIMESTAMP` | `TimestampType` | Timestamp with local time zone |
21
+ | `TIMESTAMP_NTZ` | `TimestampNTZType` | Timestamp without timezone |
22
+ | `ARRAY<T>` | `ArrayType` | Array |
23
+ | `MAP<K,V>` | `MapType` | Key-value pairs |
24
+ | `STRUCT<f:T>` | `StructType` | Struct |
25
+
26
+ ---
27
+
28
+ ## Table Creation Syntax Differences
29
+
30
+ ### Partitioning
31
+
32
+ ```sql
33
+ -- Spark SQL: PARTITIONED BY
34
+ CREATE TABLE orders (id INT, amount DECIMAL, dt STRING)
35
+ USING PARQUET
36
+ PARTITIONED BY (dt);
37
+
38
+ -- ClickZetta: same syntax, but no USING clause needed
39
+ CREATE TABLE orders (id INT, amount DECIMAL, dt STRING)
40
+ PARTITIONED BY (dt);
41
+ ```
42
+
43
+ ### Bucketing
44
+
45
+ ```sql
46
+ -- Spark SQL
47
+ CREATE TABLE orders (id INT, amount DECIMAL)
48
+ CLUSTERED BY (id) INTO 8 BUCKETS;
49
+
50
+ -- ClickZetta: same syntax
51
+ CREATE TABLE orders (id INT, amount DECIMAL)
52
+ CLUSTERED BY (id) INTO 8 BUCKETS;
53
+ ```
54
+
55
+ ### Table Properties
56
+
57
+ ```sql
58
+ -- Spark SQL: TBLPROPERTIES
59
+ CREATE TABLE orders (id INT)
60
+ TBLPROPERTIES ('delta.enableChangeDataFeed' = 'true');
61
+
62
+ -- ClickZetta: PROPERTIES
63
+ CREATE TABLE orders (id INT)
64
+ PROPERTIES ('data_lifecycle' = '30'); -- data retention in days
65
+ ```
66
+
67
+ ---
68
+
69
+ ## Query Syntax Differences
70
+
71
+ ### LATERAL VIEW (Array Expansion)
72
+
73
+ ```sql
74
+ -- Both have the same syntax (ClickZetta is compatible with Hive/Spark style)
75
+ SELECT id, skill
76
+ FROM employees
77
+ LATERAL VIEW EXPLODE(skills) t AS skill;
78
+
79
+ -- POSEXPLODE (with position index)
80
+ SELECT id, pos, skill
81
+ FROM employees
82
+ LATERAL VIEW POSEXPLODE(skills) t AS pos, skill;
83
+ ```
84
+
85
+ ### Window Functions
86
+
87
+ ```sql
88
+ -- Both are basically the same
89
+ SELECT id, amount,
90
+ ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY created_at DESC) AS rn,
91
+ SUM(amount) OVER (PARTITION BY customer_id
92
+ ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total
93
+ FROM orders;
94
+ ```
95
+
96
+ ### CTE (Common Table Expressions)
97
+
98
+ ```sql
99
+ -- Both have the same syntax
100
+ WITH
101
+ monthly_sales AS (
102
+ SELECT DATE_TRUNC('month', order_date) AS month, SUM(amount) AS total
103
+ FROM orders GROUP BY 1
104
+ ),
105
+ ranked AS (
106
+ SELECT *, RANK() OVER (ORDER BY total DESC) AS rnk FROM monthly_sales
107
+ )
108
+ SELECT * FROM ranked WHERE rnk <= 3;
109
+ ```
110
+
111
+ ### STRUCT / ARRAY Operations
112
+
113
+ ```sql
114
+ -- Spark SQL
115
+ SELECT address.city FROM users; -- STRUCT field access
116
+ SELECT skills[0] FROM employees; -- ARRAY index
117
+ SELECT EXPLODE(skills) FROM employees; -- expand array
118
+ SELECT TRANSFORM(skills, x -> UPPER(x)) FROM emp; -- array transform
119
+
120
+ -- ClickZetta (same syntax)
121
+ SELECT address.city FROM users;
122
+ SELECT skills[0] FROM employees;
123
+ SELECT EXPLODE(skills) FROM employees;
124
+ SELECT TRANSFORM(skills, x -> UPPER(x)) FROM emp;
125
+ ```
126
+
127
+ ---
128
+
129
+ ## Function Differences
130
+
131
+ ### Date Functions
132
+
133
+ ```sql
134
+ -- Both are basically compatible
135
+ DATE_ADD(date, days)
136
+ DATE_SUB(date, days)
137
+ DATEDIFF(end_date, start_date) -- note: ClickZetta parameter order is reversed from Snowflake
138
+ DATE_TRUNC('month', date)
139
+ DATE_FORMAT(date, 'yyyy-MM-dd')
140
+ FROM_UNIXTIME(unix_ts)
141
+ UNIX_TIMESTAMP(date_str)
142
+ ```
143
+
144
+ ### String Functions
145
+
146
+ ```sql
147
+ -- Both are basically compatible
148
+ CONCAT(s1, s2, ...)
149
+ CONCAT_WS(',', s1, s2, ...)
150
+ SPLIT(str, ',')
151
+ REGEXP_EXTRACT(str, pattern, group)
152
+ REGEXP_REPLACE(str, pattern, replacement)
153
+ INSTR(str, substr)
154
+ SUBSTR(str, pos, len)
155
+ TRIM(str) / LTRIM(str) / RTRIM(str)
156
+ ```
157
+
158
+ ### Aggregate Functions
159
+
160
+ ```sql
161
+ -- Both are basically compatible
162
+ COUNT(*) / COUNT(DISTINCT col)
163
+ SUM / AVG / MAX / MIN
164
+ COLLECT_LIST(col) -- Spark: returns array (with duplicates)
165
+ COLLECT_SET(col) -- Spark: returns deduplicated array
166
+ ARRAY_AGG(col) -- ClickZetta: equivalent to COLLECT_LIST
167
+ ```
168
+
169
+ ---
170
+
171
+ ## ClickZetta-Specific Features (No Spark Equivalent)
172
+
173
+ ```sql
174
+ -- 1. VCLUSTER (compute cluster management)
175
+ CREATE VCLUSTER my_vc VCLUSTER_TYPE = ANALYTICS VCLUSTER_SIZE = 4;
176
+ USE VCLUSTER my_vc;
177
+
178
+ -- 2. DYNAMIC TABLE (incremental computation)
179
+ CREATE DYNAMIC TABLE sales_summary
180
+ REFRESH INTERVAL 5 MINUTE VCLUSTER default_ap
181
+ AS SELECT customer_id, SUM(amount) FROM orders GROUP BY 1;
182
+
183
+ -- 3. TABLE STREAM (CDC change capture)
184
+ CREATE TABLE STREAM orders_stream ON TABLE orders
185
+ WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
186
+
187
+ -- 4. PIPE (continuous ingestion)
188
+ CREATE PIPE my_pipe
189
+ AS COPY INTO orders FROM VOLUME my_volume USING CSV;
190
+
191
+ -- 5. VECTOR type (vector search)
192
+ CREATE TABLE embeddings (id INT, vec VECTOR(FLOAT, 1024));
193
+ SELECT id, cosine_distance(vec, vector(0.1, 0.2, ...)) AS dist
194
+ FROM embeddings ORDER BY dist LIMIT 10;
195
+
196
+ -- 6. Time Travel
197
+ SELECT * FROM orders TIMESTAMP AS OF '2024-01-01 00:00:00';
198
+ RESTORE TABLE orders TO TIMESTAMP AS OF '2024-01-01 00:00:00';
199
+ UNDROP TABLE orders;
200
+
201
+ -- 7. SHARE (cross-instance data sharing)
202
+ CREATE SHARE my_share;
203
+ GRANT SELECT, READ METADATA ON TABLE public.orders TO SHARE my_share;
204
+ ```
205
+
206
+ ---
207
+
208
+ ## Spark SQL-Specific Features (ClickZetta has no equivalent or uses different syntax)
209
+
210
+ ```sql
211
+ -- 1. Delta Lake-specific syntax (ClickZetta has no equivalent)
212
+ OPTIMIZE table_name ZORDER BY (col); -- ClickZetta has OPTIMIZE but no ZORDER
213
+ VACUUM table_name RETAIN 168 HOURS; -- ClickZetta manages automatically, no manual VACUUM needed
214
+
215
+ -- 2. SHOW TABLES EXTENDED (ClickZetta has no equivalent)
216
+ SHOW TABLES EXTENDED IN schema LIKE 'orders*';
217
+
218
+ -- 3. DESCRIBE HISTORY (Delta) → ClickZetta uses DESC HISTORY
219
+ -- Spark/Delta:
220
+ DESCRIBE HISTORY orders;
221
+ -- ClickZetta:
222
+ DESC HISTORY orders;
223
+
224
+ -- 4. Generated columns (same syntax)
225
+ -- Spark:
226
+ CREATE TABLE orders (id INT, order_date DATE, year INT GENERATED ALWAYS AS (YEAR(order_date)));
227
+ -- ClickZetta (same syntax, also supported):
228
+ CREATE TABLE orders (id INT, order_date DATE, year INT GENERATED ALWAYS AS (YEAR(order_date)));
229
+ ```