@clickzetta/cz-cli-darwin-x64 0.3.92 → 0.3.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
  3. package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
  4. package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
  5. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
  6. package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
  7. package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
  8. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
  9. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
  10. package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
  11. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
  12. package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
  13. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
  14. package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
  15. package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
  16. package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
  17. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
  18. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
  19. package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
  20. package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
  21. package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
  22. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
  23. package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
  24. package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
  25. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
  26. package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
  27. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
  28. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
  29. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
  30. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
  31. package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
  32. package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
  33. package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
  34. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
  35. package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
  36. package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
  37. package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
  38. package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
  39. package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
  40. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
  41. package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
  42. package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
  43. package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
  44. package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
  45. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
  46. package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
  47. package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
  48. package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
  49. package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
  50. package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
  51. package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
  52. package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
  53. package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
  54. package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
  55. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
  56. package/package.json +1 -1
  57. package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
  58. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  59. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
  60. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
  61. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
  62. package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
  63. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  64. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  65. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  66. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  67. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  68. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  69. /package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
@@ -0,0 +1,128 @@
1
+ ---
2
+ name: clickzetta-sql-migration
3
+ description: |
4
+ Migration guide for SQL workloads moving to ClickZetta Lakehouse from Snowflake,
5
+ Databricks, or Spark SQL. Covers object concept mapping, syntax differences,
6
+ function mapping tables, MERGE INTO limitations, the strict implicit type
7
+ conversion rule, and migration pitfalls. Use this skill ONLY for migration or
8
+ cross-platform comparison questions. For native ClickZetta SQL syntax (DDL,
9
+ DML, DQL, functions) reference the ClickZetta Lakehouse documentation.
10
+ Triggered when the user mentions migration source platforms (Snowflake,
11
+ Databricks, Delta Lake, Spark SQL) together with ClickZetta, asks "how do I
12
+ write X (from Snowflake/Spark) in ClickZetta", asks about specific Snowflake
13
+ or Spark functions/syntax (IFF, ARRAY_SIZE, LISTAGG, FLATTEN, METADATA$ACTION,
14
+ TARGET_LAG, APPLY CHANGES INTO, ZORDER, WITH RECURSIVE, WHEN NOT MATCHED BY
15
+ SOURCE, OBJECT_CONSTRUCT, VARIANT colon syntax, CHARINDEX, ZEROIFNULL,
16
+ DATEADD/DATEDIFF parameter order), asks about implicit type conversion errors,
17
+ or asks about compatibility/differences between ClickZetta and these
18
+ platforms.
19
+ Keywords: Snowflake migration, Databricks migration, Spark SQL migration,
20
+ Snowflake to ClickZetta, Databricks to ClickZetta, vs Snowflake, vs Spark,
21
+ vs Databricks, syntax differences, function mapping, implicit type conversion,
22
+ WHEN NOT MATCHED BY SOURCE, APPLY CHANGES INTO, WITH RECURSIVE, METADATA$ACTION,
23
+ TARGET_LAG, FLATTEN, IFF, LISTAGG, OBJECT_CONSTRUCT, VARIANT, CHARINDEX
24
+ ---
25
+
26
+ # ClickZetta SQL Migration Guide
27
+
28
+ Use this skill when migrating SQL workloads from Snowflake, Databricks (Delta Lake), or Spark SQL to ClickZetta Lakehouse, or when answering "how does ClickZetta differ from \<other system\>" questions.
29
+
30
+ For native ClickZetta SQL syntax that does not differ from standard SQL, refer to the ClickZetta Lakehouse documentation.
31
+
32
+ ## Reference Documents
33
+
34
+ | Document | When to read |
35
+ |---|---|
36
+ | [Snowflake migration guide](references/migration-snowflake.md) | Migrating from Snowflake — object mapping, type mapping, syntax + function differences |
37
+ | [Databricks migration guide](references/migration-databricks.md) | Migrating from Databricks/Delta Lake — APPLY CHANGES, ZORDER, WHEN NOT MATCHED BY SOURCE alternatives |
38
+ | [vs Snowflake summary](references/vs-snowflake.md) | Cross-platform comparison summary |
39
+ | [vs Spark SQL summary](references/vs-spark.md) | Cross-platform comparison summary |
40
+ | [DML differences](references/dml-differences.md) | INSERT/UPDATE/DELETE/MERGE/COPY syntax that differs from other systems (concise migration view) |
41
+ | [Implicit type conversion](references/implicit-type-conversion.md) | The #1 migration error — strict CAST rules for INSERT/UPDATE |
42
+ | [Function mapping](references/function-mapping.md) | Function-by-function mapping tables (Snowflake/Spark/Databricks → ClickZetta) and unsupported functions |
43
+ | [DDL reference](references/ddl-reference.md) | Detailed DDL syntax — kept for migration completeness; for native ClickZetta DDL prefer the official documentation |
44
+ | [DML reference](references/dml-reference.md) | Detailed DML syntax — kept for migration completeness; for native ClickZetta DML prefer the official documentation |
45
+ | [DQL reference](references/dql-reference.md) | Detailed DQL syntax — kept for migration completeness; for native ClickZetta DQL prefer the official documentation |
46
+ | [Functions reference](references/functions-reference.md) | Detailed function list — kept for migration completeness; for native ClickZetta functions prefer the official documentation |
47
+
48
+ ---
49
+
50
+ ## ⚠️ Most Common Migration Pitfalls (Quick Reference)
51
+
52
+ | Scenario | Snowflake / Spark / Databricks | ClickZetta |
53
+ |---|---|---|
54
+ | Implicit string→DATE/TIMESTAMP/BOOLEAN/JSON in INSERT | ✅ allowed | ❌ Error — must use `CAST` or typed literals (`DATE '...'`, `TIMESTAMP '...'`, `TRUE`/`FALSE`, `PARSE_JSON(...)`) |
55
+ | `IFF(cond, a, b)` (SF) | — | `IF(cond, a, b)` |
56
+ | `ARRAY_SIZE(arr)` (SF) | `size(arr)` (Spark) | `SIZE(arr)` ✅ or `ARRAY_SIZE(arr)` ✅ — both supported |
57
+ | `LISTAGG(col, ',') WITHIN GROUP (...)` (SF) | — | `GROUP_CONCAT(col ORDER BY col SEPARATOR ',')` |
58
+ | `LATERAL FLATTEN(input => arr)` (SF) | — | `LATERAL VIEW EXPLODE(arr)` |
59
+ | `data:key` JSON access (SF) | — | `data['key']` |
60
+ | `OBJECT_CONSTRUCT('k', v)` (SF) | `STRUCT(v AS k)` (Spark) | `named_struct('k', v)` |
61
+ | `VARIANT` type (SF) | — | `JSON` type |
62
+ | `NUMBER(p, s)` (SF) | — | `DECIMAL(p, s)` |
63
+ | `CHARINDEX(sub, s)` (SF) | — | `INSTR(s, sub)` ⚠️ parameter order reversed |
64
+ | `DATEDIFF(day, start, end)` (SF) | `DATEDIFF(end, start)` (Spark) | both supported, ⚠️ Snowflake order has unit as first arg |
65
+ | `WHEN NOT MATCHED BY SOURCE THEN DELETE` (Databricks) | — | ❌ Not supported — use MERGE INTO + separate DELETE |
66
+ | `APPLY CHANGES INTO` (DLT) | — | TABLE STREAM + MERGE INTO |
67
+ | `WITH RECURSIVE` (SF/Databricks) | ✅ supported | ❌ Not supported — iterate via Python/ZettaPark or pre-build helper tables |
68
+ | `BEGIN; COMMIT; ROLLBACK;` (transactions) | ✅ | ❌ Not supported — use MERGE INTO for atomic operations |
69
+ | `TARGET_LAG = '1 minute'` for dynamic tables (SF) | — | `REFRESH INTERVAL 1 MINUTE VCLUSTER xx` |
70
+ | `METADATA$ACTION` for streams (SF) | — | `__change_type` (values: INSERT / UPDATE_BEFORE / UPDATE_AFTER / DELETE) |
71
+ | `OPTIMIZE t ZORDER BY (col)` (Databricks) | — | `OPTIMIZE t` (small file compaction only, no ZORDER) |
72
+ | `STRUCT(1 AS id, 'a' AS name)` (Spark) | — | `named_struct('id', 1, 'name', 'a')` |
73
+ | `TABLESAMPLE (50 PERCENT)` | — | ❌ PERCENT not supported — use `ORDER BY RAND() LIMIT n` |
74
+ | `CREATE SEQUENCE` (SF) | — | ❌ Not supported — use `IDENTITY(seed)` column (BIGINT only) |
75
+ | `CREATE TEMPORARY TABLE` (SF) | — | ❌ Not supported — use CTE |
76
+ | `CHARINDEX` / `EDITDISTANCE` / `SOUNDEX` (SF) | — | `INSTR` (reversed args) / Python UDF / no equivalent |
77
+
78
+ ---
79
+
80
+ ## Object Concept Mapping
81
+
82
+ | Snowflake | Databricks | ClickZetta |
83
+ |---|---|---|
84
+ | DATABASE | Catalog (internal) | WORKSPACE |
85
+ | SCHEMA / DATABASE.SCHEMA | Database / Schema | SCHEMA |
86
+ | WAREHOUSE | Cluster / SQL Warehouse | VCLUSTER |
87
+ | STAGE | External Location | VOLUME (+ STORAGE CONNECTION) |
88
+ | STORAGE INTEGRATION | — | STORAGE CONNECTION |
89
+ | SNOWPIPE | Auto Loader | PIPE |
90
+ | STREAM | (Delta CDF / DLT CDC) | TABLE STREAM |
91
+ | DYNAMIC TABLE | DLT (Live Tables) | DYNAMIC TABLE (different syntax) |
92
+ | TASK | Job | Studio Task |
93
+ | SEQUENCE | — | IDENTITY column |
94
+ | SHARE | Delta Sharing | SHARE |
95
+ | — | Unity Catalog (federation) | EXTERNAL CATALOG |
96
+
97
+ ---
98
+
99
+ ## Data Type Mapping Quick Reference
100
+
101
+ | Snowflake | Spark / Databricks | ClickZetta |
102
+ |---|---|---|
103
+ | `NUMBER(p, s)` / `NUMERIC` | `DECIMAL(p, s)` | `DECIMAL(p, s)` |
104
+ | `INTEGER` / `NUMBER(10,0)` | `INT` / `BIGINT` | `INT` / `BIGINT` |
105
+ | `VARCHAR(n)` / `TEXT` | `STRING` | `STRING` (recommended) or `VARCHAR(n)` |
106
+ | `TIMESTAMP_LTZ` | `TIMESTAMP` | `TIMESTAMP` |
107
+ | `TIMESTAMP_NTZ` | `TIMESTAMP_NTZ` | `TIMESTAMP_NTZ` |
108
+ | `VARIANT` | — | `JSON` |
109
+ | `ARRAY` (untyped) | `ARRAY<T>` | `ARRAY<T>` (must specify element type) |
110
+ | `OBJECT` | `MAP<K,V>` / `STRUCT<...>` | `MAP<K,V>` or `STRUCT<...>` |
111
+ | `GEOGRAPHY` | — | not supported |
112
+ | — | — | `VECTOR(FLOAT, N)` (ClickZetta-specific) |
113
+
114
+ ---
115
+
116
+ ## Migration Workflow Pointers
117
+
118
+ This skill focuses on **SQL syntax compatibility**. A complete migration involves more than SQL rewrites:
119
+
120
+ 1. **Object mapping** — see table above
121
+ 2. **Schema/DDL conversion** — see [migration-snowflake.md](references/migration-snowflake.md) and [migration-databricks.md](references/migration-databricks.md)
122
+ 3. **Data movement** — typically via object storage (S3/OSS) staging + COPY INTO; not covered in detail here
123
+ 4. **SQL rewrites** — see this skill's reference documents
124
+ 5. **Application/driver layer** — JDBC, Python connector, BI tool reconnection; refer to `clickzetta-lakehouse-connect` skill
125
+ 6. **Permission migration** — RBAC concept comparison; refer to `clickzetta-access-control` skill
126
+ 7. **Performance tuning re-mapping** — Snowflake CLUSTER BY / Databricks ZORDER → ClickZetta partitioning + indexes; refer to `clickzetta-query-optimizer` skill
127
+
128
+ For end-to-end migration planning, combine this skill with the skills listed above.
@@ -0,0 +1,10 @@
1
+ {"case_id":"001","type":"should_call","user_input":"How do I write Snowflake's IFF, ARRAY_SIZE, and LISTAGG in ClickZetta?","expected_skill":"clickzetta-sql-migration","expected_output_contains":["IF(","SIZE("]}
2
+ {"case_id":"002","type":"should_call","user_input":"How to replace Databricks APPLY CHANGES INTO in ClickZetta?","expected_skill":"clickzetta-sql-migration","expected_output_contains":["MERGE INTO"]}
3
+ {"case_id":"003","type":"should_call","user_input":"What are ClickZetta's implicit type conversion rules when migrating from Snowflake?","expected_skill":"clickzetta-sql-migration","expected_output_contains":["implicit","conversion"]}
4
+ {"case_id":"004","type":"should_call","user_input":"How do I migrate a Snowflake VARIANT column to ClickZetta?","expected_skill":"clickzetta-sql-migration","expected_output_contains":["JSON"]}
5
+ {"case_id":"005","type":"should_call","user_input":"Databricks ZORDER equivalent in ClickZetta","expected_skill":"clickzetta-sql-migration","expected_output_contains":["OPTIMIZE"]}
6
+ {"case_id":"006","type":"should_call","user_input":"How to write Snowflake LATERAL FLATTEN in ClickZetta?","expected_skill":"clickzetta-sql-migration","expected_output_contains":["LATERAL VIEW EXPLODE"]}
7
+ {"case_id":"007","type":"should_not_call","user_input":"How do I create a partitioned table in ClickZetta?","forbidden_skill":"clickzetta-sql-migration"}
8
+ {"case_id":"008","type":"should_not_call","user_input":"What is the syntax for SELECT in ClickZetta?","forbidden_skill":"clickzetta-sql-migration"}
9
+ {"case_id":"009","type":"should_not_call","user_input":"How to use window functions in ClickZetta?","forbidden_skill":"clickzetta-sql-migration"}
10
+ {"case_id":"010","type":"should_not_call","user_input":"How do I create a Bloom Filter index?","forbidden_skill":"clickzetta-sql-migration"}
@@ -0,0 +1,350 @@
1
+ # DDL Complete Syntax Reference
2
+
3
+ > Based on ClickZetta Lakehouse product documentation, with Snowflake / Spark SQL difference annotations
4
+
5
+ ---
6
+
7
+ ## SCHEMA Operations
8
+
9
+ ```sql
10
+ -- Create
11
+ CREATE SCHEMA IF NOT EXISTS my_schema COMMENT 'description';
12
+
13
+ -- Alter
14
+ ALTER SCHEMA my_schema RENAME TO new_schema;
15
+ ALTER SCHEMA my_schema SET COMMENT 'new comment';
16
+
17
+ -- Drop (cascades all objects)
18
+ DROP SCHEMA IF EXISTS my_schema;
19
+
20
+ -- Show
21
+ SHOW SCHEMAS;
22
+ SHOW SCHEMAS EXTENDED; -- includes type column (MANAGED/EXTERNAL)
23
+ SHOW SCHEMAS LIKE 'sales%';
24
+ SHOW SCHEMAS WHERE schema_name = 'public';
25
+
26
+ -- Switch
27
+ USE SCHEMA my_schema;
28
+ USE my_schema; -- SCHEMA keyword is optional
29
+ ```
30
+
31
+ **Differences from Snowflake:**
32
+ - Snowflake uses `USE DATABASE` + `USE SCHEMA`; ClickZetta has no DATABASE layer, use `USE SCHEMA` directly
33
+ - Snowflake supports `CREATE OR REPLACE SCHEMA`; ClickZetta does not, use `IF NOT EXISTS`
34
+
35
+ ---
36
+
37
+ ## TABLE Operations
38
+
39
+ ### CREATE TABLE
40
+
41
+ ```sql
42
+ -- Basic table creation
43
+ CREATE TABLE IF NOT EXISTS orders (
44
+ id BIGINT,
45
+ customer_id INT,
46
+ amount DECIMAL(18, 2) NOT NULL,
47
+ status STRING DEFAULT 'pending',
48
+ created_at TIMESTAMP,
49
+ tags ARRAY<STRING>,
50
+ meta JSON
51
+ )
52
+ COMMENT 'Orders table';
53
+
54
+ -- Primary key table (ENABLE VALIDATE RELY: SQL writes also deduplicate)
55
+ CREATE TABLE pk_orders (
56
+ id BIGINT PRIMARY KEY,
57
+ amount DECIMAL(18, 2)
58
+ );
59
+
60
+ -- Primary key table (DISABLE NOVALIDATE RELY: only real-time writes deduplicate, SQL writes do not)
61
+ CREATE TABLE cdc_orders (
62
+ id BIGINT PRIMARY KEY DISABLE NOVALIDATE RELY,
63
+ amount DECIMAL(18, 2)
64
+ );
65
+
66
+ -- Auto-increment column (BIGINT only, not guaranteed sequential)
67
+ CREATE TABLE auto_id_table (
68
+ id BIGINT IDENTITY(1), -- starts from 1
69
+ col STRING
70
+ );
71
+
72
+ -- Generated column (deterministic expression, cannot be manually inserted)
73
+ CREATE TABLE orders_with_year (
74
+ id BIGINT,
75
+ created_at TIMESTAMP,
76
+ year INT GENERATED ALWAYS AS (YEAR(created_at))
77
+ );
78
+
79
+ -- Default values (supports non-deterministic functions)
80
+ CREATE TABLE t_default (
81
+ id INT,
82
+ created_at TIMESTAMP DEFAULT current_timestamp(),
83
+ status STRING DEFAULT 'active',
84
+ score DOUBLE DEFAULT random()
85
+ );
86
+
87
+ -- Partitioned table (Iceberg hidden partitions)
88
+ CREATE TABLE orders_partitioned (
89
+ id BIGINT,
90
+ amount DECIMAL(18, 2),
91
+ created_at TIMESTAMP
92
+ )
93
+ PARTITIONED BY (days(created_at)); -- partition by day
94
+
95
+ -- Partition transform functions
96
+ -- years(col) months(col) days(col) hours(col)
97
+ -- bucket(N, col) truncate(col, W)
98
+
99
+ -- Bucketed table
100
+ CREATE TABLE orders_bucketed (
101
+ id BIGINT,
102
+ customer_id INT,
103
+ amount DECIMAL(18, 2)
104
+ )
105
+ CLUSTERED BY (customer_id)
106
+ SORTED BY (id ASC)
107
+ INTO 16 BUCKETS;
108
+
109
+ -- Data retention period
110
+ CREATE TABLE orders (id BIGINT)
111
+ PROPERTIES ('data_lifecycle' = '30'); -- retain for 30 days
112
+
113
+ -- CTAS (Create Table As Select)
114
+ CREATE TABLE orders_copy AS
115
+ SELECT * FROM orders WHERE status = 'completed';
116
+
117
+ -- External table (maps to object storage)
118
+ CREATE EXTERNAL TABLE ext_orders (
119
+ id BIGINT,
120
+ amount DECIMAL(18, 2)
121
+ )
122
+ LOCATION 'oss://bucket/orders/'
123
+ STORED AS PARQUET;
124
+ ```
125
+
126
+ **Differences from Snowflake:**
127
+ - Snowflake `CREATE OR REPLACE TABLE` → ClickZetta `CREATE TABLE IF NOT EXISTS`
128
+ - Snowflake `CLUSTER BY (col)` → ClickZetta `CLUSTERED BY (col) INTO N BUCKETS`
129
+ - Snowflake `AUTOINCREMENT` → ClickZetta `IDENTITY[(seed)]`
130
+ - Snowflake `TRANSIENT TABLE` → ClickZetta has no equivalent (use `data_lifecycle` to control retention)
131
+ - Snowflake `TEMPORARY TABLE` → ClickZetta has no temporary table concept
132
+ - Snowflake `COPY GRANTS` → ClickZetta does not support
133
+
134
+ **Differences from Spark SQL:**
135
+ - Spark `USING PARQUET` → ClickZetta does not need it (default is Parquet)
136
+ - Spark `TBLPROPERTIES` → ClickZetta `PROPERTIES`
137
+ - Spark `LOCATION` external table syntax is basically the same
138
+
139
+ ### ALTER TABLE
140
+
141
+ ```sql
142
+ -- Rename
143
+ ALTER TABLE orders RENAME TO orders_v2;
144
+
145
+ -- Comment
146
+ ALTER TABLE orders SET COMMENT 'new comment';
147
+
148
+ -- Time Travel retention period in days (a different property from 'data_lifecycle')
149
+ ALTER TABLE orders SET PROPERTIES ('data_retention_days' = '7');
150
+
151
+ -- Add column
152
+ ALTER TABLE orders ADD COLUMN region STRING AFTER status;
153
+ ALTER TABLE orders ADD COLUMN region STRING FIRST;
154
+
155
+ -- Add nested field in complex types
156
+ ALTER TABLE t ADD COLUMN address.zip STRING; -- STRUCT nested
157
+ ALTER TABLE t ADD COLUMN items.ELEMENT.price DOUBLE; -- ARRAY<STRUCT> nested
158
+
159
+ -- Alter column type (limited)
160
+ ALTER TABLE orders ALTER COLUMN amount TYPE DOUBLE;
161
+
162
+ -- Rename column
163
+ ALTER TABLE orders RENAME COLUMN old_col TO new_col;
164
+
165
+ -- Drop column
166
+ ALTER TABLE orders DROP COLUMN unnecessary_col;
167
+
168
+ -- Alter column comment
169
+ ALTER TABLE orders ALTER COLUMN amount COMMENT 'Order amount';
170
+
171
+ -- Add index (tables with ARRAY/JSON columns must add separately)
172
+ -- ⚠️ Index syntax: BLOOMFILTER (not USING BLOOM_FILTER)
173
+ CREATE BLOOMFILTER INDEX IF NOT EXISTS id_bf ON TABLE orders(id);
174
+ CREATE BLOOMFILTER INDEX IF NOT EXISTS name_bf ON TABLE orders(name)
175
+ PROPERTIES ('analyzer' = 'ngram', 'n' = '3'); -- ngram tokenizer
176
+
177
+ -- Inverted index
178
+ CREATE INVERTED INDEX IF NOT EXISTS content_inv ON TABLE articles(content);
179
+
180
+ -- Vector index (inline at table creation)
181
+ -- See CREATE TABLE examples
182
+
183
+ -- Drop index (⚠️ does not need ON table_name)
184
+ DROP INDEX IF EXISTS id_bf;
185
+ DROP INDEX id_bf;
186
+ ```
187
+
188
+ **Differences from Snowflake:**
189
+ - Snowflake `ALTER TABLE ... ADD COLUMN` can only add to the end; ClickZetta supports `FIRST/AFTER/BEFORE`
190
+ - Both Snowflake and ClickZetta support `ALTER TABLE ... DROP COLUMN`; no rewrite is needed for this statement
191
+ - Snowflake has no BLOOMFILTER/INVERTED/VECTOR indexes
192
+
193
+ ### DROP / TRUNCATE TABLE
194
+
195
+ ```sql
196
+ -- Drop table (can be recovered with UNDROP)
197
+ DROP TABLE IF EXISTS orders;
198
+ DROP TABLE my_schema.orders;
199
+
200
+ -- Truncate table (preserves structure)
201
+ TRUNCATE TABLE orders;
202
+ TRUNCATE TABLE IF EXISTS orders; -- ✅ supports IF EXISTS
203
+
204
+ -- Truncate specific partition
205
+ TRUNCATE TABLE orders PARTITION (dt = '2024-01-01');
206
+ TRUNCATE TABLE orders PARTITION (dt > '2024-01-01');
207
+ TRUNCATE TABLE orders PARTITION (dt >= '2024-01-01' AND dt < '2024-02-01');
208
+ ```
209
+
210
+ **Differences from Snowflake:**
211
+ - Snowflake `TRUNCATE TABLE` does not support partition conditions; ClickZetta does
212
+ - Snowflake `DROP TABLE ... PURGE` deletes immediately; ClickZetta can UNDROP within retention period
213
+
214
+ ---
215
+
216
+ ## VIEW Operations
217
+
218
+ ```sql
219
+ -- Create view
220
+ CREATE VIEW IF NOT EXISTS order_summary AS
221
+ SELECT customer_id, COUNT(*) AS cnt, SUM(amount) AS total
222
+ FROM orders GROUP BY customer_id;
223
+
224
+ -- Replace view (ClickZetta supports OR REPLACE, same as Snowflake)
225
+ CREATE OR REPLACE VIEW order_summary AS
226
+ SELECT customer_id, SUM(amount) AS total FROM orders GROUP BY customer_id;
227
+
228
+ -- With column aliases and comments
229
+ CREATE VIEW order_summary (cust_id COMMENT 'Customer ID', total COMMENT 'Total amount')
230
+ COMMENT 'Order summary view'
231
+ AS SELECT customer_id, SUM(amount) FROM orders GROUP BY 1;
232
+
233
+ -- Drop
234
+ DROP VIEW IF EXISTS order_summary;
235
+
236
+ -- Show
237
+ SHOW TABLES WHERE is_view = true;
238
+ SHOW TABLES IN my_schema WHERE is_view = true;
239
+ ```
240
+
241
+ **Note:** ClickZetta's `CREATE OR REPLACE VIEW` is the same as Snowflake, but `CREATE OR REPLACE TABLE` is not supported.
242
+
243
+ ---
244
+
245
+ ## INDEX Operations
246
+
247
+ ```sql
248
+ -- Show indexes
249
+ SHOW INDEX FROM table_name;
250
+ SHOW INDEX FROM my_schema.table_name;
251
+
252
+ -- Show index details
253
+ DESC INDEX index_name;
254
+ DESC INDEX EXTENDED index_name;
255
+
256
+ -- Build index on existing data (vector and inverted indexes only, not Bloom Filter)
257
+ BUILD INDEX index_name ON table_name;
258
+ BUILD INDEX index_name ON table_name WHERE partition_col = '2024-01-01';
259
+ ```
260
+
261
+ ---
262
+
263
+ ## Viewing Object Information
264
+
265
+ ```sql
266
+ -- Table structure
267
+ DESC table_name;
268
+ DESC EXTENDED table_name; -- includes size, record count, etc.
269
+ DESCRIBE TABLE table_name; -- same as DESC
270
+
271
+ -- Column information
272
+ SHOW COLUMNS IN table_name;
273
+ SHOW COLUMNS FROM table_name IN schema_name;
274
+
275
+ -- Create table statement
276
+ SHOW CREATE TABLE table_name;
277
+
278
+ -- Table list
279
+ SHOW TABLES;
280
+ SHOW TABLES IN my_schema;
281
+ SHOW TABLES LIKE 'order%';
282
+ SHOW TABLES WHERE is_view = false AND is_materialized_view = false;
283
+ SHOW TABLES WHERE is_dynamic = true;
284
+ SHOW TABLES WHERE is_external = true;
285
+
286
+ -- Partition information
287
+ SHOW PARTITIONS table_name;
288
+ SHOW PARTITIONS EXTENDED table_name; -- includes file count, size, modification time
289
+ SHOW PARTITIONS table_name PARTITION (dt = '2024-01-01');
290
+ SHOW PARTITIONS table_name WHERE total_rows > 1000;
291
+
292
+ -- History versions
293
+ DESC HISTORY table_name;
294
+ SHOW TABLES HISTORY; -- includes deleted tables
295
+ ```
296
+
297
+ ---
298
+
299
+ ## SYNONYM Operations
300
+
301
+ ```sql
302
+ -- Create synonym for a table (cross-schema access)
303
+ CREATE SYNONYM my_orders FOR TABLE other_schema.orders;
304
+
305
+ -- Create synonym for a Volume
306
+ CREATE SYNONYM my_vol FOR VOLUME other_schema.data_volume;
307
+
308
+ -- Create synonym for a function
309
+ CREATE SYNONYM my_func FOR FUNCTION other_schema.udf_name;
310
+
311
+ -- Show synonyms
312
+ SHOW SYNONYMS;
313
+ SHOW SYNONYMS IN my_schema;
314
+ SHOW SYNONYMS LIKE 'my_%';
315
+
316
+ -- Drop synonym (must specify object type)
317
+ DROP SYNONYM my_orders FOR TABLE;
318
+ DROP SYNONYM my_vol FOR VOLUME;
319
+ DROP SYNONYM my_func FOR FUNCTION;
320
+ ```
321
+
322
+ > Supported object types for synonyms: TABLE (including regular tables, Table Streams, materialized views, dynamic tables), VOLUME, FUNCTION.
323
+ > Use cases: cross-schema access, data consistency maintenance, application layer decoupling.
324
+
325
+ ---
326
+
327
+ ## Time Travel & Data Recovery
328
+
329
+ ```sql
330
+ -- Query historical version
331
+ SELECT * FROM orders TIMESTAMP AS OF '2024-01-01 00:00:00';
332
+ SELECT * FROM orders TIMESTAMP AS OF CURRENT_TIMESTAMP() - INTERVAL 12 HOURS;
333
+ SELECT * FROM orders TIMESTAMP AS OF CAST('2024-01-01' AS TIMESTAMP);
334
+
335
+ -- Restore table to historical version (table not deleted)
336
+ RESTORE TABLE orders TO TIMESTAMP AS OF '2024-01-01 00:00:00';
337
+
338
+ -- Restore deleted table
339
+ UNDROP TABLE orders;
340
+ UNDROP TABLE my_schema.orders;
341
+
342
+ -- Set retention period (0-90 days, default 1 day)
343
+ ALTER TABLE orders SET PROPERTIES ('data_retention_days' = '7');
344
+ ```
345
+
346
+ **Differences from Snowflake:**
347
+ - Snowflake `AT (TIMESTAMP => ...)` → ClickZetta `TIMESTAMP AS OF ...`
348
+ - Snowflake `BEFORE (STATEMENT => ...)` → ClickZetta does not support rollback by statement_id
349
+ - Snowflake `UNDROP TABLE` → ClickZetta same
350
+ - Snowflake default retention is 1 day (configurable up to 90 days on Enterprise Edition and above); ClickZetta default is 1 day, max 90 days
@@ -0,0 +1,192 @@
1
+ # DML Differences vs Snowflake / Databricks / Spark
2
+
3
+ > Focuses only on the DML (INSERT/UPDATE/DELETE/MERGE/COPY) syntax that **differs** from Snowflake, Databricks, or Spark SQL.
4
+ > For the basic ClickZetta DML syntax that works the same as standard SQL, refer to the official ClickZetta Lakehouse documentation.
5
+
6
+ ---
7
+
8
+ ## Critical: Implicit Type Conversion
9
+
10
+ ⚠️ **The single most common migration error.** See [implicit-type-conversion.md](implicit-type-conversion.md) for the full rules table.
11
+
12
+ Short version: ClickZetta rejects implicit string→date/timestamp/boolean/json/numeric conversion in INSERT/UPDATE. You must use explicit `CAST` or typed literals.
13
+
14
+ ---
15
+
16
+ ## INSERT Differences
17
+
18
+ ### Snowflake → ClickZetta
19
+
20
+ | Snowflake | ClickZetta | Notes |
21
+ |---|---|---|
22
+ | `INSERT OVERWRITE INTO t SELECT ...` | `INSERT OVERWRITE TABLE t SELECT ...` ✅ | Both platforms support overwrite; Snowflake spells it `INSERT OVERWRITE INTO`, ClickZetta uses the `TABLE` keyword |
23
+ | No `PARTITION (...)` clause | `INSERT INTO t PARTITION (dt='2024-01-01') VALUES ...` ✅ | Hive-style static partition |
24
+ | No dynamic partition syntax | `INSERT INTO t PARTITION (dt) SELECT ..., dt FROM s` ✅ | Hive-style dynamic partition |
25
+
26
+ ### Spark → ClickZetta
27
+
28
+ INSERT syntax is largely identical. ClickZetta is fully compatible with Spark INSERT.
29
+
30
+ ---
31
+
32
+ ## UPDATE Differences
33
+
34
+ ### Snowflake → ClickZetta
35
+
36
+ ```sql
37
+ -- Snowflake: UPDATE ... FROM (JOIN-style update)
38
+ UPDATE orders o SET amount = c.discount * o.amount
39
+ FROM customers c WHERE o.customer_id = c.id;
40
+
41
+ -- ClickZetta: use subquery
42
+ UPDATE orders SET amount = (
43
+ SELECT discount * orders.amount FROM customers WHERE customers.id = orders.customer_id
44
+ ) WHERE customer_id IN (SELECT id FROM customers);
45
+ ```
46
+
47
+ ClickZetta additionally supports `ORDER BY + LIMIT` in UPDATE, which Snowflake does not:
48
+
49
+ ```sql
50
+ -- ClickZetta-only: batch update
51
+ UPDATE orders SET status = 'archived'
52
+ WHERE created_at < '2020-01-01'
53
+ ORDER BY created_at ASC
54
+ LIMIT 10000;
55
+ ```
56
+
57
+ ### Spark → ClickZetta
58
+
59
+ Spark SQL itself does not support UPDATE (only Delta Lake does). ClickZetta natively supports UPDATE on all tables.
60
+
61
+ ---
62
+
63
+ ## DELETE Differences
64
+
65
+ ### Spark → ClickZetta
66
+
67
+ Spark SQL itself does not support DELETE (only Delta Lake does). ClickZetta natively supports DELETE on all tables.
68
+
69
+ Snowflake DELETE syntax is essentially identical to ClickZetta.
70
+
71
+ ---
72
+
73
+ ## MERGE INTO: Important Limitations
74
+
75
+ ### Multiple WHEN NOT MATCHED clauses
76
+
77
+ ```sql
78
+ -- ❌ Snowflake supports multiple WHEN NOT MATCHED — ClickZetta does NOT
79
+ MERGE INTO t USING s ON t.id = s.id
80
+ WHEN NOT MATCHED AND s.type = 'A' THEN INSERT ...
81
+ WHEN NOT MATCHED AND s.type = 'B' THEN INSERT ...;
82
+
83
+ -- ✅ ClickZetta: only one WHEN NOT MATCHED — combine logic with CASE
84
+ MERGE INTO t USING s ON t.id = s.id
85
+ WHEN NOT MATCHED THEN INSERT (id, val) VALUES (
86
+ s.id,
87
+ CASE s.type WHEN 'A' THEN ... WHEN 'B' THEN ... END
88
+ );
89
+ ```
90
+
91
+ ### WHEN NOT MATCHED BY SOURCE (Databricks Delta Lake)
92
+
93
+ ```sql
94
+ -- ❌ Databricks supports WHEN NOT MATCHED BY SOURCE — ClickZetta does NOT
95
+ MERGE INTO target t USING source s ON t.id = s.id
96
+ WHEN MATCHED THEN UPDATE ...
97
+ WHEN NOT MATCHED THEN INSERT ...
98
+ WHEN NOT MATCHED BY SOURCE THEN DELETE; -- ❌ unsupported
99
+
100
+ -- ✅ ClickZetta: split into two operations
101
+ MERGE INTO target t USING source s ON t.id = s.id
102
+ WHEN MATCHED THEN UPDATE SET t.val = s.val
103
+ WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val);
104
+
105
+ DELETE FROM target WHERE id NOT IN (SELECT id FROM source WHERE id IS NOT NULL); -- filter NULLs: NOT IN matches no rows if the subquery returns a NULL
106
+ ```
107
+
108
+ ### Order of Multiple WHEN MATCHED clauses
109
+
110
+ ```sql
111
+ -- ⚠️ ClickZetta requires UPDATE clauses BEFORE DELETE clauses
112
+ MERGE INTO target t USING source s ON t.id = s.id
113
+ WHEN MATCHED AND s.is_deleted = 0 THEN UPDATE SET ... -- UPDATE first
114
+ WHEN MATCHED AND s.is_deleted = 1 THEN DELETE -- DELETE after
115
+ WHEN NOT MATCHED THEN INSERT ...;
116
+ ```
117
+
118
+ In Snowflake/Databricks, DELETE may appear before UPDATE.
119
+
120
+ ---
121
+
122
+ ## Transactions: Not Supported
123
+
124
+ ```sql
125
+ -- ❌ All of these are unsupported in ClickZetta
126
+ BEGIN;
127
+ BEGIN TRANSACTION;
128
+ START TRANSACTION;
129
+ COMMIT;
130
+ ROLLBACK;
131
+
132
+ -- ✅ Use MERGE INTO for atomic UPSERT
133
+ MERGE INTO target t USING source s ON t.id = s.id
134
+ WHEN MATCHED THEN UPDATE SET ...
135
+ WHEN NOT MATCHED THEN INSERT ...;
136
+ ```
137
+
138
+ For multi-statement atomicity, design idempotent operations or use the `__commit_version` from Time Travel for compensating reads.
139
+
140
+ ---
141
+
142
+ ## Bulk Load: Stage → Volume, COPY INTO Differences
143
+
144
+ ### Snowflake → ClickZetta
145
+
146
+ ```sql
147
+ -- Snowflake
148
+ COPY INTO orders
149
+ FROM @my_stage/data/2024/
150
+ FILE_FORMAT = (TYPE = CSV FIELD_DELIMITER = ',' SKIP_HEADER = 1)
151
+ PATTERN = '.*\.csv';
152
+
153
+ -- ClickZetta
154
+ COPY INTO orders
155
+ FROM VOLUME my_oss_volume
156
+ USING CSV
157
+ OPTIONS('header' = 'true', 'sep' = ',')
158
+ SUBDIRECTORY 'data/2024/'
159
+ REGEXP '.*\.csv';
160
+ ```
161
+
162
+ | Snowflake | ClickZetta |
163
+ |---|---|
164
+ | `@stage_name` | `VOLUME volume_name` |
165
+ | `FILE_FORMAT = (TYPE = CSV ...)` | `USING CSV OPTIONS(...)` |
166
+ | `PATTERN = '...'` | `REGEXP '...'` |
167
+ | `FILES = ('a.csv','b.csv')` | `FILES('a.csv','b.csv')` |
168
+
169
+ ### Export
170
+
171
+ ```sql
172
+ -- Snowflake
173
+ COPY INTO @my_stage FROM orders FILE_FORMAT = (TYPE = PARQUET);
174
+
175
+ -- ClickZetta
176
+ COPY INTO VOLUME my_oss_volume
177
+ SUBDIRECTORY 'export/orders/'
178
+ FROM orders
179
+ USING PARQUET;
180
+ ```
181
+
182
+ ---
183
+
184
+ ## Other ClickZetta-Specific DML Notes
185
+
186
+ These ClickZetta features have no direct Snowflake equivalent (some, such as Hive-style partition inserts and Time Travel restore, have close counterparts in Spark/Databricks):
187
+
188
+ - `INSERT INTO ... PARTITION (col)` — Hive-style dynamic partition (Snowflake auto-clusters via CLUSTER BY)
189
+ - `COPY OVERWRITE INTO` — atomic overwrite-on-load
190
+ - `RESTORE TABLE ... TO TIMESTAMP AS OF ...` — Time Travel restore (Snowflake uses different syntax, Delta uses VERSION AS OF)
191
+
192
+ For the full DML syntax of these features, refer to ClickZetta Lakehouse documentation.