@clickzetta/cz-cli-darwin-x64 0.3.92 → 0.3.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/clickzetta-ai-function/SKILL.md +109 -0
  3. package/bin/skills/clickzetta-ai-function/eval_cases.jsonl +4 -0
  4. package/bin/skills/clickzetta-ai-function/references/ai-function-ddl.md +106 -0
  5. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +124 -124
  6. package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +5 -5
  7. package/bin/skills/clickzetta-bi-connect/SKILL.md +79 -78
  8. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +56 -56
  9. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +386 -382
  10. package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -5
  11. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +73 -212
  12. package/bin/skills/clickzetta-data-science/SKILL.md +57 -56
  13. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +38 -38
  14. package/bin/skills/clickzetta-data-science/references/data-patterns.md +16 -16
  15. package/bin/skills/clickzetta-data-science/references/setup.md +28 -28
  16. package/bin/skills/clickzetta-data-science/references/stats-functions.md +44 -44
  17. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +22 -22
  18. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +32 -32
  19. package/bin/skills/clickzetta-dw-modeling/SKILL.md +1 -1
  20. package/bin/skills/clickzetta-external-function/SKILL.md +51 -109
  21. package/bin/skills/clickzetta-external-function/eval_cases.jsonl +4 -4
  22. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +39 -77
  23. package/bin/skills/clickzetta-java-sdk/SKILL.md +49 -48
  24. package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -12
  25. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +34 -34
  26. package/bin/skills/clickzetta-java-sdk/references/realtime.md +44 -44
  27. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +273 -507
  28. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +197 -231
  29. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +231 -304
  30. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +180 -179
  31. package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -5
  32. package/bin/skills/clickzetta-semantic-view/SKILL.md +74 -72
  33. package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -12
  34. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +75 -75
  35. package/bin/skills/clickzetta-sql-migration/SKILL.md +128 -0
  36. package/bin/skills/clickzetta-sql-migration/eval_cases.jsonl +10 -0
  37. package/bin/skills/clickzetta-sql-migration/references/ddl-reference.md +350 -0
  38. package/bin/skills/clickzetta-sql-migration/references/dml-differences.md +192 -0
  39. package/bin/skills/clickzetta-sql-migration/references/dml-reference.md +279 -0
  40. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/dql-reference.md +128 -128
  41. package/bin/skills/clickzetta-sql-migration/references/function-mapping.md +194 -0
  42. package/bin/skills/clickzetta-sql-migration/references/functions-reference.md +372 -0
  43. package/bin/skills/clickzetta-sql-migration/references/implicit-type-conversion.md +143 -0
  44. package/bin/skills/clickzetta-sql-migration/references/migration-databricks.md +260 -0
  45. package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/references/migration-snowflake.md +112 -112
  46. package/bin/skills/clickzetta-sql-migration/references/vs-snowflake.md +346 -0
  47. package/bin/skills/clickzetta-sql-migration/references/vs-spark.md +229 -0
  48. package/bin/skills/clickzetta-studio-task-manager/SKILL.md +326 -329
  49. package/bin/skills/clickzetta-table-lineage/SKILL.md +57 -55
  50. package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -1
  51. package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +5 -5
  52. package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +6 -6
  53. package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +2 -2
  54. package/bin/skills/clickzetta-volume-manager/SKILL.md +186 -100
  55. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +153 -52
  56. package/package.json +1 -1
  57. package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
  58. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  59. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
  60. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -191
  61. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
  62. package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
  63. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  64. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  65. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  66. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  67. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  68. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  69. /package/bin/skills/{clickzetta-sql-syntax-guide → clickzetta-sql-migration}/LICENSE +0 -0
@@ -0,0 +1,194 @@
1
+ # Function Mapping: Snowflake / Spark / Databricks → ClickZetta
2
+
3
+ > Comprehensive mapping table for functions that **differ** between systems, plus a list of unsupported functions and their workarounds.
4
+ > For the full ClickZetta function reference, refer to the official ClickZetta Lakehouse documentation.
5
+
6
+ ---
7
+
8
+ ## Conditional Functions
9
+
10
+ | Snowflake | Spark / Databricks | ClickZetta | Notes |
11
+ |---|---|---|---|
12
+ | `IFF(cond, a, b)` | `IF(cond, a, b)` | `IF(cond, a, b)` | ClickZetta does not support `IFF` |
13
+ | `ZEROIFNULL(x)` | — | `COALESCE(x, 0)` or `NVL(x, 0)` | |
14
+ | `NULLIFZERO(x)` | — | `NULLIF(x, 0)` | |
15
+ | `BOOLAND(a, b)` | — | `a AND b` | use boolean operator |
16
+ | `BOOLOR(a, b)` | — | `a OR b` | |
17
+ | `DECODE(...)` | `DECODE(...)` | `DECODE(...)` | ✅ all supported |
18
+ | `NULLIF` / `COALESCE` / `NVL` | same | same | ✅ all supported |
19
+
20
+ ---
21
+
22
+ ## Date / Time Functions
23
+
24
+ | Snowflake | Spark / Databricks | ClickZetta | Notes |
25
+ |---|---|---|---|
26
+ | `DATEADD(day, n, dt)` | `DATE_ADD(dt, n)` | `DATEADD(day, n, dt)` ✅ or `DATE_ADD(dt, n)` ✅ | both syntaxes work |
27
+ | `DATEDIFF(day, start, end)` | `DATEDIFF(end, start)` | `DATEDIFF(day, start, end)` ✅ or `DATEDIFF(end, start)` ✅ | both supported, but **2-arg form has reversed order from Snowflake** |
28
+ | `DATE_TRUNC('month', dt)` | `DATE_TRUNC('month', dt)` | same | ✅ identical |
29
+ | `TO_DATE(s)` / `TO_TIMESTAMP(s)` | same | same | ✅ identical |
30
+ | `CONVERT_TIMEZONE(tz, dt)` | `from_utc_timestamp(dt, tz)` | `FROM_UTC_TIMESTAMP(dt, tz)` / `TO_UTC_TIMESTAMP(dt, tz)` | |
31
+ | `SYSDATE()` / `GETDATE()` | `current_timestamp()` | `CURRENT_TIMESTAMP()` or `NOW()` | both supported |
32
+ | `TIMESTAMPADD(unit, n, dt)` | — | `dt + INTERVAL n unit` | |
33
+ | `LAST_DAY(dt)` | `last_day(dt)` | `LAST_DAY(dt)` | ✅ identical |
34
+ | `MONTHS_BETWEEN(d1, d2)` | `months_between(d1, d2)` | `MONTHS_BETWEEN(d1, d2)` | ✅ identical |
35
+ | `YEAR(dt)` / `MONTH(dt)` / `DAY(dt)` | same | same | ✅ identical |
36
+ | `DATE_PART('year', dt)` | `date_part('year', dt)` | ❌ not supported | use `EXTRACT(YEAR FROM dt)` or `YEAR(dt)` |
37
+ | `MAKEDATE(year, dayofyear)` | — | ❌ not supported | use `MAKE_DATE(year, month, day)`; for a day-of-year input use `DATE_ADD(MAKE_DATE(year, 1, 1), dayofyear - 1)` |
38
+ | `CONVERT_TZ(dt, from, to)` | — | ❌ not supported | use `FROM_UTC_TIMESTAMP` / `TO_UTC_TIMESTAMP` |
39
+
40
+ ---
41
+
42
+ ## String Functions
43
+
44
+ | Snowflake | Spark / Databricks | ClickZetta | Notes |
45
+ |---|---|---|---|
46
+ | `CHARINDEX(sub, s)` | `instr(s, sub)` | `INSTR(s, sub)` | ⚠️ **parameter order is reversed from Snowflake** |
47
+ | `EDITDISTANCE(s1, s2)` | `levenshtein(s1, s2)` | ❌ `LEVENSHTEIN` not supported | use Python UDF / ZettaPark |
48
+ | `SOUNDEX(s)` | `soundex(s)` | ❌ not supported | no alternative |
49
+ | `STRTOK(s, delim, n)` | `split(s, delim)[n-1]` | `SPLIT_PART(s, delim, n)` | |
50
+ | `ILIKE` | `ilike` | `ILIKE` | ✅ all supported |
51
+ | `RLIKE` / `REGEXP_LIKE` | `rlike` | `RLIKE` / `REGEXP_LIKE` | ✅ all supported |
52
+ | `CONTAINS(s, sub)` | `contains(s, sub)` | `INSTR(s, sub) > 0` | |
53
+ | `STARTSWITH(s, p)` | `startswith(s, p)` | `STARTSWITH(s, p)` ✅ or `s LIKE 'p%'` | both supported |
54
+ | `ENDSWITH(s, p)` | `endswith(s, p)` | `ENDSWITH(s, p)` ✅ or `s LIKE '%p'` | both supported |
55
+ | `INITCAP(s)` | `initcap(s)` | `INITCAP(s)` | ✅ identical |
56
+ | `REGEXP_SUBSTR(s, p)` | `regexp_extract(s, p, 0)` | ❌ `REGEXP_SUBSTR` not supported | use `REGEXP_EXTRACT(s, '(p)')` |
57
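+ | `OVERLAY(s PLACING new FROM pos)` | `overlay(...)` | ❌ not supported | use `CONCAT(LEFT(s, pos-1), new, SUBSTR(s, pos+len))`, where `len` is the number of characters replaced (defaults to `LENGTH(new)` when no `FOR len` is given) |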
58
+ | `FORMAT(num, decimals)` | — | ❌ thousand-separator format not supported | use `ROUND` + `CAST` |
59
+
60
+ ---
61
+
62
+ ## Aggregate Functions
63
+
64
+ | Snowflake | Spark / Databricks | ClickZetta | Notes |
65
+ |---|---|---|---|
66
+ | `LISTAGG(col, ',') WITHIN GROUP (ORDER BY col)` | `concat_ws(',', collect_list(col))` | `GROUP_CONCAT(col ORDER BY col SEPARATOR ',')` | |
67
+ | `ARRAY_AGG(col) WITHIN GROUP (ORDER BY col)` | `array_agg(col)` (no ordering) | `ARRAY_AGG(col)` | ⚠️ `WITHIN GROUP` not supported |
68
+ | `OBJECT_AGG(key, value)` | `map_from_entries(...)` | `MAP_AGG(key, value)` | |
69
+ | `APPROX_COUNT_DISTINCT(col)` | `approx_count_distinct(col)` | `APPROX_COUNT_DISTINCT(col)` | ✅ identical |
70
+ | `MEDIAN(col)` | — | `MEDIAN(col)` | ✅ identical |
71
+ | `BITAND_AGG / BITOR_AGG / BITXOR_AGG` | — | `BIT_AND / BIT_OR / BIT_XOR` | |
72
+ | `REGR_SLOPE / REGR_INTERCEPT` | — | ❌ not supported | manually compute via `CORR` + `STDDEV` |
73
+
74
+ ---
75
+
76
+ ## Array / Object Functions
77
+
78
+ | Snowflake | Spark / Databricks | ClickZetta | Notes |
79
+ |---|---|---|---|
80
+ | `ARRAY_CONSTRUCT(...)` | `array(...)` | `ARRAY(...)` | |
81
+ | `OBJECT_CONSTRUCT('k', v, ...)` | `named_struct('k', v, ...)` or `map(...)` | `named_struct('k', v, ...)` ✅ or `MAP(...)` | |
82
+ | `ARRAY_SIZE(arr)` | `size(arr)` | `SIZE(arr)` ✅ or `ARRAY_SIZE(arr)` ✅ | both supported |
83
+ | `ARRAY_CONTAINS(val, arr)` | `array_contains(arr, val)` | `ARRAY_CONTAINS(arr, val)` | ⚠️ **parameter order is reversed from Snowflake** |
84
+ | `OBJECT_KEYS(obj)` | `map_keys(map)` | `MAP_KEYS(map)` | |
85
+ | `FLATTEN(arr)` | `flatten(arr)` | `FLATTEN(arr)` | ✅ for array of arrays |
86
+ | `LATERAL FLATTEN(input => arr)` | `LATERAL VIEW EXPLODE(arr)` | `LATERAL VIEW EXPLODE(arr)` | ⚠️ Snowflake → Hive-style syntax change |
87
+ | — | `STRUCT(1 AS id, 'a' AS name)` | `named_struct('id', 1, 'name', 'a')` | ⚠️ ClickZetta `STRUCT` does not accept `AS` for named fields |
88
+ | `TO_ARRAY(expr)` | — | ❌ not supported | use `ARRAY(expr)` or `CAST(... AS ARRAY<T>)` |
89
+ | `MAP_FROM_ZIP(keys, values)` | — | ❌ not supported | use `MAP_FROM_ARRAYS(keys, values)` |
90
+
91
+ ClickZetta supports higher-order functions (Spark style) which Snowflake does not:
92
+
93
+ ```sql
94
+ SELECT TRANSFORM(skills, x -> UPPER(x)) FROM emp;
95
+ SELECT FILTER(scores, x -> x > 90) FROM students;
96
+ SELECT EXISTS(scores, x -> x > 100) FROM students;
97
+ SELECT FORALL(scores, x -> x >= 0) FROM students;
98
+ SELECT ZIP_WITH(a, b, (x, y) -> x + y) FROM t;
99
+ ```
100
+
101
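+ `AGGREGATE` / `REDUCE` (Spark names) are not supported — unnest the array with `LATERAL VIEW EXPLODE` and apply regular aggregate functions instead (use `ARRAY_AGG` to re-collect the result into an array if needed).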
102
+
103
+ ---
104
+
105
+ ## JSON / Semi-structured Access
106
+
107
+ ```sql
108
+ -- Snowflake (colon syntax + double-colon cast)
109
+ SELECT data:address:city AS city FROM users;
110
+ SELECT data:age::INT AS age FROM users;
111
+ SELECT data:phoneNumbers[0]:number FROM users;
112
+
113
+ -- ClickZetta (bracket syntax)
114
+ SELECT data['address']['city'] AS city FROM users;
115
+ SELECT CAST(data['age'] AS INT) AS age FROM users;
116
+ SELECT data['phoneNumbers'][0]['number'] FROM users;
117
+
118
+ -- ClickZetta also accepts :: cast operator
119
+ SELECT data['amount']::DOUBLE AS amount FROM orders;
120
+ ```
121
+
122
+ | Snowflake | ClickZetta |
123
+ |---|---|
124
+ | `data:key` | `data['key']` |
125
+ | `data[0]:key` | `data[0]['key']` |
126
+ | `data:key::TYPE` | `CAST(data['key'] AS TYPE)` or `data['key']::TYPE` |
127
+ | `PARSE_JSON(s)` | `PARSE_JSON(s)` ✅ identical |
128
+ | `TO_VARIANT(x)` | `PARSE_JSON(TO_JSON(x))` |
129
+ | `TO_JSON(x)` | `TO_JSON(x)` ✅ identical |
130
+ | `IS_NULL_VALUE(data:key)` | `data['key'] IS NULL` |
131
+
132
+ ---
133
+
134
+ ## System / Context Functions
135
+
136
+ | Snowflake | Spark / Databricks | ClickZetta | Notes |
137
+ |---|---|---|---|
138
+ | `CURRENT_DATABASE()` | `current_catalog()` | `CURRENT_WORKSPACE()` | concept rename |
139
+ | `CURRENT_WAREHOUSE()` | — | `CURRENT_VCLUSTER()` | concept rename |
140
+ | `CURRENT_ROLE()` | `current_user()` | `CURRENT_USER()` | no role function |
141
+ | `CURRENT_SCHEMA()` | `current_database()` | `CURRENT_SCHEMA()` | ✅ |
142
+ | — | — | `CURRENT_INSTANCE_ID()` | ClickZetta-specific |
143
+
144
+ ---
145
+
146
+ ## Type Conversion Functions
147
+
148
+ | Snowflake | Spark / Databricks | ClickZetta | Notes |
149
+ |---|---|---|---|
150
+ | `TRY_TO_NUMBER(s)` / `TRY_TO_DATE(s)` | `try_cast(s AS ...)` | `TRY_CAST(s AS ...)` | |
151
+ | `TO_VARIANT(x)` | — | `PARSE_JSON(TO_JSON(x))` | |
152
+ | `CAST(...)` / `::TYPE` | `CAST(...)` / `::TYPE` | `CAST(...)` / `::TYPE` | ✅ all supported |
153
+
154
+ ---
155
+
156
+ ## Functions with No Direct ClickZetta Equivalent
157
+
158
+ | Function | Source | Workaround |
159
+ |---|---|---|
160
+ | `SOUNDEX(s)` | Snowflake | None |
161
+ | `EDITDISTANCE` / `LEVENSHTEIN` | Snowflake / Spark | Python UDF |
162
+ | `JSON_ARRAY_LENGTH` | various | `SIZE(CAST(json AS ARRAY<STRING>))` |
163
+ | `JSON_OBJECT_KEYS` | various | manually parse |
164
+ | `REGEXP_SUBSTR` | Snowflake | `REGEXP_EXTRACT(s, '(p)')` |
165
+ | `GENERATE_SERIES(s, e)` / `RANGE(n)` | various | `EXPLODE(SEQUENCE(s, e))` |
166
+ | `TABLESAMPLE (n PERCENT)` | various | `WHERE RAND() < n / 100.0` (approximately n% of rows), or `ORDER BY RAND() LIMIT k` for a fixed row count `k` |
167
+ | `ST_*` geospatial functions | various | None — geospatial not supported |
168
+ | `TO_IPV4` / IP address functions | various | None |
169
+ | `HLL_APPROX` | various | `APPROX_COUNT_DISTINCT(col)` |
170
+ | `BITAND(a, b)` / `BITOR(a, b)` / `BITXOR(a, b)` | various | bitwise operators `&` / `\|` / `^` |
171
+ | `INITCAP(s)` (in versions that miss it) | — | `CONCAT(UPPER(SUBSTR(s,1,1)), LOWER(SUBSTR(s,2)))` |
172
+ | `SQUARE(x)` | Snowflake | `POWER(x, 2)` |
173
+ | `HAVERSINE(...)` | Snowflake | None |
174
+ | `WIDTH_BUCKET(...)` | Snowflake | None |
175
+ | `FACTORIAL(n)` | various | `EXP(SUM(LN(i)))` aggregated over `i` from `EXPLODE(SEQUENCE(1, n))` (wrap in `ROUND` to remove floating-point error) |
176
+ | `BIN(x)` | various | `CONV(x, 10, 2)` |
177
+
178
+ ---
179
+
180
+ ## Vector Functions (ClickZetta-Specific)
181
+
182
+ ClickZetta has native vector functions for similarity search, which Snowflake/Spark do not provide:
183
+
184
+ ```sql
185
+ L2_DISTANCE(v1, v2) -- Euclidean distance
186
+ COSINE_DISTANCE(v1, v2) -- Cosine distance
187
+ DOT_PRODUCT(v1, v2) -- Dot product
188
+ HAMMING_DISTANCE(v1, v2) -- Hamming distance (binary)
189
+ JACCARD_DISTANCE(v1, v2) -- Jaccard distance
190
+ BINARY_QUANTIZE(v) -- float vector → binary
191
+ VECTOR(v1, v2, ...) -- construct vector
192
+ ```
193
+
194
+ If migrating from Snowflake Cortex Search or Databricks Vector Search, redesign around these primitives + the `VECTOR INDEX` (see ClickZetta Lakehouse documentation).
@@ -0,0 +1,372 @@
1
+ # Functions Complete Reference
2
+
3
+ > With Snowflake / Spark SQL difference annotations
4
+
5
+ ---
6
+
7
+ ## Numeric Functions
8
+
9
+ ```sql
10
+ ABS(x) -- absolute value
11
+ CEIL(x) / CEILING(x) -- round up
12
+ FLOOR(x) -- round down
13
+ ROUND(x, d) -- round to d decimal places
14
+ TRUNCATE(x, d) -- truncate to d decimal places
15
+ MOD(x, y) / x % y -- modulo
16
+ POWER(x, y) / POW(x, y) -- exponentiation
17
+ SQRT(x) -- square root
18
+ EXP(x) -- e^x
19
+ LN(x) / LOG(x) -- natural logarithm
20
+ LOG(base, x) -- logarithm with specified base
21
+ LOG2(x) / LOG10(x) -- base-2/base-10 logarithm
22
+ SIGN(x) -- sign (-1/0/1)
23
+ GREATEST(a, b, c, ...) -- maximum value
24
+ LEAST(a, b, c, ...) -- minimum value
25
+ RANDOM() / RAND() -- random number 0-1
26
+ PI() -- π
27
+ SIN(x) / COS(x) / TAN(x) -- trigonometric functions
28
+ ASIN(x) / ACOS(x) / ATAN(x) -- inverse trigonometric functions
29
+ ATAN2(y, x) -- arctangent
30
+ DEGREES(x) / RADIANS(x) -- degree/radian conversion
31
+ -- ⚠️ FACTORIAL not supported, use EXP(SUM(LN(i))) aggregated over the integers i = 1..n instead
32
+ -- ⚠️ BIN(x) not supported, use CONV(x, 10, 2) instead
33
+ HEX(x) -- convert to hexadecimal string
34
+ UNHEX(s) -- hexadecimal to string
35
+ CONV(x, from_base, to_base) -- base conversion (e.g., CONV(10,10,2) gives '1010')
36
+ ```
37
+
38
+ **Differences from Snowflake:**
39
+ - Snowflake `SQUARE(x)` → ClickZetta `POWER(x, 2)`
40
+ - Snowflake `HAVERSINE(lat1, lon1, lat2, lon2)` → ClickZetta not supported
41
+ - Snowflake `WIDTH_BUCKET` → ClickZetta not supported
42
+
43
+ ---
44
+
45
+ ## String Functions
46
+
47
+ ```sql
48
+ -- Basic operations
49
+ LENGTH(s) / CHAR_LENGTH(s) -- character length
50
+ OCTET_LENGTH(s) -- byte length
51
+ UPPER(s) / LOWER(s) -- case conversion
52
+ INITCAP(s) -- capitalize first letter
53
+ TRIM(s) / LTRIM(s) / RTRIM(s) -- trim whitespace
54
+ TRIM(BOTH 'x' FROM s) -- trim specified character
55
+ LPAD(s, n, pad) / RPAD(s, n, pad) -- padding
56
+ REPEAT(s, n) -- repeat
57
+ REVERSE(s) -- reverse
58
+ SPACE(n) -- n spaces
59
+
60
+ -- Concatenation
61
+ CONCAT(s1, s2, ...) -- concatenate (NULL propagates)
62
+ CONCAT_WS(sep, s1, s2, ...) -- concatenate with separator (skips NULL)
63
+ s1 || s2 -- concatenation operator
64
+
65
+ -- Substring
66
+ SUBSTR(s, pos) / SUBSTRING(s, pos)
67
+ SUBSTR(s, pos, len) / SUBSTRING(s, pos, len)
68
+ LEFT(s, n) / RIGHT(s, n)
69
+ MID(s, pos, len) -- same as SUBSTR
70
+
71
+ -- Search
72
+ INSTR(s, substr) -- find position (1-based, 0 means not found)
73
+ LOCATE(substr, s) -- same as INSTR, different parameter order
74
+ LOCATE(substr, s, pos) -- search from pos
75
+ POSITION(substr IN s) -- ✅ supported, returns substring position (1-based)
76
+ FIND_IN_SET(s, list) -- find in comma-separated list
77
+
78
+ -- Replace
79
+ REPLACE(s, old, new) -- replace all occurrences
80
+ TRANSLATE(s, from_chars, to_chars) -- character-level replacement
81
+ -- ⚠️ OVERLAY syntax not supported, use CONCAT(LEFT(s,pos-1), new, SUBSTR(s,pos+len)) instead (len = number of characters replaced; LENGTH(new) by default)
82
+
83
+ -- Regex
84
+ REGEXP_EXTRACT(s, pattern, group) -- extract matching group
85
+ REGEXP_EXTRACT_ALL(s, pattern) -- extract all matches
86
+ REGEXP_REPLACE(s, pattern, repl) -- regex replace
87
+ REGEXP_LIKE(s, pattern) -- regex match (returns boolean)
88
+ RLIKE(s, pattern) -- same as REGEXP_LIKE
89
+ s RLIKE pattern -- operator form
90
+ REGEXP_COUNT(s, pattern) -- match count
91
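+ REGEXP_SUBSTR(s, pattern) -- extract first match (⚠️ function-mapping.md lists REGEXP_SUBSTR as unsupported — verify; fallback: REGEXP_EXTRACT(s, '(pattern)'))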
92
+
93
+ -- Split
94
+ SPLIT(s, delimiter) -- split by delimiter, returns ARRAY
95
+ SPLIT_PART(s, delimiter, n) -- get nth split part (1-based)
96
+
97
+ -- Formatting
98
+ FORMAT_STRING(fmt, args...) -- printf style (e.g., FORMAT_STRING('%d items', 5) → '5 items')
99
+ -- ⚠️ FORMAT(number, decimals) number thousand-separator formatting not supported, use ROUND + CAST instead
100
+
101
+ -- Encoding
102
+ BASE64(s) / UNBASE64(s) -- Base64 encode/decode
103
+ MD5(s) -- MD5 hash
104
+ SHA1(s) / SHA2(s, bits) -- SHA hash
105
+ CRC32(s) -- CRC32
106
+ ENCODE(s, charset) / DECODE(s, charset) -- charset encode/decode
107
+
108
+ -- Other
109
+ ASCII(s) -- ASCII code of first character
110
+ CHAR(n) -- ASCII code to character
111
+ -- ⚠️ SOUNDEX not supported
112
+ -- ⚠️ LEVENSHTEIN not supported, use Python UDF or ZettaPark instead
113
+ HAMMING_DISTANCE(s1, s2) -- Hamming distance (strings)
114
+ ```
115
+
116
+ **Differences from Snowflake:**
117
+ - Snowflake `CHARINDEX(substr, s)` → ClickZetta `INSTR(s, substr)` or `LOCATE(substr, s)` (different parameter order!)
118
+ - Snowflake `EDITDISTANCE(s1, s2)` → ClickZetta does not support LEVENSHTEIN, use Python UDF
119
+ - Snowflake `STRTOK(s, delim, n)` → ClickZetta `SPLIT_PART(s, delim, n)`
120
+ - Snowflake `ILIKE(s, pattern)` → ClickZetta `ILIKE` ✅ also supported!
121
+ - Snowflake `CONTAINS(s, substr)` → ClickZetta `INSTR(s, substr) > 0`
122
+ - Snowflake `STARTSWITH(s, prefix)` → ClickZetta `s LIKE 'prefix%'` or `STARTSWITH(s, prefix)`
123
+ - Snowflake `ENDSWITH(s, suffix)` → ClickZetta `s LIKE '%suffix'` or `ENDSWITH(s, suffix)`
124
+
125
+ ---
126
+
127
+ ## Date/Time Functions
128
+
129
+ ```sql
130
+ -- Get current time
131
+ CURRENT_DATE() -- current date
132
+ CURRENT_TIMESTAMP() / NOW() -- current timestamp (with timezone)
133
+ CURRENT_TIME() -- current time
134
+ LOCALTIMESTAMP() -- local timestamp
135
+
136
+ -- Extract parts
137
+ YEAR(dt) / MONTH(dt) / DAY(dt)
138
+ HOUR(dt) / MINUTE(dt) / SECOND(dt)
139
+ DAYOFWEEK(dt) -- 1=Sunday, 7=Saturday
140
+ DAYOFMONTH(dt) -- same as DAY
141
+ DAYOFYEAR(dt) -- day of year
142
+ WEEKOFYEAR(dt) -- week of year
143
+ QUARTER(dt) -- quarter (1-4)
144
+ EXTRACT(YEAR FROM dt) -- standard SQL extraction
145
+ -- ⚠️ DATE_PART('year', dt) not supported, use EXTRACT or YEAR(dt) instead
146
+
147
+ -- Date arithmetic
148
+ DATE_ADD(dt, n) -- add n days
149
+ DATE_SUB(dt, n) -- subtract n days
150
+ dt + INTERVAL n DAY -- add n days (standard SQL)
151
+ dt - INTERVAL n DAY -- subtract n days
152
+ dt + INTERVAL '1-2' YEAR TO MONTH -- add 1 year 2 months
153
+ ADDDATE(dt, n) -- same as DATE_ADD
154
+ SUBDATE(dt, n) -- same as DATE_SUB
155
+ ADD_MONTHS(dt, n) -- add n months
156
+ MONTHS_BETWEEN(dt1, dt2) -- month difference
157
+
158
+ -- Date difference
159
+ DATEDIFF(end_dt, start_dt) -- two-parameter form: returns day difference (end first)
160
+ DATEDIFF(unit, start_dt, end_dt) -- three-parameter form: specify unit (day/hour/month etc.), Snowflake-compatible
161
+ TIMESTAMPDIFF(unit, dt1, dt2) -- difference in specified unit
162
+
163
+ -- Truncation
164
+ DATE_TRUNC('year', dt) -- truncate to year
165
+ DATE_TRUNC('month', dt) -- truncate to month
166
+ DATE_TRUNC('day', dt) -- truncate to day
167
+ DATE_TRUNC('hour', dt) -- truncate to hour
168
+ DATE_TRUNC('week', dt) -- truncate to week (Monday)
169
+ TRUNC(dt, 'MM') -- Oracle-style truncation
170
+
171
+ -- Formatting
172
+ DATE_FORMAT(dt, 'yyyy-MM-dd') -- format to string
173
+ DATE_FORMAT(dt, 'yyyy-MM-dd HH:mm:ss')
174
+ TO_CHAR(dt, 'YYYY-MM-DD') -- same as DATE_FORMAT
175
+
176
+ -- Conversion
177
+ TO_DATE('2024-01-01') -- string to date
178
+ TO_DATE('2024-01-01', 'yyyy-MM-dd')
179
+ TO_TIMESTAMP('2024-01-01 12:00:00')
180
+ TO_TIMESTAMP('2024-01-01', 'yyyy-MM-dd')
181
+ CAST('2024-01-01' AS DATE)
182
+ CAST('2024-01-01 12:00:00' AS TIMESTAMP)
183
+ FROM_UNIXTIME(unix_ts) -- Unix timestamp to timestamp
184
+ FROM_UNIXTIME(unix_ts, fmt) -- to formatted string
185
+ UNIX_TIMESTAMP() -- current Unix timestamp
186
+ UNIX_TIMESTAMP(dt) -- date to Unix timestamp
187
+ UNIX_TIMESTAMP(s, fmt) -- string to Unix timestamp
188
+
189
+ -- Other
190
+ LAST_DAY(dt) -- last day of month
191
+ NEXT_DAY(dt, 'Monday') -- next specified day of week
192
+ MAKE_DATE(year, month, day) -- construct date (note: MAKE_DATE not MAKEDATE)
193
+ ADD_MONTHS(dt, n) -- add n months
194
+ MONTHS_BETWEEN(dt1, dt2) -- month difference
195
+ TIMESTAMPDIFF(unit, dt1, dt2) -- difference in specified unit (e.g., TIMESTAMPDIFF(MONTH, ...))
196
+ FROM_UTC_TIMESTAMP(ts, tz) -- UTC to specified timezone
197
+ TO_UTC_TIMESTAMP(ts, tz) -- specified timezone to UTC
198
+ -- ⚠️ CONVERT_TZ(dt, from_tz, to_tz) not supported, use FROM_UTC_TIMESTAMP/TO_UTC_TIMESTAMP instead
199
+ -- ⚠️ MAKEDATE(year, dayofyear) not supported, use MAKE_DATE(year, month, day), or DATE_ADD(MAKE_DATE(year, 1, 1), dayofyear - 1) for a day-of-year input
200
+ -- ⚠️ MAKETIME / PERIOD_ADD / PERIOD_DIFF not supported
201
+ ```
202
+
203
+ **Differences from Snowflake:**
204
+ - Snowflake `DATEADD(day, n, dt)` → ClickZetta `DATEADD(day, n, dt)` ✅ also supported; or use `DATE_ADD(dt, n)` / `dt + INTERVAL n DAY`
205
+ - Snowflake `DATEDIFF(day, start, end)` → ClickZetta `DATEDIFF(day, start, end)` ✅ three-parameter form also supported; or use `DATEDIFF(end, start)` two-parameter form (returns days)
206
+ - Snowflake `DATE_TRUNC('day', dt)` → ClickZetta same
207
+ - Snowflake `TO_DATE(s)` → ClickZetta same
208
+ - Snowflake `CONVERT_TIMEZONE(from, to, ts)` → ClickZetta `FROM_UTC_TIMESTAMP(TO_UTC_TIMESTAMP(ts, from), to)` (convert to UTC first, then to the target timezone)
209
+ - Snowflake `CONVERT_TIMEZONE(tz, dt)` → ClickZetta `FROM_UTC_TIMESTAMP(dt, tz)` / `TO_UTC_TIMESTAMP(dt, tz)` (`CONVERT_TZ` is not supported)
210
+ - Snowflake `SYSDATE()` / `GETDATE()` → ClickZetta `CURRENT_TIMESTAMP()` / `NOW()`
211
+ - Snowflake `TIMESTAMPADD(unit, n, dt)` → ClickZetta `dt + INTERVAL n unit`
212
+
213
+ **Differences from Spark SQL:**
214
+ - Most functions are the same; ClickZetta is compatible with Spark date functions
215
+
216
+ ---
217
+
218
+ ## Conditional Functions
219
+
220
+ ```sql
221
+ -- IF
222
+ IF(condition, true_val, false_val)
223
+
224
+ -- CASE WHEN
225
+ CASE WHEN cond1 THEN val1
226
+ WHEN cond2 THEN val2
227
+ ELSE default_val
228
+ END
229
+
230
+ -- Simple CASE
231
+ CASE status
232
+ WHEN 'A' THEN 'Active'
233
+ WHEN 'I' THEN 'Inactive'
234
+ ELSE 'Unknown'
235
+ END
236
+
237
+ -- NULL handling
238
+ COALESCE(a, b, c) -- first non-NULL value
239
+ NVL(a, b) -- return b if a is NULL (same as IFNULL)
240
+ IFNULL(a, b) -- same as NVL
241
+ NULLIF(a, b) -- return NULL if a=b, otherwise return a
242
+ NVL2(a, b, c) -- return b if a is not NULL, otherwise c
243
+ ISNULL(a) -- is NULL (returns boolean)
244
+ ISNOTNULL(a) -- is not NULL
245
+
246
+ -- DECODE (Oracle/Hive style)
247
+ DECODE(expr, val1, res1, val2, res2, ..., default)
248
+
249
+ -- Type checking
250
+ TYPEOF(expr) -- returns type name as string
251
+ ```
252
+
253
+ **Differences from Snowflake:**
254
+ - Snowflake `IFF(cond, a, b)` → ClickZetta `IF(cond, a, b)`
255
+ - Snowflake `ZEROIFNULL(x)` → ClickZetta `COALESCE(x, 0)` or `NVL(x, 0)`
256
+ - Snowflake `NULLIFZERO(x)` → ClickZetta `NULLIF(x, 0)`
257
+ - Snowflake `BOOLAND(a, b)` / `BOOLOR(a, b)` → ClickZetta `a AND b` / `a OR b`
258
+
259
+ ---
260
+
261
+ ## Aggregate Functions
262
+
263
+ ```sql
264
+ -- Basic aggregation
265
+ COUNT(*) / COUNT(col) / COUNT(DISTINCT col)
266
+ SUM(col) / AVG(col) / MAX(col) / MIN(col)
267
+ STDDEV(col) / STDDEV_POP(col) / STDDEV_SAMP(col)
268
+ VARIANCE(col) / VAR_POP(col) / VAR_SAMP(col)
269
+
270
+ -- Boolean aggregation
271
+ BOOL_OR(cond) -- any one is true
272
+ BOOL_AND(cond) -- all are true
273
+ EVERY(cond) -- same as BOOL_AND
274
+
275
+ -- String aggregation
276
+ GROUP_CONCAT(col ORDER BY col SEPARATOR ',') -- replaces Snowflake LISTAGG
277
+ GROUP_CONCAT(DISTINCT col SEPARATOR ',')
278
+
279
+ -- Array aggregation
280
+ ARRAY_AGG(col) -- collect into array (includes NULL)
281
+ COLLECT_LIST(col) -- same as ARRAY_AGG
282
+ COLLECT_SET(col) -- collect deduplicated
283
+
284
+ -- Approximate aggregation
285
+ APPROX_COUNT_DISTINCT(col) -- approximate distinct count (HyperLogLog)
286
+ APPROX_PERCENTILE(col, p) -- approximate percentile
287
+
288
+ -- Statistical aggregation
289
+ CORR(x, y) -- correlation coefficient
290
+ COVAR_POP(x, y) / COVAR_SAMP(x, y) -- covariance
291
+ -- ⚠️ REGR_SLOPE / REGR_INTERCEPT not supported
292
+ -- Alternative: CORR(y,x) * STDDEV(y) / STDDEV(x) to calculate slope
293
+
294
+ -- Ordered set aggregation
295
+ PERCENTILE(col, p) -- exact percentile
296
+ PERCENTILE_APPROX(col, p) -- approximate percentile
297
+ MEDIAN(col) -- median
298
+ ```
299
+
300
+ **Differences from Snowflake:**
301
+ - Snowflake `LISTAGG(col, ',') WITHIN GROUP (ORDER BY col)` → ClickZetta `GROUP_CONCAT(col ORDER BY col SEPARATOR ',')`
302
+ - Snowflake `ARRAY_AGG(col) WITHIN GROUP (ORDER BY col)` → ClickZetta `ARRAY_AGG(col)` does not support WITHIN GROUP
303
+ - Snowflake `OBJECT_AGG(key, value)` → ClickZetta `MAP_AGG(key, value)`
304
+ - Snowflake `BITAND_AGG / BITOR_AGG / BITXOR_AGG` → ClickZetta `BIT_AND / BIT_OR / BIT_XOR`
305
+
306
+ ---
307
+
308
+ ## Type Conversion Functions
309
+
310
+ ```sql
311
+ -- Explicit conversion
312
+ CAST(expr AS target_type)
313
+ expr::target_type -- shorthand syntax
314
+
315
+ -- Safe conversion (returns NULL on failure instead of error)
316
+ TRY_CAST(expr AS target_type)
317
+
318
+ -- String conversion
319
+ TO_NUMBER(s) / TO_DECIMAL(s)
320
+ TO_DOUBLE(s)
321
+ TO_BOOLEAN(s) -- 'true'/'false'/'1'/'0'
322
+
323
+ -- Examples
324
+ CAST('123' AS INT)
325
+ CAST(123 AS STRING)
326
+ CAST('2024-01-01' AS DATE)
327
+ CAST('[1,2,3]' AS VECTOR(3)) -- string to vector
328
+ TRY_CAST('abc' AS INT) -- returns NULL
329
+ ```
330
+
331
+ **Differences from Snowflake:**
332
+ - Snowflake `TRY_TO_NUMBER / TRY_TO_DATE` → ClickZetta `TRY_CAST`
333
+ - Snowflake `TO_VARIANT(x)` → ClickZetta `PARSE_JSON(TO_JSON(x))`
334
+
335
+ ---
336
+
337
+ ## System/Context Functions
338
+
339
+ ```sql
340
+ CURRENT_USER() -- current username
341
+ CURRENT_WORKSPACE() -- current workspace
342
+ CURRENT_SCHEMA() -- current schema
343
+ CURRENT_VCLUSTER() -- current compute cluster
344
+ CURRENT_INSTANCE_ID() -- current instance ID
345
+ VERSION() -- version information
346
+ ```
347
+
348
+ **Differences from Snowflake:**
349
+ - Snowflake `CURRENT_DATABASE()` → ClickZetta `CURRENT_WORKSPACE()`
350
+ - Snowflake `CURRENT_WAREHOUSE()` → ClickZetta `CURRENT_VCLUSTER()`
351
+ - Snowflake `CURRENT_ROLE()` → ClickZetta has no direct equivalent
352
+
353
+ ---
354
+
355
+ ## Vector Functions
356
+
357
+ ```sql
358
+ -- Distance calculation
359
+ L2_DISTANCE(v1, v2) -- Euclidean distance (smaller = more similar)
360
+ COSINE_DISTANCE(v1, v2) -- Cosine distance (smaller = more similar)
361
+ DOT_PRODUCT(v1, v2) -- Dot product (larger = more similar, requires normalization)
362
+ HAMMING_DISTANCE(v1, v2) -- Hamming distance (binary vectors)
363
+ JACCARD_DISTANCE(v1, v2) -- Jaccard distance
364
+
365
+ -- Vector operations
366
+ BINARY_QUANTIZE(v) -- binarize float vector
367
+ VECTOR(v1, v2, ...) -- build vector
368
+
369
+ -- Build vector
370
+ SELECT VECTOR(0.1, 0.2, 0.3, 0.4);
371
+ SELECT CAST('[0.1, 0.2, 0.3]' AS VECTOR(3));
372
+ ```