@clickzetta/cz-cli-darwin-arm64 0.3.40 → 0.3.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/clickzetta-app-python-sdk/SKILL.md +153 -0
  3. package/bin/skills/clickzetta-app-python-sdk/eval_cases.jsonl +12 -0
  4. package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +196 -0
  5. package/bin/skills/clickzetta-app-python-sdk/references/connector.md +143 -0
  6. package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +122 -0
  7. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +128 -287
  8. package/bin/skills/clickzetta-bi-connect/SKILL.md +176 -0
  9. package/bin/skills/clickzetta-bi-connect/eval_cases.jsonl +5 -0
  10. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +170 -0
  11. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +633 -0
  12. package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +5 -0
  13. package/bin/skills/clickzetta-data-ingest-pipeline/eval_cases.jsonl +5 -0
  14. package/bin/skills/clickzetta-data-science/SKILL.md +125 -0
  15. package/bin/skills/clickzetta-data-science/eval_cases.jsonl +12 -0
  16. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +146 -0
  17. package/bin/skills/clickzetta-data-science/references/data-patterns.md +110 -0
  18. package/bin/skills/clickzetta-data-science/references/setup.md +160 -0
  19. package/bin/skills/clickzetta-data-science/references/stats-functions.md +195 -0
  20. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +122 -0
  21. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +156 -0
  22. package/bin/skills/clickzetta-data-sharing/SKILL.md +160 -0
  23. package/bin/skills/clickzetta-data-sharing/eval_cases.jsonl +3 -0
  24. package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +134 -0
  25. package/bin/skills/clickzetta-dw-modeling/SKILL.md +103 -11
  26. package/bin/skills/clickzetta-dynamic-table/SKILL.md +58 -2
  27. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +4 -4
  28. package/bin/skills/clickzetta-external-catalog/SKILL.md +123 -0
  29. package/bin/skills/clickzetta-external-catalog/eval_cases.jsonl +5 -0
  30. package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +130 -0
  31. package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +34 -0
  32. package/bin/skills/clickzetta-java-sdk/SKILL.md +186 -0
  33. package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +12 -0
  34. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +163 -0
  35. package/bin/skills/clickzetta-java-sdk/references/realtime.md +212 -0
  36. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +31 -0
  37. package/bin/skills/clickzetta-metadata/SKILL.md +28 -30
  38. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +39 -0
  39. package/bin/skills/clickzetta-pipeline-review/SKILL.md +377 -0
  40. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +323 -0
  41. package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +5 -0
  42. package/bin/skills/clickzetta-semantic-view/SKILL.md +207 -0
  43. package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +12 -0
  44. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +167 -0
  45. package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +92 -0
  46. package/bin/skills/clickzetta-spark-flink-connector/eval_cases.jsonl +5 -0
  47. package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +147 -0
  48. package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +132 -0
  49. package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +115 -9
  50. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +249 -0
  51. package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +3 -0
  52. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +350 -0
  53. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +279 -0
  54. package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +504 -0
  55. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +372 -0
  56. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +260 -0
  57. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +382 -0
  58. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +346 -0
  59. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +229 -0
  60. package/bin/skills/clickzetta-studio-task-manager/SKILL.md +652 -0
  61. package/bin/skills/clickzetta-table-lineage/SKILL.md +90 -0
  62. package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +1 -0
  63. package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +14 -0
  64. package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +38 -0
  65. package/bin/skills/clickzetta-table-lineage/references/table_lineage_standalone.html +562 -0
  66. package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +25 -0
  67. package/bin/skills/clickzetta-zettapark/SKILL.md +248 -0
  68. package/bin/skills/clickzetta-zettapark/eval_cases.jsonl +12 -0
  69. package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +283 -0
  70. package/package.json +1 -1
  71. package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
  72. package/bin/skills/clickzetta-ai-vector-search/eval_cases.jsonl +0 -4
  73. package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
@@ -1,4 +0,0 @@
1
- {"case_id":"001","type":"should_call","user_input":"怎么在 ClickZetta 里创建 VECTOR 类型的列存储 embedding?","expected_skill":"clickzetta-ai-vector-search","expected_output_contains":["VECTOR","FLOAT"]}
2
- {"case_id":"002","type":"should_call","user_input":"HNSW 向量索引怎么创建?支持哪些距离函数?","expected_skill":"clickzetta-ai-vector-search","expected_output_contains":["HNSW","cosine"]}
3
- {"case_id":"003","type":"should_call","user_input":"怎么做 ANN 近似最近邻检索?SQL 怎么写?","expected_skill":"clickzetta-ai-vector-search","expected_output_contains":["ANN","distance"]}
4
- {"case_id":"004","type":"should_call","user_input":"向量检索和倒排索引能融合查询吗?怎么写?","expected_skill":"clickzetta-ai-vector-search","expected_output_contains":["向量","检索"]}
@@ -1,155 +0,0 @@
1
- # 向量检索参考
2
-
3
- > 来源:https://www.yunqi.tech/documents/vector-search 等
4
-
5
- ## VECTOR 数据类型
6
-
7
- ```sql
8
- -- 语法
9
- vector(scalar_type, dimension)
10
- vector(dimension) -- 默认 float 类型
11
-
12
- -- 示例
13
- CREATE TABLE embeddings (
14
- id INT,
15
- content STRING,
16
- vec VECTOR(FLOAT, 1024), -- 1024 维 float 向量
17
- vec_bin VECTOR(TINYINT, 128) -- 128 维 tinyint 向量(二值化)
18
- );
19
- ```
20
-
21
- 支持的元素类型:`FLOAT`(f32)、`TINYINT`(i8/b1)
22
-
23
- ---
24
-
25
- ## 创建向量索引
26
-
27
- ```sql
28
- -- 建表时内联创建
29
- CREATE TABLE doc_embeddings (
30
- id INT,
31
- content STRING,
32
- vec VECTOR(FLOAT, 1024),
33
- INDEX vec_idx (vec) USING VECTOR PROPERTIES (
34
- "distance.function" = "cosine_distance",
35
- "scalar.type" = "f32",
36
- "m" = "16",
37
- "ef.construction" = "128"
38
- )
39
- );
40
-
41
- -- 在已有表上添加向量索引
42
- ALTER TABLE doc_embeddings ADD INDEX vec_idx (vec) USING VECTOR PROPERTIES (
43
- "distance.function" = "cosine_distance",
44
- "scalar.type" = "f32"
45
- );
46
-
47
- -- 为存量数据构建索引
48
- BUILD INDEX vec_idx ON doc_embeddings;
49
- ```
50
-
51
- ### 关键参数
52
-
53
- | 参数 | 可选值 | 默认值 | 说明 |
54
- |---|---|---|---|
55
- | distance.function | l2_distance, cosine_distance, jaccard_distance, hamming_distance | cosine_distance | 距离函数 |
56
- | scalar.type | f32, f16, i8, b1 | f32 | 索引元素类型 |
57
- | m | 建议 ≤ 1000 | 16 | HNSW 最大邻居数 |
58
- | ef.construction | 建议 ≤ 5000 | 128 | 构建索引时的候选集大小 |
59
- | compress.codec | uncompressed/zstd/lz4 | uncompressed | 压缩算法 |
60
-
61
- ---
62
-
63
- ## 插入向量数据
64
-
65
- ```sql
66
- -- 直接插入
67
- INSERT INTO doc_embeddings (id, content, vec) VALUES
68
- (1, 'hello world', vector(0.1, 0.2, 0.3, ...)),
69
- (2, 'foo bar', vector(0.4, 0.5, 0.6, ...));
70
-
71
- -- 从字符串转换
72
- INSERT INTO doc_embeddings (id, vec)
73
- SELECT id, CAST('[0.1, 0.2, 0.3]' AS VECTOR(3))
74
- FROM source_table;
75
-
76
- -- 从 ARRAY 列转换(外部系统写入场景)
77
- INSERT OVERWRITE doc_embeddings
78
- SELECT id, content, CAST(vec_array AS VECTOR(FLOAT, 1024))
79
- FROM staging_table;
80
- ```
81
-
82
- ---
83
-
84
- ## 向量检索
85
-
86
- ```sql
87
- -- 调整探索因子(精度 vs 速度权衡)
88
- SET cz.vector.index.search.ef = 64;
89
-
90
- -- L2 距离检索(欧几里得距离,越小越相似)
91
- SELECT id, content, l2_distance(vec, vector(0.1, 0.2, 0.3, ...)) AS dist
92
- FROM doc_embeddings
93
- ORDER BY dist
94
- LIMIT 10;
95
-
96
- -- 余弦距离检索(越小越相似)
97
- SELECT id, content, cosine_distance(vec, CAST('[0.1,0.2,0.3]' AS VECTOR(3))) AS dist
98
- FROM doc_embeddings
99
- ORDER BY dist
100
- LIMIT 10;
101
-
102
- -- 带过滤条件的向量检索(向量 + 标量融合)
103
- SELECT id, content, cosine_distance(vec, :query_vec) AS dist
104
- FROM doc_embeddings
105
- WHERE category = 'tech'
106
- AND cosine_distance(vec, :query_vec) < 0.3
107
- ORDER BY dist
108
- LIMIT 10;
109
- ```
110
-
111
- ---
112
-
113
- ## 距离函数速查
114
-
115
- | 函数 | 适用场景 | 说明 |
116
- |---|---|---|
117
- | `l2_distance(v1, v2)` | 通用语义检索 | 欧几里得距离,越小越相似 |
118
- | `cosine_distance(v1, v2)` | 文本语义检索 | 余弦距离,越小越相似 |
119
- | `dot_product(v1, v2)` | 归一化向量 | 点积,越大越相似 |
120
- | `hamming_distance(v1, v2)` | 二值向量 | 汉明距离,越小越相似 |
121
- | `jaccard_distance(v1, v2)` | 集合相似度 | 雅卡德距离,越小越相似 |
122
- | `binary_quantize(v)` | 向量压缩 | 将 float 向量二值化 |
123
-
124
- ---
125
-
126
- ## 向量 + 倒排索引融合检索
127
-
128
- ```sql
129
- -- 建表:同时支持向量索引和倒排索引
130
- CREATE TABLE hybrid_search (
131
- id INT,
132
- content STRING,
133
- vec VECTOR(FLOAT, 1024),
134
- INDEX content_inv_idx (content) USING INVERTED,
135
- INDEX vec_idx (vec) USING VECTOR PROPERTIES (
136
- "distance.function" = "cosine_distance"
137
- )
138
- );
139
-
140
- -- 融合检索:先用倒排过滤,再用向量排序
141
- SELECT id, content, cosine_distance(vec, :query_vec) AS dist
142
- FROM hybrid_search
143
- WHERE content LIKE '%关键词%'
144
- ORDER BY dist
145
- LIMIT 10;
146
- ```
147
-
148
- ---
149
-
150
- ## 注意事项
151
-
152
- - 向量类型的列不能直接用于 `ORDER BY` 或 `GROUP BY`(只能对距离函数的计算结果排序)
153
- - 向量索引性能与内存/磁盘缓存直接相关,建议**单独占用 VCluster**
154
- - 外部系统写入时不能直接写 VECTOR 类型,需先写 ARRAY 再 CAST 转换
155
- - 检索参数 `cz.vector.index.search.ef`(即检索时的 `ef`)值越大,检索精度越高但延迟越大;建议从 64 开始调优