@clickzetta/cz-cli-linux-x64 0.3.2 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/bin/cz-cli +0 -0
  2. package/package.json +1 -1
  3. package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
  4. package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
  5. package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
  6. package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
  7. package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
  8. package/bin/skills/clickzetta-ai-vector-search/SKILL.md +0 -160
  9. package/bin/skills/clickzetta-ai-vector-search/references/vector-search.md +0 -155
  10. package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
  11. package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
  12. package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
  13. package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
  14. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -293
  15. package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
  16. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
  17. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -450
  18. package/bin/skills/clickzetta-concepts/SKILL.md +0 -282
  19. package/bin/skills/clickzetta-concepts/references/brands-and-endpoints.md +0 -79
  20. package/bin/skills/clickzetta-concepts/references/object-model.md +0 -311
  21. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -165
  22. package/bin/skills/clickzetta-data-lifecycle/SKILL.md +0 -211
  23. package/bin/skills/clickzetta-data-lifecycle/references/lifecycle-reference.md +0 -175
  24. package/bin/skills/clickzetta-data-recovery/SKILL.md +0 -215
  25. package/bin/skills/clickzetta-data-recovery/evals/evals.json +0 -35
  26. package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
  27. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
  28. package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
  29. package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
  30. package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
  31. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
  32. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
  33. package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
  34. package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
  35. package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -540
  36. package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -259
  37. package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
  38. package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -86
  39. package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -257
  40. package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
  41. package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
  42. package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
  43. package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
  44. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  45. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -429
  46. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -268
  47. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
  48. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
  49. package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -120
  50. package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
  51. package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
  52. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
  53. package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -117
  54. package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
  55. package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
  56. package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
  57. package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
  58. package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
  59. package/bin/skills/clickzetta-information-schema/SKILL.md +0 -367
  60. package/bin/skills/clickzetta-information-schema/references/instance-views-reference.md +0 -276
  61. package/bin/skills/clickzetta-information-schema/references/metering-views-reference.md +0 -137
  62. package/bin/skills/clickzetta-information-schema/references/views-reference.md +0 -271
  63. package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
  64. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
  65. package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
  66. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -531
  67. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -186
  68. package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
  69. package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
  70. package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
  71. package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
  72. package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
  73. package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
  74. package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
  75. package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
  76. package/bin/skills/clickzetta-metadata-query/SKILL.md +0 -298
  77. package/bin/skills/clickzetta-metadata-query/references/show-desc-reference.md +0 -326
  78. package/bin/skills/clickzetta-monitoring/SKILL.md +0 -199
  79. package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
  80. package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
  81. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -402
  82. package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
  83. package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
  84. package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
  85. package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
  86. package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
  87. package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
  88. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -197
  89. package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
  90. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
  91. package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
  92. package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
  93. package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
  94. package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -353
  95. package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
  96. package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -173
  97. package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
  98. package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -160
  99. package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -123
  100. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -172
  101. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  102. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  103. package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
  104. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  105. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  106. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
  107. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  108. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  109. package/bin/skills/clickzetta-studio-overview/SKILL.md +0 -170
  110. package/bin/skills/clickzetta-studio-overview/references/studio-modules.md +0 -173
  111. package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -155
  112. package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
  113. package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
  114. package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
  115. package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -249
  116. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -194
  117. package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
  118. package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
@@ -1,140 +0,0 @@
1
- ---
2
- name: clickzetta-index-manager
3
- description: |
4
- 管理 ClickZetta Lakehouse 的三类索引:Bloom Filter 索引(等值查询加速)、
5
- 倒排索引(全文检索)、向量索引(语义相似度搜索)。覆盖创建、构建存量数据、
6
- 删除、查看等完整生命周期,以及索引类型选择指南。
7
- 当用户说"创建索引"、"加索引"、"Bloom Filter"、"布隆过滤器"、"倒排索引"、
8
- "全文检索"、"向量索引"、"向量搜索"、"相似度搜索"、"BUILD INDEX"、
9
- "DROP INDEX"、"SHOW INDEX"、"查询加速"、"索引优化"时触发。
10
- Keywords: index, bloom filter, inverted index, vector index, full-text search
11
- ---
12
-
13
- # ClickZetta 索引管理
14
-
15
- ## 索引类型选择
16
-
17
- | 需求 | 推荐索引 | 参考文件 |
18
- |---|---|---|
19
- | 高基数列等值查询(ID、邮箱、手机号) | Bloom Filter | [references/bloomfilter-index.md](references/bloomfilter-index.md) |
20
- | 文本关键词搜索、全文检索 | 倒排索引 | [references/inverted-index.md](references/inverted-index.md) |
21
- | 向量相似度搜索、语义检索、RAG | 向量索引 | [references/vector-index.md](references/vector-index.md) |
22
- | 存量数据补建索引、删除、查看 | — | [references/index-management.md](references/index-management.md) |
23
-
24
- ## ⚠️ 关键注意事项
25
-
26
- - **所有索引只对新写入数据生效**,旧数据需用 `BUILD INDEX` 补建(Bloom Filter 除外,不支持 BUILD INDEX)
27
- - Bloom Filter 旧数据生效方法:`INSERT OVERWRITE table_name SELECT * FROM table_name`(用表自身的数据重写该表)
28
- - `BUILD INDEX` 是同步任务,大表建议按分区逐批执行
29
- - **索引必须与表在同一 Schema 中**,跨 Schema 创建索引会报错(`index and table must in the same schema`)
30
-
31
- ---
32
-
33
- ## 步骤 1:选择索引类型并创建
34
-
35
- ### Bloom Filter(等值查询加速)
36
-
37
- 阅读 [references/bloomfilter-index.md](references/bloomfilter-index.md)
38
-
39
- ```sql
40
- -- 建表时指定
41
- CREATE TABLE orders (
42
- order_id INT,
43
- INDEX order_id_idx (order_id) BLOOMFILTER
44
- );
45
-
46
- -- 已有表添加
47
- CREATE BLOOMFILTER INDEX idx_name
48
- ON TABLE my_schema.orders(order_id)
49
- COMMENT '订单ID布隆过滤器';
50
- ```
51
-
52
- ### 倒排索引(全文检索)
53
-
54
- 阅读 [references/inverted-index.md](references/inverted-index.md)
55
-
56
- ```sql
57
- -- 数值/日期列(不需要 PROPERTIES)
58
- CREATE INVERTED INDEX id_idx ON TABLE t(order_id);
59
-
60
- -- 字符串列(必须指定分词器,否则报错)
61
- -- ⚠️ 字符串列不指定 analyzer 会创建失败
62
- CREATE INVERTED INDEX title_idx
63
- ON TABLE articles(title)
64
- PROPERTIES('analyzer'='chinese'); -- 中文内容用 chinese
65
-
66
- -- 其他分词器选项:
67
- -- 'keyword' → 不分词,整列作为一个词(适合精确匹配:状态码、标签)
68
- -- 'english' → 英文分词
69
- -- 'unicode' → 通用 Unicode 分词(多语言文本)
70
- -- 'chinese' → 中文分词,也适用于中英文混合内容(默认推荐)
71
-
72
- -- 查询
73
- SELECT * FROM articles WHERE match_any(title, '关键词', 'analyzer'='chinese');
74
- ```
75
-
76
- ### 向量索引(相似度搜索)
77
-
78
- 阅读 [references/vector-index.md](references/vector-index.md)
79
-
80
- ```sql
81
- CREATE VECTOR INDEX vec_idx
82
- ON TABLE embeddings(vec)
83
- PROPERTIES(
84
- "scalar.type" = "f32",
85
- "distance.function" = "cosine_distance"
86
- );
87
- ```
88
-
89
- ---
90
-
91
- ## 步骤 2:为存量数据构建索引
92
-
93
- 阅读 [references/index-management.md](references/index-management.md)
94
-
95
- ```sql
96
- -- 全表构建(倒排索引和向量索引支持,Bloom Filter 不支持)
97
- BUILD INDEX index_name ON my_schema.table_name;
98
-
99
- -- 按分区构建(大表推荐)
100
- BUILD INDEX index_name ON table_name WHERE dt = '2024-01-01';
101
- ```
102
-
103
- ---
104
-
105
- ## 步骤 3:查看和管理索引
106
-
107
- ```sql
108
- -- 列出表的所有索引
109
- SHOW INDEX FROM my_schema.orders;
110
-
111
- -- 查看索引详情
112
- DESC INDEX index_name;
113
- DESC INDEX EXTENDED index_name; -- 含索引大小
114
-
115
- -- 删除索引
116
- DROP INDEX IF EXISTS index_name;
117
- ```
118
-
119
- ---
120
-
121
- ## 常见问题
122
-
123
- | 问题 | 原因 | 解决方案 |
124
- |---|---|---|
125
- | 加了索引但查询没变快 | 旧数据未建索引 | 执行 `BUILD INDEX`(倒排/向量)或重写数据(Bloom Filter) |
126
- | BUILD INDEX 执行很慢 | 数据量大 | 按分区逐批执行 `BUILD INDEX ... WHERE partition=...` |
127
- | 倒排索引字符串列报错 | 未指定分词器(字符串列必须指定) | 添加 `PROPERTIES('analyzer'='chinese')` 或其他分词器 |
128
- | 向量索引查询结果不准 | ef.construction 太小 | 调大 `ef.construction`(默认 128,可调至 200-500) |
129
-
130
- ---
131
-
132
- ## 参考文档
133
-
134
- - [CREATE BLOOMFILTER INDEX](https://www.yunqi.tech/documents/CREATE-BLOOMFILTER-INDEX)
135
- - [CREATE INVERTED INDEX](https://www.yunqi.tech/documents/create-inverted-index)
136
- - [CREATE VECTOR INDEX](https://www.yunqi.tech/documents/create-vector-index)
137
- - [BUILD INDEX](https://www.yunqi.tech/documents/build-inverted-index)
138
- - [DROP INDEX](https://www.yunqi.tech/documents/DROP-INDEX)
139
- - [SHOW INDEX](https://www.yunqi.tech/documents/SHOW-INDEX)
140
- - [DESC INDEX](https://www.yunqi.tech/documents/DESC-INDEX)
@@ -1,67 +0,0 @@
1
- # Bloom Filter 索引参考
2
-
3
- > 来源:https://www.yunqi.tech/documents/CREATE-BLOOMFILTER-INDEX
4
-
5
- ## 适用场景
6
-
7
- 高基数列(如 ID、邮箱、手机号)的**等值查询**加速。通过跳过不含目标值的数据文件,减少 I/O。
8
-
9
- 不支持的列类型:INTERVAL、STRUCT、MAP、ARRAY。
10
-
11
- ## 建表时创建
12
-
13
- ```sql
14
- CREATE TABLE orders (
15
- order_id INT,
16
- customer_id INT,
17
- amount DOUBLE,
18
- INDEX order_id_idx (order_id) BLOOMFILTER COMMENT 'bloom filter on order_id',
19
- INDEX customer_id_idx (customer_id) BLOOMFILTER
20
- ) USING parquet;
21
- ```
22
-
23
- ## 已有表添加
24
-
25
- ```sql
26
- CREATE BLOOMFILTER INDEX [IF NOT EXISTS] index_name
27
- ON TABLE [schema.]table_name(column_name)
28
- [COMMENT 'comment']
29
- [PROPERTIES ('key' = 'value')];
30
- ```
31
-
32
- ### ngram 分词器(用于字符串模糊匹配)
33
-
34
- ```sql
35
- CREATE BLOOMFILTER INDEX idx_ngram
36
- ON TABLE demo(col_name)
37
- PROPERTIES ('analyzer' = 'ngram', 'n' = '3');
38
- ```
39
-
40
- `n` 为 ngram 长度,例如上例中 n=3 时 "Lakehouse" 被索引为 "Lak"、"ake"、"keh"...
41
-
42
- ## 注意事项
43
-
44
- - **只对新写入数据生效**,旧数据不生效
45
- - 旧数据需要生效:执行 `INSERT OVERWRITE table_name SELECT * FROM table_name` 重写数据
46
- - 一张表可以创建多个 Bloom Filter 索引
47
- - 目前只支持**单列索引**
48
-
49
- ## 示例(完整流程)
50
-
51
- ```sql
52
- -- 建表时指定
53
- CREATE TABLE t (
54
- order_id INT,
55
- customer_id INT,
56
- INDEX order_id_index (order_id) BLOOMFILTER COMMENT 'BLOOMFILTER'
57
- );
58
-
59
- -- 查看索引
60
- SHOW INDEX FROM t;
61
-
62
- -- 查看索引详情
63
- DESC INDEX order_id_index;
64
-
65
- -- 删除索引
66
- DROP INDEX order_id_index;
67
- ```
@@ -1,73 +0,0 @@
1
- # 索引管理命令参考
2
-
3
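- > 来源:[BUILD INDEX](https://www.yunqi.tech/documents/build-inverted-index)、[DROP INDEX](https://www.yunqi.tech/documents/DROP-INDEX)、[SHOW INDEX](https://www.yunqi.tech/documents/SHOW-INDEX)、[DESC INDEX](https://www.yunqi.tech/documents/DESC-INDEX)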
4
-
5
- ---
6
-
7
- ## BUILD INDEX(为存量数据构建索引)
8
-
9
- 支持向量索引和倒排索引,**不支持 Bloom Filter**。
10
-
11
- ```sql
12
- -- 全表构建
13
- BUILD INDEX index_name ON [schema.]table_name;
14
-
15
- -- 指定分区构建(支持 =, !=, >, >=, <, <=)
16
- BUILD INDEX index_name ON table_name
17
- WHERE partition_col1 = '2024-01-01' AND partition_col2 = 'us';
18
- ```
19
-
20
- 说明:
21
- - `BUILD INDEX` 是**同步任务**,执行过程消耗计算资源
22
- - 大分区表建议**按分区逐批**构建,避免单次消耗过多资源
23
- - 进度可通过 Job Profile 查看
24
-
25
- ---
26
-
27
- ## DROP INDEX(删除索引)
28
-
29
- ```sql
30
- DROP INDEX [IF EXISTS] index_name;
31
- ```
32
-
33
- 注意:删除索引**不会立即释放存储空间**,后续新增数据不再构建该索引数据。
34
-
35
- ---
36
-
37
- ## SHOW INDEX(列出表的所有索引)
38
-
39
- ```sql
40
- SHOW INDEX [IN|FROM] [schema.]table_name [LIMIT num];
41
- ```
42
-
43
- 示例:
44
- ```sql
45
- SHOW INDEX FROM orders;
46
- SHOW INDEX FROM my_schema.orders;
47
- ```
48
-
49
- ---
50
-
51
- ## DESC INDEX(查看索引详情)
52
-
53
- ```sql
54
- DESC INDEX [EXTENDED] index_name;
55
- ```
56
-
57
- - 基础模式:显示名称、创建时间、类型、所属表、列名
58
- - `EXTENDED`:额外显示索引大小(倒排索引支持,Bloom Filter 暂不支持)
59
-
60
- 示例输出:
61
- ```
62
- +--------------------------+--------------------------+
63
- | info_name | info_value |
64
- +--------------------------+--------------------------+
65
- | name | order_year_index |
66
- | creator | my_user |
67
- | created_time | 2024-12-27 10:51:58.977 |
68
- | index_type | inverted |
69
- | table_name | t |
70
- | table_column | order_year |
71
- | total_index_size | 296 |
72
- +--------------------------+--------------------------+
73
- ```
@@ -1,80 +0,0 @@
1
- # 倒排索引参考
2
-
3
- > 来源:https://www.yunqi.tech/documents/create-inverted-index
4
-
5
- ## 适用场景
6
-
7
- 文本搜索、关键词匹配。支持数值、日期、字符串列。字符串列必须指定分词器。
8
-
9
- ## 分词器选择
10
-
11
- | 分词器 | 适用场景 | 说明 |
12
- |---|---|---|
13
- | `keyword` | 精确匹配 | 不分词,整个字符串作为一个词根 |
14
- | `english` | 英文文本 | 识别连续 ASCII 字母和数字,转小写 |
15
- | `chinese` | 中英文混合 | 识别中文和英文,过滤标点,英文转小写 |
16
- | `unicode` | 多语言 | 基于 Unicode 文本分割算法,支持多语言 |
17
-
18
- 数值和日期类型**不需要**指定 PROPERTIES。
19
-
20
- ## 建表时创建
21
-
22
- ```sql
23
- CREATE TABLE articles (
24
- id INT,
25
- title STRING,
26
- content STRING,
27
- INDEX id_idx (id) INVERTED,
28
- INDEX title_idx (title) INVERTED PROPERTIES('analyzer'='chinese'),
29
- INDEX content_idx (content) INVERTED PROPERTIES('analyzer'='english')
30
- );
31
- ```
32
-
33
- ## 已有表添加
34
-
35
- ```sql
36
- CREATE INVERTED INDEX [IF NOT EXISTS] index_name
37
- ON TABLE [schema.]table_name(column_name)
38
- [COMMENT 'comment']
39
- [PROPERTIES('analyzer'='english|chinese|keyword|unicode')];
40
- ```
41
-
42
- ## 注意事项
43
-
44
- - **只对新写入数据生效**,旧数据需用 `BUILD INDEX` 命令补建
45
- - 只支持**单列索引**
46
-
47
- ## 查询语法
48
-
49
- ```sql
50
- -- 匹配任意词(OR)
51
- SELECT * FROM articles WHERE match_any(content, 'keyword1 keyword2');
52
-
53
- -- 匹配所有词(AND)
54
- SELECT * FROM articles WHERE match_all(content, 'keyword1 keyword2');
55
- ```
56
-
57
- ## 完整示例
58
-
59
- ```sql
60
- -- 建表
61
- CREATE TABLE t (
62
- order_id INT,
63
- order_year STRING,
64
- INDEX order_id_index (order_id) INVERTED COMMENT 'INVERTED'
65
- );
66
-
67
- -- 给已有列添加索引
68
- CREATE INVERTED INDEX order_year_index
69
- ON TABLE public.t(order_year)
70
- PROPERTIES('analyzer'='chinese');
71
-
72
- -- 对存量数据构建索引
73
- BUILD INDEX order_year_index ON public.t;
74
-
75
- -- 查询
76
- SELECT * FROM t WHERE match_all(order_year, '2023');
77
-
78
- -- 查看索引详情
79
- DESC INDEX EXTENDED order_year_index;
80
- ```
@@ -1,81 +0,0 @@
1
- # 向量索引参考
2
-
3
- > 来源:https://www.yunqi.tech/documents/create-vector-index
4
-
5
- ## 适用场景
6
-
7
- 语义相似度搜索、RAG 检索、推荐系统。基于 HNSW 算法。
8
-
9
- ## 建表时创建
10
-
11
- ```sql
12
- CREATE TABLE embeddings (
13
- id INT,
14
- vec VECTOR(FLOAT, 512),
15
- INDEX vec_idx (vec) USING VECTOR PROPERTIES(
16
- "scalar.type" = "f32",
17
- "distance.function" = "l2_distance"
18
- )
19
- );
20
- ```
21
-
22
- ## 已有表添加
23
-
24
- ```sql
25
- CREATE VECTOR INDEX [IF NOT EXISTS] index_name
26
- ON TABLE [schema.]table_name(column_name)
27
- PROPERTIES(
28
- "property1" = "value1",
29
- ...
30
- );
31
- ```
32
-
33
- ## PROPERTIES 参数说明
34
-
35
- | 参数 | 可选值 | 默认值 | 说明 |
36
- |---|---|---|---|
37
- | `distance.function` | `l2_distance`, `cosine_distance`, `jaccard_distance`, `hamming_distance` | `cosine_distance` | 距离函数 |
38
- | `scalar.type` | `f32`, `f16`, `i8`, `b1` | `f32` | 向量元素类型 |
39
- | `m` | 建议不超过 1000 | `16` | HNSW 最大邻居数 |
40
- | `ef.construction` | 建议不超过 5000 | `128` | HNSW 构建时候选集大小 |
41
- | `reuse.vector.column` | `true`, `false` | `false` | 复用 vector column 数据节省存储 |
42
- | `compress.codec` | `uncompressed`, `zstd`, `lz4` | `uncompressed` | 压缩算法(复用 column 时不生效) |
43
- | `compress.level` | `fastest`, `default`, `best` | `default` | 压缩级别 |
44
-
45
- ## 向量列类型与索引元素类型对应
46
-
47
- | 索引元素类型(scalar.type) | 支持的向量列类型 |
48
- |---|---|
49
- | `f32` | int, float |
50
- | `f16` | int, float |
51
- | `i8` | tinyint, int, float |
52
- | `b1` | tinyint, int, float(按位建索引需设 `conversion.rule=as_bits`) |
53
-
54
- ## 注意事项
55
-
56
- - **只对新写入数据生效**,旧数据需用 `BUILD INDEX` 命令补建
57
-
58
- ## 完整示例
59
-
60
- ```sql
61
- -- 建表时创建向量索引
62
- CREATE TABLE test_vector (
63
- vec VECTOR(FLOAT, 4),
64
- id INT,
65
- INDEX vec_idx (vec) USING VECTOR PROPERTIES(
66
- "scalar.type" = "f32",
67
- "distance.function" = "l2_distance"
68
- )
69
- );
70
-
71
- -- 或者:为已有表添加向量索引(与上面建表时创建的方式二选一;若表上已存在同名索引 vec_idx,需先删除或改用其他索引名)
72
- CREATE VECTOR INDEX vec_idx
73
- ON TABLE public.test_vector(vec)
74
- PROPERTIES(
75
- "scalar.type" = "f32",
76
- "distance.function" = "cosine_distance"
77
- );
78
-
79
- -- 对存量数据构建索引
80
- BUILD INDEX vec_idx ON public.test_vector;
81
- ```