chromadb-experimental 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/chromadb/admin_client.rb +6 -0
- data/lib/chromadb/client.rb +317 -0
- data/lib/chromadb/collection.rb +573 -0
- data/lib/chromadb/embedding_functions/chroma_bm25.rb +459 -0
- data/lib/chromadb/embedding_functions/chroma_cloud_qwen.rb +139 -0
- data/lib/chromadb/embedding_functions/chroma_cloud_splade.rb +121 -0
- data/lib/chromadb/embedding_functions.rb +121 -0
- data/lib/chromadb/errors.rb +120 -0
- data/lib/chromadb/http_client.rb +142 -0
- data/lib/chromadb/openapi/lib/chromadb/api/default_api.rb +2349 -0
- data/lib/chromadb/openapi/lib/chromadb/api_client.rb +392 -0
- data/lib/chromadb/openapi/lib/chromadb/api_error.rb +58 -0
- data/lib/chromadb/openapi/lib/chromadb/configuration.rb +295 -0
- data/lib/chromadb/openapi/lib/chromadb/models/add_collection_records_payload.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/attach_function_request.rb +250 -0
- data/lib/chromadb/openapi/lib/chromadb/models/attach_function_response.rb +235 -0
- data/lib/chromadb/openapi/lib/chromadb/models/attached_function_api_response.rb +361 -0
- data/lib/chromadb/openapi/lib/chromadb/models/attached_function_info.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/bool_inverted_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/bool_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/checklist_response.rb +245 -0
- data/lib/chromadb/openapi/lib/chromadb/models/collection.rb +315 -0
- data/lib/chromadb/openapi/lib/chromadb/models/collection_configuration.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/create_collection_payload.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/create_database_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/create_tenant_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/database.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/detach_function_request.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/detach_function_response.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/embedding_function_new_configuration.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/error_response.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/float_inverted_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/float_list_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/float_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/fork_collection_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/fts_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/get_attached_function_response.rb +224 -0
- data/lib/chromadb/openapi/lib/chromadb/models/get_response.rb +270 -0
- data/lib/chromadb/openapi/lib/chromadb/models/get_tenant_response.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/get_user_identity_response.rb +246 -0
- data/lib/chromadb/openapi/lib/chromadb/models/heartbeat_response.rb +235 -0
- data/lib/chromadb/openapi/lib/chromadb/models/hnsw_configuration.rb +330 -0
- data/lib/chromadb/openapi/lib/chromadb/models/hnsw_index_config.rb +371 -0
- data/lib/chromadb/openapi/lib/chromadb/models/include.rb +210 -0
- data/lib/chromadb/openapi/lib/chromadb/models/int_inverted_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/int_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/query_response.rb +280 -0
- data/lib/chromadb/openapi/lib/chromadb/models/raw_where_fields.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/schema.rb +258 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload.rb +256 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload_filter.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload_group_by.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload_limit.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload_select.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_request_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_response.rb +270 -0
- data/lib/chromadb/openapi/lib/chromadb/models/space.rb +210 -0
- data/lib/chromadb/openapi/lib/chromadb/models/spann_configuration.rb +420 -0
- data/lib/chromadb/openapi/lib/chromadb/models/spann_index_config.rb +536 -0
- data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector.rb +244 -0
- data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_config.rb +242 -0
- data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_type.rb +234 -0
- data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/string_inverted_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/string_value_type.rb +231 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_collection_configuration.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_collection_payload.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_collection_records_payload.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_hnsw_configuration.rb +345 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_spann_configuration.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_tenant_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/upsert_collection_records_payload.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/value_types.rb +271 -0
- data/lib/chromadb/openapi/lib/chromadb/models/vector_index_config.rb +261 -0
- data/lib/chromadb/openapi/lib/chromadb/models/vector_index_type.rb +234 -0
- data/lib/chromadb/openapi/lib/chromadb/version.rb +15 -0
- data/lib/chromadb/openapi/lib/chromadb.rb +102 -0
- data/lib/chromadb/openapi.rb +6 -0
- data/lib/chromadb/schema.rb +744 -0
- data/lib/chromadb/schemas/chroma-cloud-qwen.json +61 -0
- data/lib/chromadb/schemas/chroma-cloud-splade.json +31 -0
- data/lib/chromadb/schemas/chroma_bm25.json +37 -0
- data/lib/chromadb/search/key.rb +94 -0
- data/lib/chromadb/search/limit.rb +41 -0
- data/lib/chromadb/search/rank.rb +425 -0
- data/lib/chromadb/search/search.rb +73 -0
- data/lib/chromadb/search/select.rb +54 -0
- data/lib/chromadb/search/where.rb +157 -0
- data/lib/chromadb/search.rb +8 -0
- data/lib/chromadb/types/results.rb +96 -0
- data/lib/chromadb/types/sparse_vector.rb +86 -0
- data/lib/chromadb/types/validation.rb +519 -0
- data/lib/chromadb/types.rb +13 -0
- data/lib/chromadb/version.rb +5 -0
- data/lib/chromadb.rb +15 -0
- metadata +233 -0
|
@@ -0,0 +1,744 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Chroma
|
|
4
|
+
# Reserved keys under which the schema stores its document and embedding entries.
DOCUMENT_KEY = "#document"
EMBEDDING_KEY = "#embedding"

# Names of the value-type sections in the serialized schema.
STRING_VALUE_NAME = "string"
FLOAT_LIST_VALUE_NAME = "float_list"
SPARSE_VECTOR_VALUE_NAME = "sparse_vector"
INT_VALUE_NAME = "int"
FLOAT_VALUE_NAME = "float"
BOOL_VALUE_NAME = "bool"

# Names of the index entries nested inside each value-type section.
FTS_INDEX_NAME = "fts_index"
STRING_INVERTED_INDEX_NAME = "string_inverted_index"
VECTOR_INDEX_NAME = "vector_index"
SPARSE_VECTOR_INDEX_NAME = "sparse_vector_index"
INT_INVERTED_INDEX_NAME = "int_inverted_index"
FLOAT_INVERTED_INDEX_NAME = "float_inverted_index"
BOOL_INVERTED_INDEX_NAME = "bool_inverted_index"
|
|
21
|
+
|
|
22
|
+
# Marker config for a full-text-search index; it carries no options.
class FtsIndexConfig
  # @return [String] the config's type tag
  def type
    "FtsIndexConfig"
  end
end
|
|
25
|
+
|
|
26
|
+
# Marker config for an inverted index over string values; it carries no options.
class StringInvertedIndexConfig
  # @return [String] the config's type tag
  def type
    "StringInvertedIndexConfig"
  end
end
|
|
29
|
+
|
|
30
|
+
# Marker config for an inverted index over integer values; it carries no options.
class IntInvertedIndexConfig
  # @return [String] the config's type tag
  def type
    "IntInvertedIndexConfig"
  end
end
|
|
33
|
+
|
|
34
|
+
# Marker config for an inverted index over float values; it carries no options.
class FloatInvertedIndexConfig
  # @return [String] the config's type tag
  def type
    "FloatInvertedIndexConfig"
  end
end
|
|
37
|
+
|
|
38
|
+
# Marker config for an inverted index over boolean values; it carries no options.
class BoolInvertedIndexConfig
  # @return [String] the config's type tag
  def type
    "BoolInvertedIndexConfig"
  end
end
|
|
41
|
+
|
|
42
|
+
# Options for a dense vector index.
class VectorIndexConfig
  attr_accessor :space, :embedding_function, :source_key, :hnsw, :spann

  # @param space [Object, nil] stored as given
  # @param embedding_function [Object, nil] stored as given
  # @param source_key [Object, nil] when the value responds to #name, that
  #   name is stored instead of the object itself
  # @param hnsw [Object, nil] stored as given
  # @param spann [Object, nil] stored as given
  def initialize(space: nil, embedding_function: nil, source_key: nil, hnsw: nil, spann: nil)
    @space = space
    @embedding_function = embedding_function
    @source_key = if source_key.respond_to?(:name)
                    source_key.name
                  else
                    source_key
                  end
    @hnsw = hnsw
    @spann = spann
  end

  # @return [String] the config's type tag
  def type
    "VectorIndexConfig"
  end
end
|
|
55
|
+
|
|
56
|
+
# Options for a sparse vector index.
class SparseVectorIndexConfig
  attr_accessor :embedding_function, :source_key, :bm25

  # @param embedding_function [Object, nil] stored as given
  # @param source_key [Object, nil] when the value responds to #name, that
  #   name is stored instead of the object itself
  # @param bm25 [Object, nil] stored as given
  def initialize(embedding_function: nil, source_key: nil, bm25: nil)
    @embedding_function = embedding_function
    @source_key = if source_key.respond_to?(:name)
                    source_key.name
                  else
                    source_key
                  end
    @bm25 = bm25
  end

  # @return [String] the config's type tag
  def type
    "SparseVectorIndexConfig"
  end
end
|
|
67
|
+
|
|
68
|
+
# Pairs a full-text-search index config with its enabled flag.
class FtsIndexType
  attr_accessor :enabled, :config

  # @param enabled [Object] stored as the enabled flag
  # @param config [Object] stored as the index config
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
|
|
76
|
+
|
|
77
|
+
# Pairs a string inverted index config with its enabled flag.
class StringInvertedIndexType
  attr_accessor :enabled, :config

  # @param enabled [Object] stored as the enabled flag
  # @param config [Object] stored as the index config
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
|
|
85
|
+
|
|
86
|
+
# Pairs a vector index config with its enabled flag.
class VectorIndexType
  attr_accessor :enabled, :config

  # @param enabled [Object] stored as the enabled flag
  # @param config [Object] stored as the index config
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
|
|
94
|
+
|
|
95
|
+
# Pairs a sparse vector index config with its enabled flag.
class SparseVectorIndexType
  attr_accessor :enabled, :config

  # @param enabled [Object] stored as the enabled flag
  # @param config [Object] stored as the index config
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
|
|
103
|
+
|
|
104
|
+
# Pairs an integer inverted index config with its enabled flag.
class IntInvertedIndexType
  attr_accessor :enabled, :config

  # @param enabled [Object] stored as the enabled flag
  # @param config [Object] stored as the index config
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
|
|
112
|
+
|
|
113
|
+
# Pairs a float inverted index config with its enabled flag.
class FloatInvertedIndexType
  attr_accessor :enabled, :config

  # @param enabled [Object] stored as the enabled flag
  # @param config [Object] stored as the index config
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
|
|
121
|
+
|
|
122
|
+
# Pairs a boolean inverted index config with its enabled flag.
class BoolInvertedIndexType
  attr_accessor :enabled, :config

  # @param enabled [Object] stored as the enabled flag
  # @param config [Object] stored as the index config
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
|
|
130
|
+
|
|
131
|
+
# Holds the index slots for string values: full-text search and inverted index.
class StringValueType
  attr_accessor :fts_index, :string_inverted_index

  # @param fts_index [Object, nil] stored in the fts_index slot
  # @param string_inverted_index [Object, nil] stored in the string_inverted_index slot
  def initialize(fts_index = nil, string_inverted_index = nil)
    @fts_index, @string_inverted_index = fts_index, string_inverted_index
  end
end
|
|
139
|
+
|
|
140
|
+
# Holds the single index slot for float-list values: the vector index.
class FloatListValueType
  attr_accessor :vector_index

  # @param vector_index [Object, nil] stored in the vector_index slot
  def initialize(vector_index = nil)
    self.vector_index = vector_index
  end
end
|
|
147
|
+
|
|
148
|
+
# Holds the single index slot for sparse-vector values.
class SparseVectorValueType
  attr_accessor :sparse_vector_index

  # @param sparse_vector_index [Object, nil] stored in the sparse_vector_index slot
  def initialize(sparse_vector_index = nil)
    self.sparse_vector_index = sparse_vector_index
  end
end
|
|
155
|
+
|
|
156
|
+
# Holds the single index slot for integer values.
class IntValueType
  attr_accessor :int_inverted_index

  # @param int_inverted_index [Object, nil] stored in the int_inverted_index slot
  def initialize(int_inverted_index = nil)
    self.int_inverted_index = int_inverted_index
  end
end
|
|
163
|
+
|
|
164
|
+
# Holds the single index slot for float values.
class FloatValueType
  attr_accessor :float_inverted_index

  # @param float_inverted_index [Object, nil] stored in the float_inverted_index slot
  def initialize(float_inverted_index = nil)
    self.float_inverted_index = float_inverted_index
  end
end
|
|
171
|
+
|
|
172
|
+
# Holds the single index slot for boolean values.
class BoolValueType
  attr_accessor :bool_inverted_index

  # @param bool_inverted_index [Object, nil] stored in the bool_inverted_index slot
  def initialize(bool_inverted_index = nil)
    self.bool_inverted_index = bool_inverted_index
  end
end
|
|
179
|
+
|
|
180
|
+
# Container with one slot per value type; every slot starts out empty (nil).
class ValueTypes
  attr_accessor :string, :float_list, :sparse_vector, :int_value, :float_value, :boolean

  def initialize
    # Same slots, in the same order, as the accessors declared above.
    %i[string float_list sparse_vector int_value float_value boolean].each do |slot|
      instance_variable_set(:"@#{slot}", nil)
    end
  end
end
|
|
192
|
+
|
|
193
|
+
class Schema
|
|
194
|
+
attr_accessor :defaults, :keys
|
|
195
|
+
|
|
196
|
+
# Builds a schema with an empty defaults container and key map, then lets
# initialize_defaults and initialize_keys populate them.
def initialize
  @defaults, @keys = ValueTypes.new, {}
  initialize_defaults
  initialize_keys
end
|
|
202
|
+
|
|
203
|
+
# Enables an index, either in the schema-wide defaults or for a single key.
#
# Vector and FTS configs are only accepted without a key; sparse vector
# configs are only accepted with a key. Any other config is enabled in the
# defaults when no key is given, or for that key otherwise.
#
# @param config [Object, nil] an index config instance
# @param key [String, #name, nil] key to index. A value responding to #name
#   (for example a Symbol) is reduced to that name first, the same way the
#   index config classes treat source_key.
# @return [Schema] self, so calls can be chained
# @raise [ArgumentError] when neither argument is given, when the key is one
#   of the reserved keys, or when the config/key combination is not allowed
def create_index(config: nil, key: nil)
  # Normalise before any check: otherwise a name-bearing key such as
  # :"#document" slips past the reserved-key guard and ends up stored under
  # a non-String hash key.
  key = key.name if key.respond_to?(:name)

  if config.nil? && key.nil?
    raise ArgumentError,
      "Cannot enable all index types globally. Must specify either config or key."
  end

  if [ EMBEDDING_KEY, DOCUMENT_KEY ].include?(key)
    raise ArgumentError,
      "Cannot create index on special key '#{key}'. These keys are managed automatically by the system."
  end

  case config
  when VectorIndexConfig
    unless key.nil?
      raise ArgumentError,
        "Vector index cannot be enabled on specific keys. Use create_index without key to configure globally."
    end
    set_vector_index_config(config)
    return self
  when FtsIndexConfig
    unless key.nil?
      raise ArgumentError,
        "FTS index cannot be enabled on specific keys. Use create_index without key to configure globally."
    end
    set_fts_index_config(config)
    return self
  when SparseVectorIndexConfig
    if key.nil?
      raise ArgumentError,
        "Sparse vector index must be created on a specific key. Please specify a key using create_index(config: SparseVectorIndexConfig.new, key: 'your_key')"
    end
  end

  # From here on a key is known to be present whenever config is missing.
  if config.nil?
    raise ArgumentError,
      "Cannot enable all index types for key '#{key}'. Please specify a specific index configuration."
  end

  if key.nil?
    set_index_in_defaults(config, true)
  else
    set_index_for_key(key, config, true)
  end

  self
end
|
|
253
|
+
|
|
254
|
+
# Disables an index, either in the schema-wide defaults or for a single key.
#
# Vector, FTS and sparse vector configs are always rejected. Any other
# config is disabled for the given key, or in the defaults when no key is
# given.
#
# @param config [Object, nil] an index config instance
# @param key [String, #name, nil] key whose index is disabled. A value
#   responding to #name (for example a Symbol) is reduced to that name first,
#   the same way the index config classes treat source_key.
# @return [Schema] self, so calls can be chained
# @raise [ArgumentError] when neither argument is given, when the key is one
#   of the reserved keys, when the config type cannot be deleted, or when a
#   key is given without a config
def delete_index(config: nil, key: nil)
  # Normalise before any check: otherwise a name-bearing key such as
  # :"#document" slips past the reserved-key guard and ends up stored under
  # a non-String hash key.
  key = key.name if key.respond_to?(:name)

  if config.nil? && key.nil?
    raise ArgumentError,
      "Cannot disable all indexes. Must specify either config or key."
  end

  if [ EMBEDDING_KEY, DOCUMENT_KEY ].include?(key)
    raise ArgumentError,
      "Cannot delete index on special key '#{key}'. These keys are managed automatically by the system."
  end

  case config
  when VectorIndexConfig
    raise ArgumentError, "Deleting vector index is not currently supported."
  when FtsIndexConfig
    raise ArgumentError, "Deleting FTS index is not currently supported."
  when SparseVectorIndexConfig
    raise ArgumentError, "Deleting sparse vector index is not currently supported."
  end

  # From here on a key is known to be present whenever config is missing.
  if config.nil?
    raise ArgumentError,
      "Cannot disable all index types for key '#{key}'. Please specify a specific index configuration."
  end

  if key.nil?
    set_index_in_defaults(config, false)
  else
    set_index_for_key(key, config, false)
  end

  self
end
|
|
293
|
+
|
|
294
|
+
# Converts the schema into a plain Hash with string keys.
#
# @return [Hash] "defaults" holds the serialized default value types and
#   "keys" maps each key name to its serialized value types
def serialize_to_json
  serialized_defaults = serialize_value_types(@defaults)
  serialized_keys = @keys.each_with_object({}) do |(key_name, value_types), out|
    out[key_name] = serialize_value_types(value_types)
  end

  { "defaults" => serialized_defaults, "keys" => serialized_keys }
end
|
|
302
|
+
|
|
303
|
+
# Rebuilds a schema from the Hash form produced by serialize_to_json.
#
# The instance is created with allocate, so initialize (and the built-in
# defaults it installs) is skipped; only what the input contains is loaded.
#
# @param json [Hash, nil] parsed schema with optional "defaults" and "keys"
# @param client [Object, nil] passed through to the value-type deserializers
# @return [Schema, nil] nil when json is nil
def self.deserialize_from_json(json, client: nil)
  return nil if json.nil?

  schema = allocate
  schema.defaults = deserialize_value_types(json["defaults"] || {}, client: client)
  schema.keys = {}
  (json["keys"] || {}).each_pair do |key_name, raw_value_types|
    schema.keys[key_name] = deserialize_value_types(raw_value_types, client: client)
  end
  schema
end
|
|
314
|
+
|
|
315
|
+
# Looks up the embedding function configured for the vector index.
#
# The entry stored under EMBEDDING_KEY wins; the defaults are consulted
# only when that entry yields nothing truthy.
#
# @return [Object, nil] the embedding function, if any is configured
def resolve_embedding_function
  key_level = @keys[EMBEDDING_KEY]&.float_list&.vector_index&.config&.embedding_function
  key_level || @defaults.float_list&.vector_index&.config&.embedding_function
end
|
|
321
|
+
|
|
322
|
+
private
|
|
323
|
+
|
|
324
|
+
# Applies a vector index config to both the defaults and the EMBEDDING_KEY
# entry, keeping each location's current enabled flag.
#
# The defaults take the config's own source_key. The EMBEDDING_KEY entry
# keeps the source_key it already has, falling back to DOCUMENT_KEY.
#
# @param config [VectorIndexConfig] settings to copy (hnsw/spann are cloned)
def set_vector_index_config(config)
  # Fresh copy of the incoming settings with the given source key.
  copy_with_source_key = lambda do |source_key|
    VectorIndexConfig.new(
      space: config.space,
      embedding_function: config.embedding_function,
      source_key: source_key,
      hnsw: deep_clone(config.hnsw),
      spann: deep_clone(config.spann),
    )
  end

  default_slot = ensure_float_list_value_type(@defaults)
  default_index = default_slot.vector_index || VectorIndexType.new(false, VectorIndexConfig.new)
  default_slot.vector_index = VectorIndexType.new(
    default_index.enabled,
    copy_with_source_key.call(config.source_key),
  )

  @keys[EMBEDDING_KEY] = ensure_value_types(@keys[EMBEDDING_KEY])
  embedding_slot = ensure_float_list_value_type(@keys[EMBEDDING_KEY])
  embedding_index = embedding_slot.vector_index ||
                    VectorIndexType.new(true, VectorIndexConfig.new(source_key: DOCUMENT_KEY))
  kept_source_key = embedding_index.config.source_key || DOCUMENT_KEY
  embedding_slot.vector_index = VectorIndexType.new(
    embedding_index.enabled,
    copy_with_source_key.call(kept_source_key),
  )
end
|
|
354
|
+
|
|
355
|
+
# Applies an FTS index config to both the defaults and the DOCUMENT_KEY
# entry, keeping each location's current enabled flag.
#
# When a location has no FTS index yet, the flag falls back to false for the
# defaults and true for the DOCUMENT_KEY entry.
#
# @param config [FtsIndexConfig] config object stored in both places
def set_fts_index_config(config)
  default_slot = ensure_string_value_type(@defaults)
  default_index = default_slot.fts_index || FtsIndexType.new(false, FtsIndexConfig.new)
  default_slot.fts_index = FtsIndexType.new(default_index.enabled, config)

  @keys[DOCUMENT_KEY] = ensure_value_types(@keys[DOCUMENT_KEY])
  document_slot = ensure_string_value_type(@keys[DOCUMENT_KEY])
  document_index = document_slot.fts_index || FtsIndexType.new(true, FtsIndexConfig.new)
  document_slot.fts_index = FtsIndexType.new(document_index.enabled, config)
end
|
|
366
|
+
|
|
367
|
+
# Stores an index entry in the schema defaults, in the slot that matches
# the class of the config. A config of any other class is ignored.
#
# @param config [Object] an index config instance
# @param enabled [Boolean] flag recorded with the config
def set_index_in_defaults(config, enabled)
  if config.is_a?(FtsIndexConfig)
    ensure_string_value_type(@defaults).fts_index = FtsIndexType.new(enabled, config)
  elsif config.is_a?(StringInvertedIndexConfig)
    ensure_string_value_type(@defaults).string_inverted_index = StringInvertedIndexType.new(enabled, config)
  elsif config.is_a?(VectorIndexConfig)
    ensure_float_list_value_type(@defaults).vector_index = VectorIndexType.new(enabled, config)
  elsif config.is_a?(SparseVectorIndexConfig)
    ensure_sparse_vector_value_type(@defaults).sparse_vector_index = SparseVectorIndexType.new(enabled, config)
  elsif config.is_a?(IntInvertedIndexConfig)
    ensure_int_value_type(@defaults).int_inverted_index = IntInvertedIndexType.new(enabled, config)
  elsif config.is_a?(FloatInvertedIndexConfig)
    ensure_float_value_type(@defaults).float_inverted_index = FloatInvertedIndexType.new(enabled, config)
  elsif config.is_a?(BoolInvertedIndexConfig)
    ensure_bool_value_type(@defaults).bool_inverted_index = BoolInvertedIndexType.new(enabled, config)
  end
end
|
|
385
|
+
|
|
386
|
+
# Stores an index entry for one key, in the slot that matches the class of
# the config. The key's ValueTypes entry is created when missing, even if
# the config class is not one of the recognised ones.
#
# @param key [Object] key under which the entry is kept in @keys
# @param config [Object] an index config instance
# @param enabled [Boolean] flag recorded with the config
# @raise [ArgumentError] from the sparse vector validations, which only run
#   when a sparse vector index is being enabled
def set_index_for_key(key, config, enabled)
  enabling_sparse = config.is_a?(SparseVectorIndexConfig) && enabled
  if enabling_sparse
    validate_single_sparse_vector_index(key)
    validate_sparse_vector_config(config)
  end

  @keys[key] = ensure_value_types(@keys[key])
  entry = @keys[key]

  if config.is_a?(StringInvertedIndexConfig)
    ensure_string_value_type(entry).string_inverted_index = StringInvertedIndexType.new(enabled, config)
  elsif config.is_a?(FtsIndexConfig)
    ensure_string_value_type(entry).fts_index = FtsIndexType.new(enabled, config)
  elsif config.is_a?(SparseVectorIndexConfig)
    ensure_sparse_vector_value_type(entry).sparse_vector_index = SparseVectorIndexType.new(enabled, config)
  elsif config.is_a?(VectorIndexConfig)
    ensure_float_list_value_type(entry).vector_index = VectorIndexType.new(enabled, config)
  elsif config.is_a?(IntInvertedIndexConfig)
    ensure_int_value_type(entry).int_inverted_index = IntInvertedIndexType.new(enabled, config)
  elsif config.is_a?(FloatInvertedIndexConfig)
    ensure_float_value_type(entry).float_inverted_index = FloatInvertedIndexType.new(enabled, config)
  elsif config.is_a?(BoolInvertedIndexConfig)
    ensure_bool_value_type(entry).bool_inverted_index = BoolInvertedIndexType.new(enabled, config)
  end
end
|
|
411
|
+
|
|
412
|
+
# Ensures no key other than target_key already has an enabled sparse
# vector index.
#
# @param target_key [Object] key about to receive the sparse vector index
# @raise [ArgumentError] when another key has one enabled
def validate_single_sparse_vector_index(target_key)
  @keys.each do |existing_key, value_types|
    next if existing_key == target_key
    next unless value_types.sparse_vector&.sparse_vector_index&.enabled

    raise ArgumentError,
      "Cannot enable sparse vector index on key '#{target_key}'. A sparse vector index is already enabled on key '#{existing_key}'. Only one sparse vector index is allowed per collection."
  end
end
|
|
422
|
+
|
|
423
|
+
# Rejects a sparse vector config that names a source_key but has no
# embedding function.
#
# @param config [SparseVectorIndexConfig] config to check
# @raise [ArgumentError] when source_key is set and embedding_function is nil
def validate_sparse_vector_config(config)
  return unless config.source_key && config.embedding_function.nil?

  raise ArgumentError,
    "If source_key is provided then embedding_function must also be provided since there is no default embedding function."
end
|
|
429
|
+
|
|
430
|
+
# Fills @defaults with the built-in index settings: the string, int, float
# and bool inverted indexes start enabled; FTS, vector and sparse vector
# indexes start disabled.
def initialize_defaults
  fts = FtsIndexType.new(false, FtsIndexConfig.new)
  string_inverted = StringInvertedIndexType.new(true, StringInvertedIndexConfig.new)
  @defaults.string = StringValueType.new(fts, string_inverted)

  vector = VectorIndexType.new(false, VectorIndexConfig.new)
  @defaults.float_list = FloatListValueType.new(vector)

  sparse_vector = SparseVectorIndexType.new(false, SparseVectorIndexConfig.new)
  @defaults.sparse_vector = SparseVectorValueType.new(sparse_vector)

  int_inverted = IntInvertedIndexType.new(true, IntInvertedIndexConfig.new)
  @defaults.int_value = IntValueType.new(int_inverted)

  float_inverted = FloatInvertedIndexType.new(true, FloatInvertedIndexConfig.new)
  @defaults.float_value = FloatValueType.new(float_inverted)

  bool_inverted = BoolInvertedIndexType.new(true, BoolInvertedIndexConfig.new)
  @defaults.boolean = BoolValueType.new(bool_inverted)
end
|
|
456
|
+
|
|
457
|
+
# Installs the built-in entries for the two reserved keys.
#
# DOCUMENT_KEY gets FTS enabled and the string inverted index disabled.
# EMBEDDING_KEY gets an enabled vector index sourced from DOCUMENT_KEY.
def initialize_keys
  document_types = ValueTypes.new
  @keys[DOCUMENT_KEY] = document_types
  document_types.string = StringValueType.new(
    FtsIndexType.new(true, FtsIndexConfig.new),
    StringInvertedIndexType.new(false, StringInvertedIndexConfig.new),
  )

  embedding_types = ValueTypes.new
  @keys[EMBEDDING_KEY] = embedding_types
  embedding_types.float_list = FloatListValueType.new(
    VectorIndexType.new(true, VectorIndexConfig.new(source_key: DOCUMENT_KEY)),
  )
end
|
|
469
|
+
|
|
470
|
+
# Serializes every populated slot of a ValueTypes container.
#
# Slots that are unset, or whose serialized form is empty, are left out.
#
# @param value_types [ValueTypes] container to serialize
# @return [Hash] section name => serialized value type
def serialize_value_types(value_types)
  # section name => [reader on value_types, serializer to call]
  sections = {
    STRING_VALUE_NAME => %i[string serialize_string_value_type],
    FLOAT_LIST_VALUE_NAME => %i[float_list serialize_float_list_value_type],
    SPARSE_VECTOR_VALUE_NAME => %i[sparse_vector serialize_sparse_vector_value_type],
    INT_VALUE_NAME => %i[int_value serialize_int_value_type],
    FLOAT_VALUE_NAME => %i[float_value serialize_float_value_type],
    BOOL_VALUE_NAME => %i[boolean serialize_bool_value_type],
  }

  sections.each_with_object({}) do |(section_name, (reader, serializer)), output|
    value_type = value_types.public_send(reader)
    next unless value_type

    payload = send(serializer, value_type)
    output[section_name] = payload unless payload.empty?
  end
end
|
|
498
|
+
|
|
499
|
+
# Serializes the FTS and string inverted index entries of a string value
# type. Only the enabled flag is emitted; "config" is always an empty Hash.
#
# @param value_type [StringValueType] value type to serialize
# @return [Hash] index name => { "enabled", "config" } for each index present
def serialize_string_value_type(value_type)
  indexes = {
    FTS_INDEX_NAME => value_type.fts_index,
    STRING_INVERTED_INDEX_NAME => value_type.string_inverted_index,
  }

  indexes.each_with_object({}) do |(index_name, index), output|
    output[index_name] = { "enabled" => index.enabled, "config" => {} } if index
  end
end
|
|
515
|
+
|
|
516
|
+
# Serializes the vector index entry of a float-list value type.
#
# @param value_type [FloatListValueType] value type to serialize
# @return [Hash] empty when no vector index is set
def serialize_float_list_value_type(value_type)
  index = value_type.vector_index
  return {} unless index

  {
    VECTOR_INDEX_NAME => {
      "enabled" => index.enabled,
      "config" => serialize_config(index.config),
    },
  }
end
|
|
526
|
+
|
|
527
|
+
# Serializes the sparse vector index entry of a sparse-vector value type.
#
# @param value_type [SparseVectorValueType] value type to serialize
# @return [Hash] empty when no sparse vector index is set
def serialize_sparse_vector_value_type(value_type)
  index = value_type.sparse_vector_index
  return {} unless index

  {
    SPARSE_VECTOR_INDEX_NAME => {
      "enabled" => index.enabled,
      "config" => serialize_config(index.config),
    },
  }
end
|
|
537
|
+
|
|
538
|
+
# Serializes the inverted index entry of an int value type. Only the
# enabled flag is emitted; "config" is always an empty Hash.
#
# @param value_type [IntValueType] value type to serialize
# @return [Hash] empty when no index is set
def serialize_int_value_type(value_type)
  index = value_type.int_inverted_index
  return {} unless index

  { INT_INVERTED_INDEX_NAME => { "enabled" => index.enabled, "config" => {} } }
end
|
|
548
|
+
|
|
549
|
+
# Serializes the inverted index entry of a float value type. Only the
# enabled flag is emitted; "config" is always an empty Hash.
#
# @param value_type [FloatValueType] value type to serialize
# @return [Hash] empty when no index is set
def serialize_float_value_type(value_type)
  index = value_type.float_inverted_index
  return {} unless index

  { FLOAT_INVERTED_INDEX_NAME => { "enabled" => index.enabled, "config" => {} } }
end
|
|
559
|
+
|
|
560
|
+
# Serializes the inverted index entry of a bool value type. Only the
# enabled flag is emitted; "config" is always an empty Hash.
#
# @param value_type [BoolValueType] value type to serialize
# @return [Hash] empty when no index is set
def serialize_bool_value_type(value_type)
  index = value_type.bool_inverted_index
  return {} unless index

  { BOOL_INVERTED_INDEX_NAME => { "enabled" => index.enabled, "config" => {} } }
end
|
|
570
|
+
|
|
571
|
+
# Dispatches to the serializer matching the config's class.
#
# @param config [Object] an index config instance
# @return [Hash] serialized config; empty for anything that is neither a
#   vector nor a sparse vector config
def serialize_config(config)
  return serialize_vector_config(config) if config.is_a?(VectorIndexConfig)
  return serialize_sparse_vector_config(config) if config.is_a?(SparseVectorIndexConfig)

  {}
end
|
|
581
|
+
|
|
582
|
+
# Serializes a vector index config.
#
# "embedding_function" is always present. "space", "source_key", "hnsw"
# and "spann" appear only when they have a value. A missing space falls
# back to the embedding function's default_space when it offers one.
#
# @param config [VectorIndexConfig] config to serialize
# @return [Hash] string-keyed config
def serialize_vector_config(config)
  function = config.embedding_function
  output = {
    "embedding_function" => EmbeddingFunctions.prepare_embedding_function_config(function),
  }

  space = config.space
  space = function.default_space if space.nil? && function&.respond_to?(:default_space)

  output["space"] = space if space
  output["source_key"] = config.source_key if config.source_key
  output["hnsw"] = deep_clone(config.hnsw) if config.hnsw
  output["spann"] = deep_clone(config.spann) if config.spann
  output
end
|
|
597
|
+
|
|
598
|
+
# Serializes a sparse vector index config.
#
# "embedding_function" is always present. "source_key" appears when set,
# and "bm25" only when it is exactly true or false.
#
# @param config [SparseVectorIndexConfig] config to serialize
# @return [Hash] string-keyed config
def serialize_sparse_vector_config(config)
  output = {
    "embedding_function" => EmbeddingFunctions.prepare_embedding_function_config(config.embedding_function),
  }
  output["source_key"] = config.source_key if config.source_key

  bm25 = config.bm25
  output["bm25"] = bm25 if bm25.equal?(true) || bm25.equal?(false)
  output
end
|
|
605
|
+
|
|
606
|
+
# Builds a ValueTypes container from its serialized Hash form; sections
# missing from the input leave the matching slot unset.
#
# NOTE(review): this is defined with `def self.`, so a bare `private`
# section marker does not hide it. Use private_class_method if it is meant
# to be internal.
#
# @param json [Hash] section name => serialized value type
# @param client [Object, nil] passed to the float-list and sparse-vector
#   deserializers
# @return [ValueTypes]
def self.deserialize_value_types(json, client: nil)
  types = ValueTypes.new

  string_json = json[STRING_VALUE_NAME]
  types.string = deserialize_string_value_type(string_json) if string_json

  float_list_json = json[FLOAT_LIST_VALUE_NAME]
  types.float_list = deserialize_float_list_value_type(float_list_json, client: client) if float_list_json

  sparse_vector_json = json[SPARSE_VECTOR_VALUE_NAME]
  types.sparse_vector = deserialize_sparse_vector_value_type(sparse_vector_json, client: client) if sparse_vector_json

  int_json = json[INT_VALUE_NAME]
  types.int_value = deserialize_int_value_type(int_json) if int_json

  float_json = json[FLOAT_VALUE_NAME]
  types.float_value = deserialize_float_value_type(float_json) if float_json

  bool_json = json[BOOL_VALUE_NAME]
  types.boolean = deserialize_bool_value_type(bool_json) if bool_json

  types
end
|
|
618
|
+
|
|
619
|
+
# Builds a StringValueType from its serialized Hash form. Only the
# "enabled" flag is read; each index gets a fresh config object.
#
# @param json [Hash] may contain the FTS and string inverted index entries
# @return [StringValueType] absent entries stay nil
def self.deserialize_string_value_type(json)
  fts_json = json[FTS_INDEX_NAME]
  fts = fts_json ? FtsIndexType.new(fts_json["enabled"], FtsIndexConfig.new) : nil

  inverted_json = json[STRING_INVERTED_INDEX_NAME]
  inverted = inverted_json ? StringInvertedIndexType.new(inverted_json["enabled"], StringInvertedIndexConfig.new) : nil

  StringValueType.new(fts, inverted)
end
|
|
632
|
+
|
|
633
|
+
# Rebuilds a FloatListValueType from its JSON hash form.
#
# When the vector index section is present, its "config" entry is passed to
# deserialize_vector_config and combined with its "enabled" value into a
# VectorIndexType. Without that section the value type wraps nil.
#
# @param json [Hash] float-list value-type hash
# @param client [Object, nil] forwarded to deserialize_vector_config
# @return [FloatListValueType]
def self.deserialize_float_list_value_type(json, client: nil)
  index_json = json[VECTOR_INDEX_NAME]
  return FloatListValueType.new(nil) unless index_json

  index_config = deserialize_vector_config(index_json["config"], client: client)
  FloatListValueType.new(VectorIndexType.new(index_json["enabled"], index_config))
end
|
|
642
|
+
|
|
643
|
+
# Rebuilds a SparseVectorValueType from its JSON hash form.
#
# When the sparse vector index section is present, its "config" entry is
# passed to deserialize_sparse_vector_config and combined with its "enabled"
# value into a SparseVectorIndexType. Without that section the value type
# wraps nil.
#
# @param json [Hash] sparse-vector value-type hash
# @param client [Object, nil] forwarded to deserialize_sparse_vector_config
# @return [SparseVectorValueType]
def self.deserialize_sparse_vector_value_type(json, client: nil)
  index_json = json[SPARSE_VECTOR_INDEX_NAME]
  return SparseVectorValueType.new(nil) unless index_json

  index_config = deserialize_sparse_vector_config(index_json["config"], client: client)
  SparseVectorValueType.new(SparseVectorIndexType.new(index_json["enabled"], index_config))
end
|
|
652
|
+
|
|
653
|
+
# Rebuilds an IntValueType from its JSON hash form.
#
# A present int inverted index section becomes an IntInvertedIndexType built
# from its "enabled" value and a freshly constructed, argument-less config;
# otherwise the value type wraps nil.
#
# @param json [Hash] int value-type hash
# @return [IntValueType]
def self.deserialize_int_value_type(json)
  index_json = json[INT_INVERTED_INDEX_NAME]
  index = index_json ? IntInvertedIndexType.new(index_json["enabled"], IntInvertedIndexConfig.new) : nil
  IntValueType.new(index)
end
|
|
661
|
+
|
|
662
|
+
# Rebuilds a FloatValueType from its JSON hash form.
#
# A present float inverted index section becomes a FloatInvertedIndexType
# built from its "enabled" value and a freshly constructed, argument-less
# config; otherwise the value type wraps nil.
#
# @param json [Hash] float value-type hash
# @return [FloatValueType]
def self.deserialize_float_value_type(json)
  index_json = json[FLOAT_INVERTED_INDEX_NAME]
  index = index_json ? FloatInvertedIndexType.new(index_json["enabled"], FloatInvertedIndexConfig.new) : nil
  FloatValueType.new(index)
end
|
|
670
|
+
|
|
671
|
+
# Rebuilds a BoolValueType from its JSON hash form.
#
# A present bool inverted index section becomes a BoolInvertedIndexType built
# from its "enabled" value and a freshly constructed, argument-less config;
# otherwise the value type wraps nil.
#
# @param json [Hash] bool value-type hash
# @return [BoolValueType]
def self.deserialize_bool_value_type(json)
  index_json = json[BOOL_INVERTED_INDEX_NAME]
  index = index_json ? BoolInvertedIndexType.new(index_json["enabled"], BoolInvertedIndexConfig.new) : nil
  BoolValueType.new(index)
end
|
|
679
|
+
|
|
680
|
+
# Rebuilds a VectorIndexConfig from its JSON hash form.
#
# The embedding function is built via EmbeddingFunctions.build_embedding_function
# from the "embedding_function" entry. "space", "source_key", "hnsw" and
# "spann" are passed through as found in the hash. When the resulting config
# has no space and the embedding function responds to default_space, that
# default is applied.
#
# @param json [Hash] vector index config hash
# @param client [Object, nil] forwarded to the embedding function builder
# @return [VectorIndexConfig]
def self.deserialize_vector_config(json, client: nil)
  embedder = EmbeddingFunctions.build_embedding_function(json["embedding_function"], client: client)

  VectorIndexConfig.new(
    space: json["space"],
    embedding_function: embedder,
    source_key: json["source_key"],
    hnsw: json["hnsw"],
    spann: json["spann"]
  ).tap do |vector_config|
    # nil answers respond_to? with false, so a missing embedder is skipped here.
    if vector_config.space.nil? && embedder.respond_to?(:default_space)
      vector_config.space = embedder.default_space
    end
  end
end
|
|
695
|
+
|
|
696
|
+
# Rebuilds a SparseVectorIndexConfig from its JSON hash form.
#
# The embedding function is built via
# EmbeddingFunctions.build_sparse_embedding_function from the
# "embedding_function" entry; "source_key" and "bm25" are passed through as
# found in the hash.
#
# @param json [Hash] sparse vector index config hash
# @param client [Object, nil] forwarded to the embedding function builder
# @return [SparseVectorIndexConfig]
def self.deserialize_sparse_vector_config(json, client: nil)
  function_json = json["embedding_function"]
  attributes = {
    embedding_function: EmbeddingFunctions.build_sparse_embedding_function(function_json, client: client),
    source_key: json["source_key"],
    bm25: json["bm25"]
  }
  SparseVectorIndexConfig.new(**attributes)
end
|
|
704
|
+
|
|
705
|
+
# Returns a deep copy of value made by a Marshal round trip, or nil for nil.
#
# The bytes passed to Marshal.load are produced by the Marshal.dump call
# directly above it, not read from an external source.
#
# @param value [Object, nil] any Marshal-dumpable object
# @return [Object, nil] an independent copy of value
def deep_clone(value)
  return if value.nil?

  snapshot = Marshal.dump(value)
  Marshal.load(snapshot)
end
|
|
709
|
+
|
|
710
|
+
# Returns value_types unchanged when it is truthy, otherwise a new ValueTypes.
#
# @param value_types [ValueTypes, nil]
# @return [ValueTypes]
def ensure_value_types(value_types)
  return value_types if value_types

  ValueTypes.new
end
|
|
713
|
+
|
|
714
|
+
# Returns value_types.string, first assigning a new StringValueType when the
# attribute is currently falsy.
#
# @param value_types [#string, #string=] holder that is updated in place
# @return [StringValueType] the existing or newly assigned value
def ensure_string_value_type(value_types)
  value_types.string = StringValueType.new unless value_types.string
  value_types.string
end
|
|
718
|
+
|
|
719
|
+
# Returns value_types.float_list, first assigning a new FloatListValueType
# when the attribute is currently falsy.
#
# @param value_types [#float_list, #float_list=] holder that is updated in place
# @return [FloatListValueType] the existing or newly assigned value
def ensure_float_list_value_type(value_types)
  value_types.float_list = FloatListValueType.new unless value_types.float_list
  value_types.float_list
end
|
|
723
|
+
|
|
724
|
+
# Returns value_types.sparse_vector, first assigning a new
# SparseVectorValueType when the attribute is currently falsy.
#
# @param value_types [#sparse_vector, #sparse_vector=] holder that is updated in place
# @return [SparseVectorValueType] the existing or newly assigned value
def ensure_sparse_vector_value_type(value_types)
  value_types.sparse_vector = SparseVectorValueType.new unless value_types.sparse_vector
  value_types.sparse_vector
end
|
|
728
|
+
|
|
729
|
+
# Returns value_types.int_value, first assigning a new IntValueType when the
# attribute is currently falsy.
#
# @param value_types [#int_value, #int_value=] holder that is updated in place
# @return [IntValueType] the existing or newly assigned value
def ensure_int_value_type(value_types)
  value_types.int_value = IntValueType.new unless value_types.int_value
  value_types.int_value
end
|
|
733
|
+
|
|
734
|
+
# Returns value_types.float_value, first assigning a new FloatValueType when
# the attribute is currently falsy.
#
# @param value_types [#float_value, #float_value=] holder that is updated in place
# @return [FloatValueType] the existing or newly assigned value
def ensure_float_value_type(value_types)
  value_types.float_value = FloatValueType.new unless value_types.float_value
  value_types.float_value
end
|
|
738
|
+
|
|
739
|
+
# Returns value_types.boolean, first assigning a new BoolValueType when the
# attribute is currently falsy.
#
# @param value_types [#boolean, #boolean=] holder that is updated in place
# @return [BoolValueType] the existing or newly assigned value
def ensure_bool_value_type(value_types)
  value_types.boolean = BoolValueType.new unless value_types.boolean
  value_types.boolean
end
|
|
743
|
+
end
|
|
744
|
+
end
|