chromadb-experimental 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. checksums.yaml +7 -0
  2. data/lib/chromadb/admin_client.rb +6 -0
  3. data/lib/chromadb/client.rb +317 -0
  4. data/lib/chromadb/collection.rb +573 -0
  5. data/lib/chromadb/embedding_functions/chroma_bm25.rb +459 -0
  6. data/lib/chromadb/embedding_functions/chroma_cloud_qwen.rb +139 -0
  7. data/lib/chromadb/embedding_functions/chroma_cloud_splade.rb +121 -0
  8. data/lib/chromadb/embedding_functions.rb +121 -0
  9. data/lib/chromadb/errors.rb +120 -0
  10. data/lib/chromadb/http_client.rb +142 -0
  11. data/lib/chromadb/openapi/lib/chromadb/api/default_api.rb +2349 -0
  12. data/lib/chromadb/openapi/lib/chromadb/api_client.rb +392 -0
  13. data/lib/chromadb/openapi/lib/chromadb/api_error.rb +58 -0
  14. data/lib/chromadb/openapi/lib/chromadb/configuration.rb +295 -0
  15. data/lib/chromadb/openapi/lib/chromadb/models/add_collection_records_payload.rb +260 -0
  16. data/lib/chromadb/openapi/lib/chromadb/models/attach_function_request.rb +250 -0
  17. data/lib/chromadb/openapi/lib/chromadb/models/attach_function_response.rb +235 -0
  18. data/lib/chromadb/openapi/lib/chromadb/models/attached_function_api_response.rb +361 -0
  19. data/lib/chromadb/openapi/lib/chromadb/models/attached_function_info.rb +240 -0
  20. data/lib/chromadb/openapi/lib/chromadb/models/bool_inverted_index_type.rb +229 -0
  21. data/lib/chromadb/openapi/lib/chromadb/models/bool_value_type.rb +221 -0
  22. data/lib/chromadb/openapi/lib/chromadb/models/checklist_response.rb +245 -0
  23. data/lib/chromadb/openapi/lib/chromadb/models/collection.rb +315 -0
  24. data/lib/chromadb/openapi/lib/chromadb/models/collection_configuration.rb +240 -0
  25. data/lib/chromadb/openapi/lib/chromadb/models/create_collection_payload.rb +260 -0
  26. data/lib/chromadb/openapi/lib/chromadb/models/create_database_payload.rb +220 -0
  27. data/lib/chromadb/openapi/lib/chromadb/models/create_tenant_payload.rb +220 -0
  28. data/lib/chromadb/openapi/lib/chromadb/models/database.rb +240 -0
  29. data/lib/chromadb/openapi/lib/chromadb/models/detach_function_request.rb +221 -0
  30. data/lib/chromadb/openapi/lib/chromadb/models/detach_function_response.rb +220 -0
  31. data/lib/chromadb/openapi/lib/chromadb/models/embedding_function_new_configuration.rb +230 -0
  32. data/lib/chromadb/openapi/lib/chromadb/models/error_response.rb +230 -0
  33. data/lib/chromadb/openapi/lib/chromadb/models/float_inverted_index_type.rb +229 -0
  34. data/lib/chromadb/openapi/lib/chromadb/models/float_list_value_type.rb +221 -0
  35. data/lib/chromadb/openapi/lib/chromadb/models/float_value_type.rb +221 -0
  36. data/lib/chromadb/openapi/lib/chromadb/models/fork_collection_payload.rb +220 -0
  37. data/lib/chromadb/openapi/lib/chromadb/models/fts_index_type.rb +229 -0
  38. data/lib/chromadb/openapi/lib/chromadb/models/get_attached_function_response.rb +224 -0
  39. data/lib/chromadb/openapi/lib/chromadb/models/get_response.rb +270 -0
  40. data/lib/chromadb/openapi/lib/chromadb/models/get_tenant_response.rb +230 -0
  41. data/lib/chromadb/openapi/lib/chromadb/models/get_user_identity_response.rb +246 -0
  42. data/lib/chromadb/openapi/lib/chromadb/models/heartbeat_response.rb +235 -0
  43. data/lib/chromadb/openapi/lib/chromadb/models/hnsw_configuration.rb +330 -0
  44. data/lib/chromadb/openapi/lib/chromadb/models/hnsw_index_config.rb +371 -0
  45. data/lib/chromadb/openapi/lib/chromadb/models/include.rb +210 -0
  46. data/lib/chromadb/openapi/lib/chromadb/models/int_inverted_index_type.rb +229 -0
  47. data/lib/chromadb/openapi/lib/chromadb/models/int_value_type.rb +221 -0
  48. data/lib/chromadb/openapi/lib/chromadb/models/query_response.rb +280 -0
  49. data/lib/chromadb/openapi/lib/chromadb/models/raw_where_fields.rb +230 -0
  50. data/lib/chromadb/openapi/lib/chromadb/models/schema.rb +258 -0
  51. data/lib/chromadb/openapi/lib/chromadb/models/search_payload.rb +256 -0
  52. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_filter.rb +230 -0
  53. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_group_by.rb +230 -0
  54. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_limit.rb +230 -0
  55. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_select.rb +220 -0
  56. data/lib/chromadb/openapi/lib/chromadb/models/search_request_payload.rb +220 -0
  57. data/lib/chromadb/openapi/lib/chromadb/models/search_response.rb +270 -0
  58. data/lib/chromadb/openapi/lib/chromadb/models/space.rb +210 -0
  59. data/lib/chromadb/openapi/lib/chromadb/models/spann_configuration.rb +420 -0
  60. data/lib/chromadb/openapi/lib/chromadb/models/spann_index_config.rb +536 -0
  61. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector.rb +244 -0
  62. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_config.rb +242 -0
  63. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_type.rb +234 -0
  64. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_value_type.rb +221 -0
  65. data/lib/chromadb/openapi/lib/chromadb/models/string_inverted_index_type.rb +229 -0
  66. data/lib/chromadb/openapi/lib/chromadb/models/string_value_type.rb +231 -0
  67. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_configuration.rb +240 -0
  68. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_payload.rb +240 -0
  69. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_records_payload.rb +260 -0
  70. data/lib/chromadb/openapi/lib/chromadb/models/update_hnsw_configuration.rb +345 -0
  71. data/lib/chromadb/openapi/lib/chromadb/models/update_spann_configuration.rb +260 -0
  72. data/lib/chromadb/openapi/lib/chromadb/models/update_tenant_payload.rb +220 -0
  73. data/lib/chromadb/openapi/lib/chromadb/models/upsert_collection_records_payload.rb +260 -0
  74. data/lib/chromadb/openapi/lib/chromadb/models/value_types.rb +271 -0
  75. data/lib/chromadb/openapi/lib/chromadb/models/vector_index_config.rb +261 -0
  76. data/lib/chromadb/openapi/lib/chromadb/models/vector_index_type.rb +234 -0
  77. data/lib/chromadb/openapi/lib/chromadb/version.rb +15 -0
  78. data/lib/chromadb/openapi/lib/chromadb.rb +102 -0
  79. data/lib/chromadb/openapi.rb +6 -0
  80. data/lib/chromadb/schema.rb +744 -0
  81. data/lib/chromadb/schemas/chroma-cloud-qwen.json +61 -0
  82. data/lib/chromadb/schemas/chroma-cloud-splade.json +31 -0
  83. data/lib/chromadb/schemas/chroma_bm25.json +37 -0
  84. data/lib/chromadb/search/key.rb +94 -0
  85. data/lib/chromadb/search/limit.rb +41 -0
  86. data/lib/chromadb/search/rank.rb +425 -0
  87. data/lib/chromadb/search/search.rb +73 -0
  88. data/lib/chromadb/search/select.rb +54 -0
  89. data/lib/chromadb/search/where.rb +157 -0
  90. data/lib/chromadb/search.rb +8 -0
  91. data/lib/chromadb/types/results.rb +96 -0
  92. data/lib/chromadb/types/sparse_vector.rb +86 -0
  93. data/lib/chromadb/types/validation.rb +519 -0
  94. data/lib/chromadb/types.rb +13 -0
  95. data/lib/chromadb/version.rb +5 -0
  96. data/lib/chromadb.rb +15 -0
  97. metadata +233 -0
@@ -0,0 +1,744 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ DOCUMENT_KEY = "#document" # reserved key; Schema#create_index and Schema#delete_index reject it
5
+ EMBEDDING_KEY = "#embedding" # reserved key; Schema#create_index and Schema#delete_index reject it
6
+
7
+ STRING_VALUE_NAME = "string" # value-type names: hash keys written by Schema#serialize_value_types
8
+ FLOAT_LIST_VALUE_NAME = "float_list"
9
+ SPARSE_VECTOR_VALUE_NAME = "sparse_vector"
10
+ INT_VALUE_NAME = "int"
11
+ FLOAT_VALUE_NAME = "float"
12
+ BOOL_VALUE_NAME = "bool"
13
+
14
+ FTS_INDEX_NAME = "fts_index" # index names: hash keys nested under each serialized value type
15
+ STRING_INVERTED_INDEX_NAME = "string_inverted_index"
16
+ VECTOR_INDEX_NAME = "vector_index"
17
+ SPARSE_VECTOR_INDEX_NAME = "sparse_vector_index"
18
+ INT_INVERTED_INDEX_NAME = "int_inverted_index"
19
+ FLOAT_INVERTED_INDEX_NAME = "float_inverted_index"
20
+ BOOL_INVERTED_INDEX_NAME = "bool_inverted_index"
21
+
22
# Option-less marker config for the FTS index.
class FtsIndexConfig
  # Returns the config's type name as a String.
  def type
    "FtsIndexConfig"
  end
end
25
+
26
# Option-less marker config for the string inverted index.
class StringInvertedIndexConfig
  # Returns the config's type name as a String.
  def type
    "StringInvertedIndexConfig"
  end
end
29
+
30
# Option-less marker config for the int inverted index.
class IntInvertedIndexConfig
  # Returns the config's type name as a String.
  def type
    "IntInvertedIndexConfig"
  end
end
33
+
34
# Option-less marker config for the float inverted index.
class FloatInvertedIndexConfig
  # Returns the config's type name as a String.
  def type
    "FloatInvertedIndexConfig"
  end
end
37
+
38
# Option-less marker config for the bool inverted index.
class BoolInvertedIndexConfig
  # Returns the config's type name as a String.
  def type
    "BoolInvertedIndexConfig"
  end
end
41
+
42
# Settings for a vector index: space, embedding function, source key and the
# optional hnsw / spann parameter objects.
class VectorIndexConfig
  attr_accessor :space, :embedding_function, :source_key, :hnsw, :spann

  # Every option defaults to nil. A +source_key+ that responds to #name is
  # stored as the result of calling #name; any other value is stored unchanged.
  def initialize(space: nil, embedding_function: nil, source_key: nil, hnsw: nil, spann: nil)
    @space, @embedding_function, @hnsw, @spann = space, embedding_function, hnsw, spann
    @source_key =
      if source_key.respond_to?(:name)
        source_key.name
      else
        source_key
      end
  end

  # Returns the config's type name as a String.
  def type
    "VectorIndexConfig"
  end
end
55
+
56
# Settings for a sparse vector index: embedding function, source key and the
# bm25 flag.
class SparseVectorIndexConfig
  attr_accessor :embedding_function, :source_key, :bm25

  # Every option defaults to nil. A +source_key+ that responds to #name is
  # stored as the result of calling #name; any other value is stored unchanged.
  def initialize(embedding_function: nil, source_key: nil, bm25: nil)
    @embedding_function, @bm25 = embedding_function, bm25
    @source_key =
      if source_key.respond_to?(:name)
        source_key.name
      else
        source_key
      end
  end

  # Returns the config's type name as a String.
  def type
    "SparseVectorIndexConfig"
  end
end
67
+
68
# Pairs an enabled flag with the config object of an FTS index.
class FtsIndexType
  attr_accessor :enabled, :config

  # Stores both arguments as given.
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
76
+
77
# Pairs an enabled flag with the config object of a string inverted index.
class StringInvertedIndexType
  attr_accessor :enabled, :config

  # Stores both arguments as given.
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
85
+
86
# Pairs an enabled flag with the config object of a vector index.
class VectorIndexType
  attr_accessor :enabled, :config

  # Stores both arguments as given.
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
94
+
95
# Pairs an enabled flag with the config object of a sparse vector index.
class SparseVectorIndexType
  attr_accessor :enabled, :config

  # Stores both arguments as given.
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
103
+
104
# Pairs an enabled flag with the config object of an int inverted index.
class IntInvertedIndexType
  attr_accessor :enabled, :config

  # Stores both arguments as given.
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
112
+
113
# Pairs an enabled flag with the config object of a float inverted index.
class FloatInvertedIndexType
  attr_accessor :enabled, :config

  # Stores both arguments as given.
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
121
+
122
# Pairs an enabled flag with the config object of a bool inverted index.
class BoolInvertedIndexType
  attr_accessor :enabled, :config

  # Stores both arguments as given.
  def initialize(enabled, config)
    @enabled, @config = enabled, config
  end
end
130
+
131
# Index entries that apply to string values: an FTS index and a string
# inverted index. Both are optional and default to nil.
class StringValueType
  attr_accessor :fts_index, :string_inverted_index

  def initialize(fts_index = nil, string_inverted_index = nil)
    @fts_index, @string_inverted_index = fts_index, string_inverted_index
  end
end
139
+
140
# Index entry that applies to float-list values: an optional vector index.
class FloatListValueType
  attr_accessor :vector_index

  def initialize(vector_index = nil)
    self.vector_index = vector_index
  end
end
147
+
148
# Index entry that applies to sparse-vector values: an optional sparse vector index.
class SparseVectorValueType
  attr_accessor :sparse_vector_index

  def initialize(sparse_vector_index = nil)
    self.sparse_vector_index = sparse_vector_index
  end
end
155
+
156
# Index entry that applies to int values: an optional int inverted index.
class IntValueType
  attr_accessor :int_inverted_index

  def initialize(int_inverted_index = nil)
    self.int_inverted_index = int_inverted_index
  end
end
163
+
164
# Index entry that applies to float values: an optional float inverted index.
class FloatValueType
  attr_accessor :float_inverted_index

  def initialize(float_inverted_index = nil)
    self.float_inverted_index = float_inverted_index
  end
end
171
+
172
# Index entry that applies to bool values: an optional bool inverted index.
class BoolValueType
  attr_accessor :bool_inverted_index

  def initialize(bool_inverted_index = nil)
    self.bool_inverted_index = bool_inverted_index
  end
end
179
+
180
# Container with one slot per value type. Every slot starts out as nil.
class ValueTypes
  attr_accessor :string, :float_list, :sparse_vector, :int_value, :float_value, :boolean

  def initialize
    @string = @float_list = @sparse_vector = @int_value = @float_value = @boolean = nil
  end
end
192
+
193
+ class Schema
194
+ attr_accessor :defaults, :keys
195
+
196
# Creates a schema with an empty key map and a fresh ValueTypes for defaults,
# then fills both through initialize_defaults and initialize_keys.
def initialize
  @keys = {}
  @defaults = ValueTypes.new
  initialize_defaults
  initialize_keys
end
202
+
203
# Enables an index, either schema-wide (no key) or for one key.
#
# config - an index config object, or nil.
# key    - the key to index, or nil for the schema-wide defaults.
#
# Vector and FTS configs are only accepted without a key and update the
# configuration through set_vector_index_config / set_fts_index_config.
# Sparse vector configs are only accepted with a key.
#
# Returns self.
# Raises ArgumentError when neither argument is given, when key is one of the
# reserved keys, when only a key is given, or when the config/key combination
# is not allowed.
def create_index(config: nil, key: nil)
  has_config = !config.nil?
  has_key = !key.nil?

  unless has_config || has_key
    raise ArgumentError,
      "Cannot enable all index types globally. Must specify either config or key."
  end

  if has_key && [ EMBEDDING_KEY, DOCUMENT_KEY ].include?(key)
    raise ArgumentError,
      "Cannot create index on special key '#{key}'. These keys are managed automatically by the system."
  end

  case config
  when VectorIndexConfig
    if has_key
      raise ArgumentError,
        "Vector index cannot be enabled on specific keys. Use create_index without key to configure globally."
    end
    set_vector_index_config(config)
    return self
  when FtsIndexConfig
    if has_key
      raise ArgumentError,
        "FTS index cannot be enabled on specific keys. Use create_index without key to configure globally."
    end
    set_fts_index_config(config)
    return self
  when SparseVectorIndexConfig
    unless has_key
      raise ArgumentError,
        "Sparse vector index must be created on a specific key. Please specify a key using create_index(config: SparseVectorIndexConfig.new, key: 'your_key')"
    end
  end

  # Past the first guard a missing config implies that a key was given.
  unless has_config
    raise ArgumentError,
      "Cannot enable all index types for key '#{key}'. Please specify a specific index configuration."
  end

  if has_key
    set_index_for_key(key, config, true)
  else
    set_index_in_defaults(config, true)
  end

  self
end
253
+
254
# Disables an index, either schema-wide (no key) or for one key.
#
# config - an index config object, or nil.
# key    - the key whose index is disabled, or nil for the schema-wide defaults.
#
# Returns self.
# Raises ArgumentError when neither argument is given, when key is one of the
# reserved keys, when config is a vector, FTS or sparse vector config, or
# when only a key is given.
def delete_index(config: nil, key: nil)
  has_config = !config.nil?
  has_key = !key.nil?

  unless has_config || has_key
    raise ArgumentError,
      "Cannot disable all indexes. Must specify either config or key."
  end

  if has_key && [ EMBEDDING_KEY, DOCUMENT_KEY ].include?(key)
    raise ArgumentError,
      "Cannot delete index on special key '#{key}'. These keys are managed automatically by the system."
  end

  case config
  when VectorIndexConfig
    raise ArgumentError, "Deleting vector index is not currently supported."
  when FtsIndexConfig
    raise ArgumentError, "Deleting FTS index is not currently supported."
  when SparseVectorIndexConfig
    raise ArgumentError, "Deleting sparse vector index is not currently supported."
  end

  # Past the first guard a missing config implies that a key was given.
  unless has_config
    raise ArgumentError,
      "Cannot disable all index types for key '#{key}'. Please specify a specific index configuration."
  end

  if has_key
    set_index_for_key(key, config, false)
  else
    set_index_in_defaults(config, false)
  end

  self
end
293
+
294
# Returns a Hash with two entries: "defaults" (the serialized default value
# types) and "keys" (each key name mapped to its serialized value types).
def serialize_to_json
  serialized_defaults = serialize_value_types(@defaults)
  serialized_keys = @keys.each_with_object({}) do |(key_name, value_types), acc|
    acc[key_name] = serialize_value_types(value_types)
  end

  { "defaults" => serialized_defaults, "keys" => serialized_keys }
end
302
+
303
# Rebuilds a Schema from the structure produced by serialize_to_json.
#
# json   - Hash with optional "defaults" and "keys" entries, or nil.
# client - passed through to deserialize_value_types.
#
# Returns nil when json is nil, otherwise a Schema created with `allocate`,
# so #initialize (and its built-in defaults) is not run.
def self.deserialize_from_json(json, client: nil)
  return nil if json.nil?

  allocate.tap do |schema|
    schema.defaults = deserialize_value_types(json["defaults"] || {}, client: client)
    schema.keys = {}
    (json["keys"] || {}).each do |key_name, raw_value_types|
      schema.keys[key_name] = deserialize_value_types(raw_value_types, client: client)
    end
  end
end
314
+
315
# Returns the embedding function configured on the EMBEDDING_KEY vector index
# when there is one, otherwise the one on the default vector index (nil when
# neither is set).
def resolve_embedding_function
  from_embedding_key = @keys[EMBEDDING_KEY]&.float_list&.vector_index&.config&.embedding_function
  from_embedding_key || @defaults.float_list&.vector_index&.config&.embedding_function
end
321
+
322
+ private
323
+
324
# Applies +config+ to both the default vector index and the EMBEDDING_KEY
# override, keeping each entry's current enabled flag.
#
# When an entry does not exist yet, the default one is created disabled and
# the override enabled. The override keeps the source key it already has,
# falling back to DOCUMENT_KEY.
def set_vector_index_config(config)
  # Builds a new config from the caller's settings; hnsw and spann go through
  # deep_clone so the stored config does not share them with the caller.
  rebuild = lambda do |source_key|
    VectorIndexConfig.new(
      space: config.space,
      embedding_function: config.embedding_function,
      source_key: source_key,
      hnsw: deep_clone(config.hnsw),
      spann: deep_clone(config.spann)
    )
  end

  default_slot = ensure_float_list_value_type(@defaults)
  prior_default = default_slot.vector_index
  default_slot.vector_index = VectorIndexType.new(
    prior_default ? prior_default.enabled : false,
    rebuild.call(config.source_key)
  )

  embedding_types = @keys[EMBEDDING_KEY] = ensure_value_types(@keys[EMBEDDING_KEY])
  override_slot = ensure_float_list_value_type(embedding_types)
  prior_override = override_slot.vector_index
  kept_source_key = (prior_override ? prior_override.config.source_key : DOCUMENT_KEY) || DOCUMENT_KEY
  override_slot.vector_index = VectorIndexType.new(
    prior_override ? prior_override.enabled : true,
    rebuild.call(kept_source_key)
  )
end
354
+
355
# Applies +config+ to both the default FTS index and the DOCUMENT_KEY
# override, keeping each entry's current enabled flag. A missing default
# entry is created disabled; a missing override entry is created enabled.
def set_fts_index_config(config)
  default_strings = ensure_string_value_type(@defaults)
  prior_default = default_strings.fts_index
  default_strings.fts_index = FtsIndexType.new(prior_default ? prior_default.enabled : false, config)

  document_types = @keys[DOCUMENT_KEY] = ensure_value_types(@keys[DOCUMENT_KEY])
  document_strings = ensure_string_value_type(document_types)
  prior_override = document_strings.fts_index
  document_strings.fts_index = FtsIndexType.new(prior_override ? prior_override.enabled : true, config)
end
366
+
367
# Stores a new index entry, built from +config+ and +enabled+, in the
# schema-wide defaults slot that matches the class of +config+.
#
# config  - one of the index config objects defined in this file.
# enabled - flag stored on the new index entry.
#
# Returns the index entry that was stored.
# Raises ArgumentError when +config+ is not a supported index config, so an
# invalid argument is reported instead of being silently ignored.
def set_index_in_defaults(config, enabled)
  case config
  when FtsIndexConfig
    ensure_string_value_type(@defaults).fts_index = FtsIndexType.new(enabled, config)
  when StringInvertedIndexConfig
    ensure_string_value_type(@defaults).string_inverted_index = StringInvertedIndexType.new(enabled, config)
  when VectorIndexConfig
    ensure_float_list_value_type(@defaults).vector_index = VectorIndexType.new(enabled, config)
  when SparseVectorIndexConfig
    ensure_sparse_vector_value_type(@defaults).sparse_vector_index = SparseVectorIndexType.new(enabled, config)
  when IntInvertedIndexConfig
    ensure_int_value_type(@defaults).int_inverted_index = IntInvertedIndexType.new(enabled, config)
  when FloatInvertedIndexConfig
    ensure_float_value_type(@defaults).float_inverted_index = FloatInvertedIndexType.new(enabled, config)
  when BoolInvertedIndexConfig
    ensure_bool_value_type(@defaults).bool_inverted_index = BoolInvertedIndexType.new(enabled, config)
  else
    raise ArgumentError, "Unsupported index config type: #{config.class}"
  end
end
385
+
386
# Stores a new index entry, built from +config+ and +enabled+, on the value
# types of +key+, creating the key's entry in @keys when it does not exist.
#
# key     - the key whose index entry is replaced.
# config  - one of the index config objects defined in this file.
# enabled - flag stored on the new index entry.
#
# Enabling a sparse vector index first runs the single-index and config
# validations, which may raise ArgumentError.
#
# Returns the index entry that was stored.
# Raises ArgumentError when +config+ is not a supported index config.
def set_index_for_key(key, config, enabled)
  supported_configs = [
    StringInvertedIndexConfig, FtsIndexConfig, SparseVectorIndexConfig, VectorIndexConfig,
    IntInvertedIndexConfig, FloatInvertedIndexConfig, BoolInvertedIndexConfig
  ]
  # Checked before @keys is touched, so a rejected call leaves no empty entry behind.
  unless supported_configs.any? { |klass| config.is_a?(klass) }
    raise ArgumentError, "Unsupported index config type: #{config.class}"
  end

  if config.is_a?(SparseVectorIndexConfig) && enabled
    validate_single_sparse_vector_index(key)
    validate_sparse_vector_config(config)
  end

  current = @keys[key] = ensure_value_types(@keys[key])

  case config
  when StringInvertedIndexConfig
    ensure_string_value_type(current).string_inverted_index = StringInvertedIndexType.new(enabled, config)
  when FtsIndexConfig
    ensure_string_value_type(current).fts_index = FtsIndexType.new(enabled, config)
  when SparseVectorIndexConfig
    ensure_sparse_vector_value_type(current).sparse_vector_index = SparseVectorIndexType.new(enabled, config)
  when VectorIndexConfig
    ensure_float_list_value_type(current).vector_index = VectorIndexType.new(enabled, config)
  when IntInvertedIndexConfig
    ensure_int_value_type(current).int_inverted_index = IntInvertedIndexType.new(enabled, config)
  when FloatInvertedIndexConfig
    ensure_float_value_type(current).float_inverted_index = FloatInvertedIndexType.new(enabled, config)
  when BoolInvertedIndexConfig
    ensure_bool_value_type(current).bool_inverted_index = BoolInvertedIndexType.new(enabled, config)
  end
end
411
+
412
# Raises ArgumentError when any key other than +target_key+ already has an
# enabled sparse vector index; otherwise returns @keys.
def validate_single_sparse_vector_index(target_key)
  @keys.each do |existing_key, value_types|
    conflicting = existing_key != target_key &&
      value_types.sparse_vector&.sparse_vector_index&.enabled
    next unless conflicting

    raise ArgumentError,
      "Cannot enable sparse vector index on key '#{target_key}'. A sparse vector index is already enabled on key '#{existing_key}'. Only one sparse vector index is allowed per collection."
  end
end
422
+
423
# Raises ArgumentError when +config+ has a source_key but no
# embedding_function; returns nil otherwise.
def validate_sparse_vector_config(config)
  return unless config.source_key && config.embedding_function.nil?

  raise ArgumentError,
    "If source_key is provided then embedding_function must also be provided since there is no default embedding function."
end
429
+
430
# Fills @defaults with the built-in entries: FTS, vector and sparse vector
# indexes disabled; string, int, float and bool inverted indexes enabled.
def initialize_defaults
  fts = FtsIndexType.new(false, FtsIndexConfig.new)
  string_inverted = StringInvertedIndexType.new(true, StringInvertedIndexConfig.new)
  @defaults.string = StringValueType.new(fts, string_inverted)

  vector = VectorIndexType.new(false, VectorIndexConfig.new)
  @defaults.float_list = FloatListValueType.new(vector)

  sparse = SparseVectorIndexType.new(false, SparseVectorIndexConfig.new)
  @defaults.sparse_vector = SparseVectorValueType.new(sparse)

  int_inverted = IntInvertedIndexType.new(true, IntInvertedIndexConfig.new)
  @defaults.int_value = IntValueType.new(int_inverted)

  float_inverted = FloatInvertedIndexType.new(true, FloatInvertedIndexConfig.new)
  @defaults.float_value = FloatValueType.new(float_inverted)

  bool_inverted = BoolInvertedIndexType.new(true, BoolInvertedIndexConfig.new)
  @defaults.boolean = BoolValueType.new(bool_inverted)
end
456
+
457
# Registers the two reserved keys: DOCUMENT_KEY gets an enabled FTS index and
# a disabled string inverted index; EMBEDDING_KEY gets an enabled vector
# index whose source key is DOCUMENT_KEY.
def initialize_keys
  document_types = ValueTypes.new
  document_types.string = StringValueType.new(
    FtsIndexType.new(true, FtsIndexConfig.new),
    StringInvertedIndexType.new(false, StringInvertedIndexConfig.new)
  )
  @keys[DOCUMENT_KEY] = document_types

  embedding_types = ValueTypes.new
  @keys[EMBEDDING_KEY] = embedding_types
  embedding_types.float_list = FloatListValueType.new(
    VectorIndexType.new(true, VectorIndexConfig.new(source_key: DOCUMENT_KEY))
  )
end
469
+
470
# Serializes every populated slot of +value_types+ into a Hash keyed by the
# value-type name. Slots that are unset, or that serialize to an empty Hash,
# are left out.
def serialize_value_types(value_types)
  # [hash key, reader on value_types, serializer method]
  sections = [
    [STRING_VALUE_NAME, :string, :serialize_string_value_type],
    [FLOAT_LIST_VALUE_NAME, :float_list, :serialize_float_list_value_type],
    [SPARSE_VECTOR_VALUE_NAME, :sparse_vector, :serialize_sparse_vector_value_type],
    [INT_VALUE_NAME, :int_value, :serialize_int_value_type],
    [FLOAT_VALUE_NAME, :float_value, :serialize_float_value_type],
    [BOOL_VALUE_NAME, :boolean, :serialize_bool_value_type]
  ]

  sections.each_with_object({}) do |(json_name, reader, serializer), result|
    value_type = value_types.public_send(reader)
    next unless value_type

    serialized = send(serializer, value_type)
    result[json_name] = serialized unless serialized.empty?
  end
end
498
+
499
# Returns a Hash with one entry per index that is set on +value_type+ (FTS
# first, then string inverted). Each entry holds the enabled flag and an
# empty config Hash.
def serialize_string_value_type(value_type)
  indexes = {
    FTS_INDEX_NAME => value_type.fts_index,
    STRING_INVERTED_INDEX_NAME => value_type.string_inverted_index
  }

  indexes.each_with_object({}) do |(index_name, index), result|
    result[index_name] = { "enabled" => index.enabled, "config" => {} } if index
  end
end
515
+
516
# Returns {} when no vector index is set, otherwise a one-entry Hash holding
# the index's enabled flag and its serialized config.
def serialize_float_list_value_type(value_type)
  index = value_type.vector_index
  return {} unless index

  {
    VECTOR_INDEX_NAME => {
      "enabled" => index.enabled,
      "config" => serialize_config(index.config)
    }
  }
end
526
+
527
# Returns {} when no sparse vector index is set, otherwise a one-entry Hash
# holding the index's enabled flag and its serialized config.
def serialize_sparse_vector_value_type(value_type)
  index = value_type.sparse_vector_index
  return {} unless index

  {
    SPARSE_VECTOR_INDEX_NAME => {
      "enabled" => index.enabled,
      "config" => serialize_config(index.config)
    }
  }
end
537
+
538
# Returns {} when no int inverted index is set, otherwise a one-entry Hash
# holding the index's enabled flag and an empty config Hash.
def serialize_int_value_type(value_type)
  index = value_type.int_inverted_index
  return {} unless index

  { INT_INVERTED_INDEX_NAME => { "enabled" => index.enabled, "config" => {} } }
end
548
+
549
# Returns {} when no float inverted index is set, otherwise a one-entry Hash
# holding the index's enabled flag and an empty config Hash.
def serialize_float_value_type(value_type)
  index = value_type.float_inverted_index
  return {} unless index

  { FLOAT_INVERTED_INDEX_NAME => { "enabled" => index.enabled, "config" => {} } }
end
559
+
560
# Returns {} when no bool inverted index is set, otherwise a one-entry Hash
# holding the index's enabled flag and an empty config Hash.
def serialize_bool_value_type(value_type)
  index = value_type.bool_inverted_index
  return {} unless index

  { BOOL_INVERTED_INDEX_NAME => { "enabled" => index.enabled, "config" => {} } }
end
570
+
571
# Dispatches to the vector or sparse vector serializer based on the class of
# +config+; any other config serializes to an empty Hash.
def serialize_config(config)
  return serialize_vector_config(config) if config.is_a?(VectorIndexConfig)
  return serialize_sparse_vector_config(config) if config.is_a?(SparseVectorIndexConfig)

  {}
end
581
+
582
# Serializes a VectorIndexConfig into a Hash.
#
# "embedding_function" is always present. "space", "source_key", "hnsw" and
# "spann" are only written when they have a value. When the config has no
# space, the embedding function's default_space is used if it offers one.
def serialize_vector_config(config)
  embedding_function = config.embedding_function
  result = {
    "embedding_function" => EmbeddingFunctions.prepare_embedding_function_config(embedding_function)
  }

  space = config.space
  space = embedding_function.default_space if space.nil? && embedding_function&.respond_to?(:default_space)

  result["space"] = space if space
  result["source_key"] = config.source_key if config.source_key
  # hnsw / spann are copied so the serialized Hash does not share them with the config.
  result["hnsw"] = deep_clone(config.hnsw) if config.hnsw
  result["spann"] = deep_clone(config.spann) if config.spann
  result
end
597
+
598
# Serializes a SparseVectorIndexConfig into a Hash.
#
# "embedding_function" is always present; "source_key" only when set; "bm25"
# only when it is exactly true or false.
def serialize_sparse_vector_config(config)
  result = {
    "embedding_function" => EmbeddingFunctions.prepare_embedding_function_config(config.embedding_function)
  }
  result["source_key"] = config.source_key if config.source_key
  result["bm25"] = config.bm25 if [ true, false ].include?(config.bm25)
  result
end
605
+
606
# Builds a ValueTypes from its serialized Hash form.
#
# json   - Hash keyed by value-type name, or nil.
# client - passed through to the float-list and sparse-vector deserializers.
#
# Returns a ValueTypes whose slots are set only for the sections present in
# +json+; a nil +json+ yields a ValueTypes with every slot left nil.
def self.deserialize_value_types(json, client: nil)
  # A key mapped to null would otherwise fail with NoMethodError on nil;
  # treat it like an empty section, as deserialize_from_json does for "defaults".
  json ||= {}
  result = ValueTypes.new

  result.string = deserialize_string_value_type(json[STRING_VALUE_NAME]) if json[STRING_VALUE_NAME]
  result.float_list = deserialize_float_list_value_type(json[FLOAT_LIST_VALUE_NAME], client: client) if json[FLOAT_LIST_VALUE_NAME]
  result.sparse_vector = deserialize_sparse_vector_value_type(json[SPARSE_VECTOR_VALUE_NAME], client: client) if json[SPARSE_VECTOR_VALUE_NAME]
  result.int_value = deserialize_int_value_type(json[INT_VALUE_NAME]) if json[INT_VALUE_NAME]
  result.float_value = deserialize_float_value_type(json[FLOAT_VALUE_NAME]) if json[FLOAT_VALUE_NAME]
  result.boolean = deserialize_bool_value_type(json[BOOL_VALUE_NAME]) if json[BOOL_VALUE_NAME]

  result
end
618
+
619
# Builds a StringValueType from its serialized Hash. Each index is created
# only when its section is present and takes its enabled flag from there.
def self.deserialize_string_value_type(json)
  fts_section = json[FTS_INDEX_NAME]
  inverted_section = json[STRING_INVERTED_INDEX_NAME]

  StringValueType.new(
    fts_section ? FtsIndexType.new(fts_section["enabled"], FtsIndexConfig.new) : nil,
    inverted_section ? StringInvertedIndexType.new(inverted_section["enabled"], StringInvertedIndexConfig.new) : nil
  )
end
632
+
633
# Builds a FloatListValueType from its serialized Hash. The vector index is
# created only when its section is present; its config is rebuilt through
# deserialize_vector_config.
def self.deserialize_float_list_value_type(json, client: nil)
  section = json[VECTOR_INDEX_NAME]
  return FloatListValueType.new(nil) unless section

  parsed_config = deserialize_vector_config(section["config"], client: client)
  FloatListValueType.new(VectorIndexType.new(section["enabled"], parsed_config))
end
642
+
643
# Builds a SparseVectorValueType from its serialized Hash. The sparse vector
# index is created only when its section is present; its config is rebuilt
# through deserialize_sparse_vector_config.
def self.deserialize_sparse_vector_value_type(json, client: nil)
  section = json[SPARSE_VECTOR_INDEX_NAME]
  return SparseVectorValueType.new(nil) unless section

  parsed_config = deserialize_sparse_vector_config(section["config"], client: client)
  SparseVectorValueType.new(SparseVectorIndexType.new(section["enabled"], parsed_config))
end
652
+
653
# Builds an IntValueType from its serialized Hash. The index is created only
# when its section is present.
def self.deserialize_int_value_type(json)
  section = json[INT_INVERTED_INDEX_NAME]
  index = section ? IntInvertedIndexType.new(section["enabled"], IntInvertedIndexConfig.new) : nil
  IntValueType.new(index)
end
661
+
662
# Builds a FloatValueType from its serialized Hash. The index is created only
# when its section is present.
def self.deserialize_float_value_type(json)
  section = json[FLOAT_INVERTED_INDEX_NAME]
  index = section ? FloatInvertedIndexType.new(section["enabled"], FloatInvertedIndexConfig.new) : nil
  FloatValueType.new(index)
end
670
+
671
# Builds a BoolValueType from its serialized Hash. The index is created only
# when its section is present.
def self.deserialize_bool_value_type(json)
  section = json[BOOL_INVERTED_INDEX_NAME]
  index = section ? BoolInvertedIndexType.new(section["enabled"], BoolInvertedIndexConfig.new) : nil
  BoolValueType.new(index)
end
679
+
680
# Builds a VectorIndexConfig from its serialized Hash.
#
# json   - Hash with optional "embedding_function", "space", "source_key",
#          "hnsw" and "spann" entries, or nil.
# client - passed through to EmbeddingFunctions.build_embedding_function.
#
# When no space is given, the embedding function's default_space is used if
# it offers one.
#
# Returns the VectorIndexConfig.
def self.deserialize_vector_config(json, client: nil)
  # An index entry without a "config" section arrives here as nil; handle it
  # like an empty config instead of failing with NoMethodError on nil.
  json ||= {}

  embedding_function = EmbeddingFunctions.build_embedding_function(json["embedding_function"], client: client)
  config = VectorIndexConfig.new(
    space: json["space"],
    embedding_function: embedding_function,
    source_key: json["source_key"],
    hnsw: json["hnsw"],
    spann: json["spann"],
  )
  if config.space.nil? && embedding_function&.respond_to?(:default_space)
    config.space = embedding_function.default_space
  end
  config
end
695
+
696
# Builds a SparseVectorIndexConfig from its serialized Hash.
#
# json   - Hash with optional "embedding_function", "source_key" and "bm25"
#          entries, or nil.
# client - passed through to EmbeddingFunctions.build_sparse_embedding_function.
#
# Returns the SparseVectorIndexConfig.
def self.deserialize_sparse_vector_config(json, client: nil)
  # An index entry without a "config" section arrives here as nil; handle it
  # like an empty config instead of failing with NoMethodError on nil.
  json ||= {}

  embedding_function = EmbeddingFunctions.build_sparse_embedding_function(json["embedding_function"], client: client)
  SparseVectorIndexConfig.new(
    embedding_function: embedding_function,
    source_key: json["source_key"],
    bm25: json["bm25"],
  )
end
704
+
705
# Returns nil for nil, otherwise an independent copy of +value+ made by a
# Marshal dump/load round trip.
def deep_clone(value)
  value.nil? ? nil : Marshal.load(Marshal.dump(value))
end
709
+
710
# Returns +value_types+ when it is set, otherwise a new empty ValueTypes.
def ensure_value_types(value_types)
  return value_types if value_types

  ValueTypes.new
end
713
+
714
# Returns the string slot of +value_types+, first filling it with a new
# StringValueType when it is unset.
def ensure_string_value_type(value_types)
  value_types.string = StringValueType.new unless value_types.string
  value_types.string
end
718
+
719
# Returns the float_list slot of +value_types+, first filling it with a new
# FloatListValueType when it is unset.
def ensure_float_list_value_type(value_types)
  value_types.float_list = FloatListValueType.new unless value_types.float_list
  value_types.float_list
end
723
+
724
# Returns the sparse_vector slot of +value_types+, first filling it with a
# new SparseVectorValueType when it is unset.
def ensure_sparse_vector_value_type(value_types)
  value_types.sparse_vector = SparseVectorValueType.new unless value_types.sparse_vector
  value_types.sparse_vector
end
728
+
729
# Returns the int_value slot of +value_types+, first filling it with a new
# IntValueType when it is unset.
def ensure_int_value_type(value_types)
  value_types.int_value = IntValueType.new unless value_types.int_value
  value_types.int_value
end
733
+
734
# Returns the float_value slot of +value_types+, first filling it with a new
# FloatValueType when it is unset.
def ensure_float_value_type(value_types)
  value_types.float_value = FloatValueType.new unless value_types.float_value
  value_types.float_value
end
738
+
739
# Returns the boolean slot of +value_types+, first filling it with a new
# BoolValueType when it is unset.
def ensure_bool_value_type(value_types)
  value_types.boolean = BoolValueType.new unless value_types.boolean
  value_types.boolean
end
743
+ end
744
+ end