chromadb-experimental 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. checksums.yaml +7 -0
  2. data/lib/chromadb/admin_client.rb +6 -0
  3. data/lib/chromadb/client.rb +317 -0
  4. data/lib/chromadb/collection.rb +573 -0
  5. data/lib/chromadb/embedding_functions/chroma_bm25.rb +459 -0
  6. data/lib/chromadb/embedding_functions/chroma_cloud_qwen.rb +139 -0
  7. data/lib/chromadb/embedding_functions/chroma_cloud_splade.rb +121 -0
  8. data/lib/chromadb/embedding_functions.rb +121 -0
  9. data/lib/chromadb/errors.rb +120 -0
  10. data/lib/chromadb/http_client.rb +142 -0
  11. data/lib/chromadb/openapi/lib/chromadb/api/default_api.rb +2349 -0
  12. data/lib/chromadb/openapi/lib/chromadb/api_client.rb +392 -0
  13. data/lib/chromadb/openapi/lib/chromadb/api_error.rb +58 -0
  14. data/lib/chromadb/openapi/lib/chromadb/configuration.rb +295 -0
  15. data/lib/chromadb/openapi/lib/chromadb/models/add_collection_records_payload.rb +260 -0
  16. data/lib/chromadb/openapi/lib/chromadb/models/attach_function_request.rb +250 -0
  17. data/lib/chromadb/openapi/lib/chromadb/models/attach_function_response.rb +235 -0
  18. data/lib/chromadb/openapi/lib/chromadb/models/attached_function_api_response.rb +361 -0
  19. data/lib/chromadb/openapi/lib/chromadb/models/attached_function_info.rb +240 -0
  20. data/lib/chromadb/openapi/lib/chromadb/models/bool_inverted_index_type.rb +229 -0
  21. data/lib/chromadb/openapi/lib/chromadb/models/bool_value_type.rb +221 -0
  22. data/lib/chromadb/openapi/lib/chromadb/models/checklist_response.rb +245 -0
  23. data/lib/chromadb/openapi/lib/chromadb/models/collection.rb +315 -0
  24. data/lib/chromadb/openapi/lib/chromadb/models/collection_configuration.rb +240 -0
  25. data/lib/chromadb/openapi/lib/chromadb/models/create_collection_payload.rb +260 -0
  26. data/lib/chromadb/openapi/lib/chromadb/models/create_database_payload.rb +220 -0
  27. data/lib/chromadb/openapi/lib/chromadb/models/create_tenant_payload.rb +220 -0
  28. data/lib/chromadb/openapi/lib/chromadb/models/database.rb +240 -0
  29. data/lib/chromadb/openapi/lib/chromadb/models/detach_function_request.rb +221 -0
  30. data/lib/chromadb/openapi/lib/chromadb/models/detach_function_response.rb +220 -0
  31. data/lib/chromadb/openapi/lib/chromadb/models/embedding_function_new_configuration.rb +230 -0
  32. data/lib/chromadb/openapi/lib/chromadb/models/error_response.rb +230 -0
  33. data/lib/chromadb/openapi/lib/chromadb/models/float_inverted_index_type.rb +229 -0
  34. data/lib/chromadb/openapi/lib/chromadb/models/float_list_value_type.rb +221 -0
  35. data/lib/chromadb/openapi/lib/chromadb/models/float_value_type.rb +221 -0
  36. data/lib/chromadb/openapi/lib/chromadb/models/fork_collection_payload.rb +220 -0
  37. data/lib/chromadb/openapi/lib/chromadb/models/fts_index_type.rb +229 -0
  38. data/lib/chromadb/openapi/lib/chromadb/models/get_attached_function_response.rb +224 -0
  39. data/lib/chromadb/openapi/lib/chromadb/models/get_response.rb +270 -0
  40. data/lib/chromadb/openapi/lib/chromadb/models/get_tenant_response.rb +230 -0
  41. data/lib/chromadb/openapi/lib/chromadb/models/get_user_identity_response.rb +246 -0
  42. data/lib/chromadb/openapi/lib/chromadb/models/heartbeat_response.rb +235 -0
  43. data/lib/chromadb/openapi/lib/chromadb/models/hnsw_configuration.rb +330 -0
  44. data/lib/chromadb/openapi/lib/chromadb/models/hnsw_index_config.rb +371 -0
  45. data/lib/chromadb/openapi/lib/chromadb/models/include.rb +210 -0
  46. data/lib/chromadb/openapi/lib/chromadb/models/int_inverted_index_type.rb +229 -0
  47. data/lib/chromadb/openapi/lib/chromadb/models/int_value_type.rb +221 -0
  48. data/lib/chromadb/openapi/lib/chromadb/models/query_response.rb +280 -0
  49. data/lib/chromadb/openapi/lib/chromadb/models/raw_where_fields.rb +230 -0
  50. data/lib/chromadb/openapi/lib/chromadb/models/schema.rb +258 -0
  51. data/lib/chromadb/openapi/lib/chromadb/models/search_payload.rb +256 -0
  52. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_filter.rb +230 -0
  53. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_group_by.rb +230 -0
  54. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_limit.rb +230 -0
  55. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_select.rb +220 -0
  56. data/lib/chromadb/openapi/lib/chromadb/models/search_request_payload.rb +220 -0
  57. data/lib/chromadb/openapi/lib/chromadb/models/search_response.rb +270 -0
  58. data/lib/chromadb/openapi/lib/chromadb/models/space.rb +210 -0
  59. data/lib/chromadb/openapi/lib/chromadb/models/spann_configuration.rb +420 -0
  60. data/lib/chromadb/openapi/lib/chromadb/models/spann_index_config.rb +536 -0
  61. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector.rb +244 -0
  62. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_config.rb +242 -0
  63. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_type.rb +234 -0
  64. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_value_type.rb +221 -0
  65. data/lib/chromadb/openapi/lib/chromadb/models/string_inverted_index_type.rb +229 -0
  66. data/lib/chromadb/openapi/lib/chromadb/models/string_value_type.rb +231 -0
  67. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_configuration.rb +240 -0
  68. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_payload.rb +240 -0
  69. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_records_payload.rb +260 -0
  70. data/lib/chromadb/openapi/lib/chromadb/models/update_hnsw_configuration.rb +345 -0
  71. data/lib/chromadb/openapi/lib/chromadb/models/update_spann_configuration.rb +260 -0
  72. data/lib/chromadb/openapi/lib/chromadb/models/update_tenant_payload.rb +220 -0
  73. data/lib/chromadb/openapi/lib/chromadb/models/upsert_collection_records_payload.rb +260 -0
  74. data/lib/chromadb/openapi/lib/chromadb/models/value_types.rb +271 -0
  75. data/lib/chromadb/openapi/lib/chromadb/models/vector_index_config.rb +261 -0
  76. data/lib/chromadb/openapi/lib/chromadb/models/vector_index_type.rb +234 -0
  77. data/lib/chromadb/openapi/lib/chromadb/version.rb +15 -0
  78. data/lib/chromadb/openapi/lib/chromadb.rb +102 -0
  79. data/lib/chromadb/openapi.rb +6 -0
  80. data/lib/chromadb/schema.rb +744 -0
  81. data/lib/chromadb/schemas/chroma-cloud-qwen.json +61 -0
  82. data/lib/chromadb/schemas/chroma-cloud-splade.json +31 -0
  83. data/lib/chromadb/schemas/chroma_bm25.json +37 -0
  84. data/lib/chromadb/search/key.rb +94 -0
  85. data/lib/chromadb/search/limit.rb +41 -0
  86. data/lib/chromadb/search/rank.rb +425 -0
  87. data/lib/chromadb/search/search.rb +73 -0
  88. data/lib/chromadb/search/select.rb +54 -0
  89. data/lib/chromadb/search/where.rb +157 -0
  90. data/lib/chromadb/search.rb +8 -0
  91. data/lib/chromadb/types/results.rb +96 -0
  92. data/lib/chromadb/types/sparse_vector.rb +86 -0
  93. data/lib/chromadb/types/validation.rb +519 -0
  94. data/lib/chromadb/types.rb +13 -0
  95. data/lib/chromadb/version.rb +5 -0
  96. data/lib/chromadb.rb +15 -0
  97. metadata +233 -0
@@ -0,0 +1,519 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
+ module Chroma
6
+ module Types
7
+ module Validation
8
+ INCLUDE_OPTIONS = %w[documents embeddings metadatas distances uris data].freeze
9
+
10
+ module_function
11
+
12
# Coerces a single value into a one-element Array.
#
# @param target [Object, Array, nil] a single item, a list of items, or nil
# @return [Array, nil] nil for nil, the same Array for an Array, otherwise
#   a new Array containing +target+
def maybe_cast_one_to_many(target)
  case target
  when nil then nil
  when Array then target
  else [ target ]
  end
end
17
+
18
# Normalizes embeddings input into a list of vectors.
#
# A non-empty Array made up solely of Numeric values is treated as one
# vector and wrapped in an outer Array; any other Array is returned as-is.
#
# @param target [Array, nil]
# @return [Array, nil] nil when +target+ is nil
# @raise [ArgumentError] when +target+ is neither nil nor an Array
def normalize_embeddings(target)
  return nil if target.nil?
  unless target.is_a?(Array)
    raise ArgumentError, "Expected embeddings to be an Array, got #{target.class}"
  end

  single_vector = !target.empty? && target.all? { |component| component.is_a?(Numeric) }
  single_vector ? [ target ] : target
end
25
+
26
# Builds a copy of a metadata Hash in which tagged sparse-vector Hashes are
# converted to SparseVector objects.
#
# A value is converted when it is a Hash whose TYPE_KEY entry equals
# SPARSE_VECTOR_TYPE_VALUE; every other value is copied unchanged.
#
# @param metadata [Hash, nil]
# @return [Hash, nil] a new Hash, or nil when +metadata+ is nil
# @raise [ArgumentError] when +metadata+ is neither nil nor a Hash
def normalize_metadata(metadata)
  return nil if metadata.nil?
  raise ArgumentError, "Expected metadata to be a Hash, got #{metadata.class}" unless metadata.is_a?(Hash)

  metadata.each_with_object({}) do |(name, raw), result|
    tagged_sparse = raw.is_a?(Hash) && raw[TYPE_KEY] == SPARSE_VECTOR_TYPE_VALUE
    result[name] = tagged_sparse ? SparseVector.from_h(raw) : raw
  end
end
41
+
42
# Normalizes one metadata Hash or a list of them via normalize_metadata.
#
# @param metadatas [Hash, Array, nil]
# @return [Array, nil] nil for nil; a one-element Array for a single Hash;
#   otherwise one normalized entry per input entry
# @raise [ArgumentError] when +metadatas+ is not nil, a Hash, or an Array
def normalize_metadatas(metadatas)
  case metadatas
  when nil then nil
  when Hash then [ normalize_metadata(metadatas) ]
  when Array then metadatas.map { |entry| normalize_metadata(entry) }
  else raise ArgumentError, "Expected metadatas to be an Array, got #{metadatas.class}"
  end
end
50
+
51
# Builds a copy of a metadata Hash in which SparseVector values are replaced
# by the result of their #to_h; all other values are copied unchanged.
#
# @param metadata [Hash, nil]
# @return [Hash, nil] a new Hash, or nil when +metadata+ is nil
# @raise [ArgumentError] when +metadata+ is neither nil nor a Hash
def serialize_metadata(metadata)
  return nil if metadata.nil?
  raise ArgumentError, "Expected metadata to be a Hash, got #{metadata.class}" unless metadata.is_a?(Hash)

  metadata.each_with_object({}) do |(name, entry), wire|
    wire[name] = entry.is_a?(SparseVector) ? entry.to_h : entry
  end
end
66
+
67
# Serializes each entry of a metadata list via serialize_metadata,
# keeping nil entries as nil.
#
# @param metadatas [Array, nil]
# @return [Array, nil] nil when +metadatas+ is nil
# @raise [ArgumentError] when +metadatas+ is neither nil nor an Array
def serialize_metadatas(metadatas)
  return nil if metadatas.nil?
  raise ArgumentError, "Expected metadatas to be an Array, got #{metadatas.class}" unless metadatas.is_a?(Array)

  metadatas.map do |entry|
    next nil if entry.nil?

    serialize_metadata(entry)
  end
end
74
+
75
# Deserializes one metadata Hash by delegating to normalize_metadata.
#
# @param metadata [Hash, nil]
# @return [Hash, nil] nil when +metadata+ is nil
def deserialize_metadata(metadata)
  metadata.nil? ? nil : normalize_metadata(metadata)
end
79
+
80
# Deserializes a metadata list by delegating to normalize_metadatas.
#
# @param metadatas [Array, Hash, nil]
# @return [Array, nil] nil when +metadatas+ is nil
def deserialize_metadatas(metadatas)
  metadatas.nil? ? nil : normalize_metadatas(metadatas)
end
84
+
85
# Checks that +ids+ is a non-empty Array of unique Strings.
#
# @param ids [Array<String>]
# @return [Array<String>] the same +ids+ object when valid
# @raise [ArgumentError] when +ids+ is not an Array, is empty, or holds a
#   non-String entry
# @raise [Chroma::DuplicateIDError] when any ID occurs more than once; the
#   message lists every duplicate when there are fewer than ten, otherwise
#   the first five and last five
def validate_ids(ids)
  raise ArgumentError, "Expected IDs to be a list, got #{ids.class} as IDs" unless ids.is_a?(Array)
  if ids.empty?
    raise ArgumentError, "Expected IDs to be a non-empty list, got #{ids.length} IDs"
  end

  encountered = Set.new
  repeated = Set.new
  ids.each do |candidate|
    unless candidate.is_a?(String)
      raise ArgumentError, "Expected ID to be a String, got #{candidate.inspect}"
    end
    # Set#add? returns nil when the element was already present.
    repeated << candidate unless encountered.add?(candidate)
  end
  return ids if repeated.empty?

  listing = repeated.to_a
  message =
    if listing.length < 10
      "Expected IDs to be unique, found duplicates of: #{listing.join(', ')}"
    else
      "Expected IDs to be unique, found #{listing.length} duplicated IDs: " \
        "#{listing.first(5).join(', ')}, ..., #{listing.last(5).join(', ')}"
    end
  raise Chroma::DuplicateIDError, message
end
115
+
116
# Checks one metadata Hash supplied for a record.
#
# Keys must be Strings other than META_KEY_CHROMA_DOCUMENT. Values must be
# a SparseVector, String, Numeric, true, false, or nil.
#
# @param metadata [Hash, nil]
# @return [Hash, nil] the same +metadata+ object (nil passes through)
# @raise [ArgumentError] when +metadata+ is not a Hash, is empty, uses the
#   reserved key, or holds an unsupported value
# @raise [TypeError] when a key is not a String
def validate_metadata(metadata)
  return metadata if metadata.nil?
  raise ArgumentError, "Expected metadata to be a Hash or nil, got #{metadata.class} as metadata" unless metadata.is_a?(Hash)
  if metadata.empty?
    raise ArgumentError, "Expected metadata to be a non-empty Hash, got #{metadata.length} metadata attributes"
  end

  metadata.each do |name, entry|
    # The reserved-key check runs before the key type check.
    if name == META_KEY_CHROMA_DOCUMENT
      raise ArgumentError, "Expected metadata to not contain the reserved key #{META_KEY_CHROMA_DOCUMENT}"
    end
    raise TypeError, "Expected metadata key to be a String, got #{name.inspect}" unless name.is_a?(String)

    case entry
    when SparseVector, nil, String, Numeric, true, false
      next
    else
      raise ArgumentError,
            "Expected metadata value to be a String, Numeric, Boolean, SparseVector, or nil, got #{entry.inspect} which is a #{entry.class}"
    end
  end
  metadata
end
138
+
139
# Checks one metadata Hash supplied for an update.
#
# Keys must be Strings; values must be a SparseVector, String, Numeric,
# true, false, or nil. No reserved-key check is performed here.
#
# @param metadata [Hash, nil]
# @return [Hash, nil] the same +metadata+ object (nil passes through)
# @raise [ArgumentError] when +metadata+ is not a Hash, is empty, has a
#   non-String key, or holds an unsupported value
def validate_update_metadata(metadata)
  return metadata if metadata.nil?
  raise ArgumentError, "Expected metadata to be a Hash or nil, got #{metadata.class}" unless metadata.is_a?(Hash)
  raise ArgumentError, "Expected metadata to be a non-empty Hash, got #{metadata.inspect}" if metadata.empty?

  permitted = [ SparseVector, NilClass, String, Numeric, TrueClass, FalseClass ]
  metadata.each do |name, entry|
    unless name.is_a?(String)
      raise ArgumentError, "Expected metadata key to be a String, got #{name.inspect}"
    end
    next if permitted.any? { |type| entry.is_a?(type) }

    raise ArgumentError,
          "Expected metadata value to be a String, Numeric, Boolean, SparseVector, or nil, got #{entry.inspect}"
  end
  metadata
end
158
+
159
# Runs validate_metadata on every entry of a metadata list.
#
# @param metadatas [Array]
# @return [Array] the same +metadatas+ object
# @raise [ArgumentError] when +metadatas+ is not an Array (entry-level
#   errors come from validate_metadata)
def validate_metadatas(metadatas)
  raise ArgumentError, "Expected metadatas to be a list, got #{metadatas.inspect}" unless metadatas.is_a?(Array)

  metadatas.each do |entry|
    validate_metadata(entry)
  end
  metadatas
end
166
+
167
# Validates the structure of a metadata +where+ filter.
#
# A filter is a single-entry Hash with a String key. The key is either a
# logical operator ("$and" / "$or", whose value is a list of at least two
# nested filters, each validated recursively) or a name mapped to a direct
# value (String, Numeric, or Boolean) or to a single-operator expression
# such as { "$gt" => 3 }.
#
# @param where [Hash] the filter to check
# @return [Hash] the same +where+ object when it is valid
# @raise [ArgumentError] on the first structural problem found
def validate_where(where)
  unless where.is_a?(Hash)
    raise ArgumentError, "Expected where to be a Hash, got #{where.inspect}"
  end
  raise ArgumentError, "Expected where to have exactly one operator, got #{where.inspect}" if where.length != 1

  where.each do |key, value|
    raise ArgumentError, "Expected where key to be a String, got #{key.inspect}" unless key.is_a?(String)

    # Booleans are not Numeric in Ruby, so they are allowed explicitly here.
    # Without this, { "flag" => true } is rejected even though the equivalent
    # { "flag" => { "$eq" => true } } is accepted by the operator branch below.
    direct_value_ok = value.is_a?(String) || value.is_a?(Numeric) || value.is_a?(Hash) ||
                      value.is_a?(TrueClass) || value.is_a?(FalseClass)
    # For these operator keys the direct-value type check is skipped.
    unless [ "$and", "$or", "$in", "$nin" ].include?(key) || direct_value_ok
      raise ArgumentError,
            "Expected where value to be a String, Numeric, Boolean, or operator expression, got #{value.inspect}"
    end

    if key == "$and" || key == "$or"
      unless value.is_a?(Array)
        raise ArgumentError,
              "Expected where value for #{key} to be a list of where expressions, got #{value.inspect}"
      end
      if value.length <= 1
        raise ArgumentError,
              "Expected where value for #{key} to be a list with at least two expressions, got #{value.inspect}"
      end
      value.each { |expr| validate_where(expr) }
    elsif value.is_a?(Hash)
      if value.length != 1
        raise ArgumentError,
              "Expected where operator expression to have exactly one operator, got #{value.inspect}"
      end
      operator, operand = value.first
      unless [ "$eq", "$ne", "$gt", "$gte", "$lt", "$lte", "$in", "$nin", "$contains", "$not_contains", "$regex", "$not_regex" ].include?(operator)
        raise ArgumentError,
              "Expected where operator to be one of $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin, $contains, $not_contains, $regex, $not_regex, got #{operator}"
      end
      if [ "$in", "$nin" ].include?(operator)
        # Set-membership operators take a non-empty list of scalars.
        unless operand.is_a?(Array)
          raise ArgumentError,
                "Expected where operand for #{operator} to be a list, got #{operand.inspect}"
        end
        if operand.empty?
          raise ArgumentError,
                "Expected where operand for #{operator} to be a non-empty list"
        end
        operand.each do |item|
          unless item.is_a?(String) || item.is_a?(Numeric) || item.is_a?(TrueClass) || item.is_a?(FalseClass)
            raise ArgumentError,
                  "Expected where list items to be String, Numeric, or Boolean, got #{item.inspect}"
          end
        end
      elsif [ "$contains", "$not_contains", "$regex", "$not_regex" ].include?(operator)
        # Text-matching operators take a non-empty String.
        unless operand.is_a?(String)
          raise ArgumentError,
                "Expected where operand for #{operator} to be a String, got #{operand.inspect}"
        end
        raise ArgumentError, "Expected where operand for #{operator} to be a non-empty String" if operand.empty?
      else
        # Comparison operators take a single scalar.
        unless operand.is_a?(String) || operand.is_a?(Numeric) || operand.is_a?(TrueClass) || operand.is_a?(FalseClass)
          raise ArgumentError,
                "Expected where operand for #{operator} to be String, Numeric, or Boolean, got #{operand.inspect}"
        end
      end
    end
  end
end
233
+
234
# Validates the structure of a +where_document+ filter.
#
# The filter is a single-entry Hash keyed by one of "$contains",
# "$not_contains", "$regex", "$not_regex" (value: non-empty String) or
# "$and" / "$or" (value: list of at least two nested filters, validated
# recursively).
#
# @param where_document [Hash]
# @return [Hash] the same +where_document+ object when valid
# @raise [ArgumentError] on the first structural problem found
def validate_where_document(where_document)
  raise ArgumentError, "Expected where document to be a Hash, got #{where_document.inspect}" unless where_document.is_a?(Hash)
  if where_document.length != 1
    raise ArgumentError, "Expected where document to have exactly one operator, got #{where_document.inspect}"
  end

  where_document.each do |op, arg|
    unless op.is_a?(String)
      raise ArgumentError, "Expected where document key to be a String, got #{op.inspect}"
    end

    case op
    when "$and", "$or"
      unless arg.is_a?(Array)
        raise ArgumentError,
              "Expected document value for #{op} to be a list of where document expressions, got #{arg.inspect}"
      end
      if arg.length <= 1
        raise ArgumentError,
              "Expected document value for #{op} to be a list with at least two where document expressions, got #{arg.inspect}"
      end
      arg.each { |nested| validate_where_document(nested) }
    when "$contains", "$not_contains", "$regex", "$not_regex"
      unless arg.is_a?(String)
        raise ArgumentError,
              "Expected where document operand value for operator #{op} to be a String, got #{arg.inspect}"
      end
      if arg.empty?
        raise ArgumentError,
              "Expected where document operand value for operator #{op} to be a non-empty String"
      end
    else
      raise ArgumentError,
            "Expected where document operator to be one of $contains, $not_contains, $regex, $not_regex, $and, $or, got #{op}"
    end
  end
end
268
+
269
# Checks that +include+ is a list of recognised include options.
#
# @param include [Array<String>] each item must appear in INCLUDE_OPTIONS
# @param disallowed [Array<String>, nil] options that are rejected for this
#   call even though they are otherwise recognised
# @return [Array<String>] the same +include+ object when valid
# @raise [ArgumentError] on a non-Array input, a non-String item, an
#   unrecognised item, or a disallowed item
def validate_include(include, disallowed: nil)
  raise ArgumentError, "Expected include to be a list, got #{include.inspect}" unless include.is_a?(Array)

  include.each do |option|
    raise ArgumentError, "Expected include item to be a String, got #{option.inspect}" unless option.is_a?(String)

    unless INCLUDE_OPTIONS.include?(option)
      raise ArgumentError, "Expected include item to be one of #{INCLUDE_OPTIONS.join(', ')}, got #{option}"
    end
    next unless disallowed && disallowed.include?(option)

    raise ArgumentError, "Include item cannot be one of #{disallowed.join(', ')}, got #{option}"
  end
end
285
+
286
# Checks that the requested result count is a positive Integer.
#
# @param n_results [Integer]
# @return [Integer] +n_results+ unchanged when valid
# @raise [ArgumentError] when +n_results+ is not an Integer or is <= 0
def validate_n_results(n_results)
  raise ArgumentError, "Expected requested number of results to be an Integer, got #{n_results.inspect}" unless n_results.is_a?(Integer)
  return n_results if n_results.positive?

  raise ArgumentError, "Number of requested results #{n_results} cannot be negative or zero."
end
295
+
296
# Checks that +embeddings+ is a non-empty list of non-empty Numeric vectors.
#
# @param embeddings [Array<Array<Numeric>>]
# @return [Array<Array<Numeric>>] the same +embeddings+ object when valid
# @raise [ArgumentError] when the outer list or any vector is not an Array,
#   is empty, or a vector holds a non-Numeric value
def validate_embeddings(embeddings)
  raise ArgumentError, "Expected embeddings to be a list, got #{embeddings.class}" unless embeddings.is_a?(Array)
  if embeddings.empty?
    raise ArgumentError,
          "Expected embeddings to be a list with at least one item, got #{embeddings.length} embeddings"
  end

  embeddings.each_with_index do |vector, position|
    unless vector.is_a?(Array)
      raise ArgumentError,
            "Expected embedding at position #{position} to be an Array, got #{vector.class}"
    end
    raise ArgumentError, "Expected embedding at position #{position} to be non-empty" if vector.empty?

    # Locate by index so that a nil component is still reported.
    bad_at = vector.index { |component| !component.is_a?(Numeric) }
    next if bad_at.nil?

    raise ArgumentError,
          "Expected each value in the embedding to be Numeric, got #{vector[bad_at].inspect}"
  end
  embeddings
end
318
+
319
# Checks that +vectors+ is a non-empty list of SparseVector instances.
#
# @param vectors [Array<SparseVector>]
# @return [Array<SparseVector>] the same +vectors+ object when valid
# @raise [ArgumentError] when +vectors+ is not an Array, is empty, or
#   contains something other than a SparseVector
def validate_sparse_vectors(vectors)
  raise ArgumentError, "Expected sparse vectors to be a list, got #{vectors.class}" unless vectors.is_a?(Array)
  if vectors.empty?
    raise ArgumentError,
          "Expected sparse vectors to be a non-empty list, got #{vectors.length} sparse vectors"
  end

  bad_at = vectors.index { |candidate| !candidate.is_a?(SparseVector) }
  return vectors if bad_at.nil?

  raise ArgumentError,
        "Expected SparseVector instance at position #{bad_at}, got #{vectors[bad_at].class}"
end
334
+
335
# Checks that +documents+ is a non-empty list of Strings.
#
# @param documents [Array<String, nil>]
# @param nullable [Boolean] when truthy, nil entries are accepted
# @return [Array] the same +documents+ object when valid
# @raise [ArgumentError] when +documents+ is not an Array, is empty, or
#   holds an entry that is neither a String nor an allowed nil
def validate_documents(documents, nullable: false)
  raise ArgumentError, "Expected documents to be a list, got #{documents.class}" unless documents.is_a?(Array)
  if documents.empty?
    raise ArgumentError,
          "Expected documents to be a non-empty list, got #{documents.length} documents"
  end

  documents.each do |entry|
    acceptable = entry.is_a?(String) || (nullable && entry.nil?)
    raise ArgumentError, "Expected document to be a String, got #{entry.inspect}" unless acceptable
  end
end
348
+
349
# Checks that +images+ is a non-empty list whose entries are all Arrays.
#
# @param images [Array<Array>]
# @return [Array<Array>] the same +images+ object when valid
# @raise [ArgumentError] when +images+ is not an Array, is empty, or
#   contains a non-Array entry
def validate_images(images)
  raise ArgumentError, "Expected images to be a list, got #{images.class}" unless images.is_a?(Array)
  if images.empty?
    raise ArgumentError,
          "Expected images to be a non-empty list, got #{images.length} images"
  end

  bad_at = images.index { |candidate| !candidate.is_a?(Array) }
  raise ArgumentError, "Expected image to be an Array, got #{images[bad_at].inspect}" unless bad_at.nil?

  images
end
361
+
362
# Validates the fields shared by every record set.
#
# Checks length consistency first, then validates :embeddings, :documents
# and :images when each is present. Nil documents are tolerated only when
# embeddings were supplied.
#
# @param record_set [Hash] keyed by Symbol (:embeddings, :documents, :images, ...)
# @raise [ArgumentError] propagated from the individual validators
def validate_base_record_set(record_set)
  validate_record_set_length_consistency(record_set)

  embeddings = record_set[:embeddings]
  documents = record_set[:documents]
  images = record_set[:images]

  validate_embeddings(embeddings) if embeddings
  validate_documents(documents, nullable: !embeddings.nil?) if documents
  validate_images(images) if images
end
369
+
370
# Validates a record set that is about to be inserted.
#
# Runs the shared record-set checks, then requires valid :ids and, when
# present, valid :metadatas.
#
# @param record_set [Hash] keyed by Symbol (:ids, :metadatas, :embeddings, ...)
# @raise [ArgumentError] propagated from the individual validators
def validate_insert_record_set(record_set)
  # NOTE: validate_base_record_set runs this same length check again.
  validate_record_set_length_consistency(record_set)
  validate_base_record_set(record_set)

  validate_ids(record_set[:ids])

  metadatas = record_set[:metadatas]
  validate_metadatas(metadatas) if metadatas
end
377
+
378
# Checks that all non-nil fields of a record set have the same, non-zero
# length.
#
# @param record_set [Hash] field name => list (or nil when not supplied)
# @return [nil]
# @raise [ArgumentError] when every field is nil, when any supplied field
#   is empty, or when supplied fields differ in length
def validate_record_set_length_consistency(record_set)
  supplied = record_set.reject { |_field, items| items.nil? }
  sizes = supplied.values.map(&:length)
  if sizes.empty?
    raise ArgumentError,
          "At least one of #{record_set.keys.join(', ')} must be provided"
  end

  empty_fields = supplied.select { |_field, items| items.length == 0 }.keys
  raise ArgumentError, "Non-empty lists are required for #{empty_fields}" unless empty_fields.empty?

  return if sizes.uniq.length <= 1

  summary = supplied.map { |field, items| "#{field}: #{items.length}" }.join(", ")
  raise ArgumentError, "Unequal lengths for fields: #{summary}"
end
393
+
394
# Checks that a record set can be passed to an embedding function: it must
# not already carry embeddings and must supply exactly one embeddable field.
#
# @param record_set [Hash]
# @param embeddable_fields [Array<Symbol>, nil] defaults to
#   default_embeddable_record_set_fields
# @raise [ArgumentError] when embeddings are already present, or propagated
#   from validate_record_set_contains_one
def validate_record_set_for_embedding(record_set, embeddable_fields: nil)
  if record_set[:embeddings]
    raise ArgumentError, "Attempting to embed a record that already has embeddings."
  end

  fields = embeddable_fields || default_embeddable_record_set_fields
  validate_record_set_contains_one(record_set, fields)
end
399
+
400
# Requires at least one of the named fields to be non-nil in +record_set+.
#
# @param record_set [Hash]
# @param contains_any [Array] field keys, each of which must exist in
#   +record_set+ (checked by validate_record_set_contains)
# @return [nil]
# @raise [ArgumentError] when every named field is nil
def validate_record_set_contains_any(record_set, contains_any)
  validate_record_set_contains(record_set, contains_any)
  return unless contains_any.all? { |field| record_set[field].nil? }

  raise ArgumentError, "At least one of #{contains_any.join(', ')} must be provided"
end
406
+
407
# Requires exactly one of the named fields to be non-nil in +record_set+.
#
# @param record_set [Hash]
# @param contains_one [Array] field keys, each of which must exist in
#   +record_set+ (checked by validate_record_set_contains)
# @return [nil]
# @raise [ArgumentError] when zero or more than one named field is non-nil
def validate_record_set_contains_one(record_set, contains_one)
  validate_record_set_contains(record_set, contains_one)
  supplied = contains_one.reject { |field| record_set[field].nil? }
  return if supplied.length == 1

  raise ArgumentError, "Exactly one of #{contains_one.join(', ')} must be provided"
end
414
+
415
# Checks that every requested field is a key of +record_set+.
#
# @param record_set [Hash]
# @param contains [Array] field keys to look up
# @return [Array] the same +contains+ object when every key exists
# @raise [ArgumentError] when any requested key is absent
def validate_record_set_contains(record_set, contains)
  contains.each do |field|
    unless record_set.key?(field)
      raise ArgumentError,
            "Invalid field in contains: #{contains.join(', ')}, available fields: #{record_set.keys.join(', ')}"
    end
  end
end
422
+
423
# Field keys considered embeddable when the caller does not specify any.
#
# @return [Array<Symbol>] a frozen Array: :documents, :images, :uris
def default_embeddable_record_set_fields
  [ :documents, :images, :uris ].freeze
end
426
+
427
# Normalizes the shared record-set fields into list form.
#
# Embeddings go through normalize_embeddings; documents, images and uris go
# through maybe_cast_one_to_many.
#
# @return [Hash] with keys :embeddings, :documents, :images, :uris (in that
#   order)
def normalize_base_record_set(embeddings: nil, documents: nil, images: nil, uris: nil)
  normalized = { embeddings: normalize_embeddings(embeddings) }
  normalized[:documents] = maybe_cast_one_to_many(documents)
  normalized[:images] = maybe_cast_one_to_many(images)
  normalized[:uris] = maybe_cast_one_to_many(uris)
  normalized
end
435
+
436
# Normalizes every field of an insert request into list form.
#
# The shared fields are normalized by normalize_base_record_set; ids go
# through maybe_cast_one_to_many and metadatas through normalize_metadatas.
#
# @return [Hash] with keys :ids, :metadatas, :embeddings, :documents,
#   :images, :uris (in that order)
def normalize_insert_record_set(ids:, embeddings:, metadatas: nil, documents: nil, images: nil, uris: nil)
  shared = normalize_base_record_set(embeddings: embeddings, documents: documents, images: images, uris: uris)

  record_set = {
    ids: maybe_cast_one_to_many(ids),
    metadatas: normalize_metadatas(metadatas)
  }
  [ :embeddings, :documents, :images, :uris ].each do |field|
    record_set[field] = shared[field]
  end
  record_set
end
453
+
454
# Checks that a batch does not exceed the configured maximum size.
#
# The batch size is taken from the length of the first element of +batch+.
#
# @param batch [Array] first element is the list whose length is measured
# @param limits [Hash] must provide :max_batch_size
# @return [nil]
# @raise [ArgumentError] when the batch is larger than the maximum
def validate_batch(batch, limits)
  leading = batch[0]
  ceiling = limits[:max_batch_size]
  return unless leading.length > ceiling

  raise ArgumentError,
        "Batch size #{leading.length} exceeds maximum batch size #{ceiling}"
end
462
+
463
# Builds a SparseVector whose entries are ordered by ascending index.
#
# Values (and labels, when given) are reordered together with their index.
# Empty +indices+ yields an empty SparseVector with nil labels.
#
# @param indices [Array]
# @param values [Array] paired positionally with +indices+
# @param labels [Array, nil] optional, paired positionally with +indices+
# @return [SparseVector]
def normalize_sparse_vector(indices:, values:, labels: nil)
  return SparseVector.new(indices: [], values: [], labels: nil) if indices.empty?

  companions = labels ? [ values, labels ] : [ values ]
  ordered = indices.zip(*companions).sort_by { |row| row[0] }
  # Without labels the transpose has two columns, so sorted_labels is nil.
  sorted_indices, sorted_values, sorted_labels = ordered.transpose
  SparseVector.new(indices: sorted_indices, values: sorted_values, labels: sorted_labels)
end
480
+ end
481
+
482
+ module Encoding
483
+ F32_MAX = 3.402823466e+38
484
+ F32_MIN = -3.402823466e+38
485
+
486
+ module_function
487
+
488
# Maps a number into the range bounded by F32_MIN and F32_MAX.
#
# @param value [Numeric]
# @return [Float] NaN for NaN input, +/-Infinity when +value+ lies beyond
#   F32_MAX / F32_MIN, otherwise +value+ converted with #to_f
def to_f32(value)
  if value.respond_to?(:nan?) && value.nan?
    Float::NAN
  elsif value > F32_MAX
    Float::INFINITY
  elsif value < F32_MIN
    -Float::INFINITY
  else
    value.to_f
  end
end
494
+
495
# Packs an embedding into a binary String using the "e*" pack directive
# (little-endian single-precision floats).
#
# If packing raises RangeError, each value is first passed through to_f32
# and the result is packed instead.
#
# @param embedding [Array<Numeric>]
# @return [String] the packed bytes
def pack_embedding_safely(embedding)
  begin
    embedding.pack("e*")
  rescue RangeError
    clamped = embedding.map { |component| to_f32(component) }
    clamped.pack("e*")
  end
end
500
+
501
# Encodes each embedding as a strict Base64 string of its packed bytes.
#
# @param embeddings [Array<Array<Numeric>, nil>, nil]
# @return [Array<String, nil>, nil] nil when +embeddings+ is nil; nil
#   entries stay nil
def embeddings_to_base64_strings(embeddings)
  return nil if embeddings.nil?

  embeddings.map do |vector|
    vector.nil? ? nil : Base64.strict_encode64(pack_embedding_safely(vector))
  end
end
508
+
509
# Decodes Base64 strings back into embeddings by unpacking the bytes with
# the "e*" directive (little-endian single-precision floats).
#
# @param b64_strings [Array<String, nil>, nil]
# @return [Array<Array<Float>, nil>, nil] nil when +b64_strings+ is nil;
#   nil entries stay nil
def base64_strings_to_embeddings(b64_strings)
  return nil if b64_strings.nil?

  b64_strings.map do |encoded|
    encoded.nil? ? nil : Base64.decode64(encoded).unpack("e*")
  end
end
517
+ end
518
+ end
519
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "types/sparse_vector"
4
+ require_relative "types/validation"
5
+ require_relative "types/results"
6
+
7
+ module Chroma
8
+ module Types
9
+ TYPE_KEY = "#type"
10
+ SPARSE_VECTOR_TYPE_VALUE = "sparse_vector"
11
+ META_KEY_CHROMA_DOCUMENT = "chroma:document"
12
+ end
13
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ VERSION = "0.1.0"
5
+ end
data/lib/chromadb.rb ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "base64"
5
+
6
+ require_relative "chromadb/version"
7
+ require_relative "chromadb/errors"
8
+ require_relative "chromadb/types"
9
+ require_relative "chromadb/schema"
10
+ require_relative "chromadb/search"
11
+ require_relative "chromadb/embedding_functions"
12
+ require_relative "chromadb/http_client"
13
+ require_relative "chromadb/client"
14
+ require_relative "chromadb/admin_client"
15
+ require_relative "chromadb/collection"