chromadb-experimental 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/chromadb/admin_client.rb +6 -0
- data/lib/chromadb/client.rb +317 -0
- data/lib/chromadb/collection.rb +573 -0
- data/lib/chromadb/embedding_functions/chroma_bm25.rb +459 -0
- data/lib/chromadb/embedding_functions/chroma_cloud_qwen.rb +139 -0
- data/lib/chromadb/embedding_functions/chroma_cloud_splade.rb +121 -0
- data/lib/chromadb/embedding_functions.rb +121 -0
- data/lib/chromadb/errors.rb +120 -0
- data/lib/chromadb/http_client.rb +142 -0
- data/lib/chromadb/openapi/lib/chromadb/api/default_api.rb +2349 -0
- data/lib/chromadb/openapi/lib/chromadb/api_client.rb +392 -0
- data/lib/chromadb/openapi/lib/chromadb/api_error.rb +58 -0
- data/lib/chromadb/openapi/lib/chromadb/configuration.rb +295 -0
- data/lib/chromadb/openapi/lib/chromadb/models/add_collection_records_payload.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/attach_function_request.rb +250 -0
- data/lib/chromadb/openapi/lib/chromadb/models/attach_function_response.rb +235 -0
- data/lib/chromadb/openapi/lib/chromadb/models/attached_function_api_response.rb +361 -0
- data/lib/chromadb/openapi/lib/chromadb/models/attached_function_info.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/bool_inverted_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/bool_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/checklist_response.rb +245 -0
- data/lib/chromadb/openapi/lib/chromadb/models/collection.rb +315 -0
- data/lib/chromadb/openapi/lib/chromadb/models/collection_configuration.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/create_collection_payload.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/create_database_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/create_tenant_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/database.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/detach_function_request.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/detach_function_response.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/embedding_function_new_configuration.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/error_response.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/float_inverted_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/float_list_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/float_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/fork_collection_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/fts_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/get_attached_function_response.rb +224 -0
- data/lib/chromadb/openapi/lib/chromadb/models/get_response.rb +270 -0
- data/lib/chromadb/openapi/lib/chromadb/models/get_tenant_response.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/get_user_identity_response.rb +246 -0
- data/lib/chromadb/openapi/lib/chromadb/models/heartbeat_response.rb +235 -0
- data/lib/chromadb/openapi/lib/chromadb/models/hnsw_configuration.rb +330 -0
- data/lib/chromadb/openapi/lib/chromadb/models/hnsw_index_config.rb +371 -0
- data/lib/chromadb/openapi/lib/chromadb/models/include.rb +210 -0
- data/lib/chromadb/openapi/lib/chromadb/models/int_inverted_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/int_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/query_response.rb +280 -0
- data/lib/chromadb/openapi/lib/chromadb/models/raw_where_fields.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/schema.rb +258 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload.rb +256 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload_filter.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload_group_by.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload_limit.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload_select.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_request_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_response.rb +270 -0
- data/lib/chromadb/openapi/lib/chromadb/models/space.rb +210 -0
- data/lib/chromadb/openapi/lib/chromadb/models/spann_configuration.rb +420 -0
- data/lib/chromadb/openapi/lib/chromadb/models/spann_index_config.rb +536 -0
- data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector.rb +244 -0
- data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_config.rb +242 -0
- data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_type.rb +234 -0
- data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/string_inverted_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/string_value_type.rb +231 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_collection_configuration.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_collection_payload.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_collection_records_payload.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_hnsw_configuration.rb +345 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_spann_configuration.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_tenant_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/upsert_collection_records_payload.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/value_types.rb +271 -0
- data/lib/chromadb/openapi/lib/chromadb/models/vector_index_config.rb +261 -0
- data/lib/chromadb/openapi/lib/chromadb/models/vector_index_type.rb +234 -0
- data/lib/chromadb/openapi/lib/chromadb/version.rb +15 -0
- data/lib/chromadb/openapi/lib/chromadb.rb +102 -0
- data/lib/chromadb/openapi.rb +6 -0
- data/lib/chromadb/schema.rb +744 -0
- data/lib/chromadb/schemas/chroma-cloud-qwen.json +61 -0
- data/lib/chromadb/schemas/chroma-cloud-splade.json +31 -0
- data/lib/chromadb/schemas/chroma_bm25.json +37 -0
- data/lib/chromadb/search/key.rb +94 -0
- data/lib/chromadb/search/limit.rb +41 -0
- data/lib/chromadb/search/rank.rb +425 -0
- data/lib/chromadb/search/search.rb +73 -0
- data/lib/chromadb/search/select.rb +54 -0
- data/lib/chromadb/search/where.rb +157 -0
- data/lib/chromadb/search.rb +8 -0
- data/lib/chromadb/types/results.rb +96 -0
- data/lib/chromadb/types/sparse_vector.rb +86 -0
- data/lib/chromadb/types/validation.rb +519 -0
- data/lib/chromadb/types.rb +13 -0
- data/lib/chromadb/version.rb +5 -0
- data/lib/chromadb.rb +15 -0
- metadata +233 -0
|
@@ -0,0 +1,519 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
module Chroma
|
|
6
|
+
module Types
|
|
7
|
+
module Validation
|
|
8
|
+
# Field names accepted by validate_include as items of an `include` list.
INCLUDE_OPTIONS = %w[
  documents embeddings metadatas distances uris data
].freeze
|
|
9
|
+
|
|
10
|
+
module_function
|
|
11
|
+
|
|
12
|
+
# Wraps a lone value in a one-element Array so callers can always treat the
# result as a list.
#
# @param target [Object, Array, nil] a single item, a list of items, or nil
# @return [Array, nil] nil for nil input, +target+ itself when it is already an
#   Array, otherwise a new Array containing only +target+
def maybe_cast_one_to_many(target)
  return if target.nil?

  target.is_a?(Array) ? target : [target]
end
|
|
17
|
+
|
|
18
|
+
# Normalizes embeddings input into a list of vectors.
#
# A non-empty flat Array of Numerics is treated as one embedding and wrapped in
# an outer Array; any other Array is returned untouched.
#
# @param target [Array, nil] one embedding, a list of embeddings, or nil
# @return [Array, nil] the list form, or nil when +target+ is nil
# @raise [ArgumentError] when +target+ is neither nil nor an Array
def normalize_embeddings(target)
  return if target.nil?
  raise ArgumentError, "Expected embeddings to be an Array, got #{target.class}" unless target.is_a?(Array)

  single_vector = !target.empty? && target.all? { |component| component.is_a?(Numeric) }
  single_vector ? [target] : target
end
|
|
25
|
+
|
|
26
|
+
# Copies a metadata Hash, converting any Hash value tagged as a sparse vector
# (its TYPE_KEY entry equals SPARSE_VECTOR_TYPE_VALUE) into a SparseVector via
# SparseVector.from_h. All other values are copied across unchanged.
#
# @param metadata [Hash, nil]
# @return [Hash, nil] a new Hash, or nil when +metadata+ is nil
# @raise [ArgumentError] when +metadata+ is neither nil nor a Hash
def normalize_metadata(metadata)
  return if metadata.nil?
  raise ArgumentError, "Expected metadata to be a Hash, got #{metadata.class}" unless metadata.is_a?(Hash)

  metadata.each_with_object({}) do |(key, value), result|
    tagged_sparse = value.is_a?(Hash) && value[TYPE_KEY] == SPARSE_VECTOR_TYPE_VALUE
    result[key] = tagged_sparse ? SparseVector.from_h(value) : value
  end
end
|
|
41
|
+
|
|
42
|
+
# Normalizes one metadata Hash or a list of them into a list, running each
# entry through normalize_metadata.
#
# @param metadatas [Hash, Array, nil]
# @return [Array, nil] list of normalized entries, or nil when +metadatas+ is nil
# @raise [ArgumentError] when +metadatas+ is not nil, a Hash, or an Array
def normalize_metadatas(metadatas)
  return if metadatas.nil?

  if metadatas.is_a?(Hash)
    [normalize_metadata(metadatas)]
  elsif metadatas.is_a?(Array)
    metadatas.map { |entry| normalize_metadata(entry) }
  else
    raise ArgumentError, "Expected metadatas to be an Array, got #{metadatas.class}"
  end
end
|
|
50
|
+
|
|
51
|
+
# Copies a metadata Hash, replacing every SparseVector value with the result of
# its #to_h. Other values are copied across unchanged.
#
# @param metadata [Hash, nil]
# @return [Hash, nil] a new Hash, or nil when +metadata+ is nil
# @raise [ArgumentError] when +metadata+ is neither nil nor a Hash
def serialize_metadata(metadata)
  return if metadata.nil?
  raise ArgumentError, "Expected metadata to be a Hash, got #{metadata.class}" unless metadata.is_a?(Hash)

  metadata.each_with_object({}) do |(key, value), wire|
    wire[key] = value.is_a?(SparseVector) ? value.to_h : value
  end
end
|
|
66
|
+
|
|
67
|
+
# Serializes a list of metadata Hashes with serialize_metadata, keeping nil
# entries as nil.
#
# @param metadatas [Array, nil]
# @return [Array, nil] serialized entries, or nil when +metadatas+ is nil
# @raise [ArgumentError] when +metadatas+ is neither nil nor an Array
def serialize_metadatas(metadatas)
  return if metadatas.nil?
  raise ArgumentError, "Expected metadatas to be an Array, got #{metadatas.class}" unless metadatas.is_a?(Array)

  metadatas.map do |entry|
    serialize_metadata(entry) unless entry.nil?
  end
end
|
|
74
|
+
|
|
75
|
+
# Deserializes one metadata Hash by delegating to normalize_metadata.
#
# @param metadata [Hash, nil]
# @return [Hash, nil] nil when +metadata+ is nil
def deserialize_metadata(metadata)
  metadata.nil? ? nil : normalize_metadata(metadata)
end
|
|
79
|
+
|
|
80
|
+
# Deserializes a list of metadata Hashes by delegating to normalize_metadatas.
#
# @param metadatas [Array, Hash, nil]
# @return [Array, nil] nil when +metadatas+ is nil
def deserialize_metadatas(metadatas)
  metadatas.nil? ? nil : normalize_metadatas(metadatas)
end
|
|
84
|
+
|
|
85
|
+
# Checks that +ids+ is a non-empty Array of unique Strings.
#
# @param ids [Array<String>]
# @return [Array<String>] +ids+ itself when valid
# @raise [ArgumentError] when +ids+ is not an Array, is empty, or holds a
#   non-String element
# @raise [Chroma::DuplicateIDError] when an ID occurs more than once; the
#   message lists every duplicate when there are fewer than ten, otherwise the
#   first five and last five
def validate_ids(ids)
  raise ArgumentError, "Expected IDs to be a list, got #{ids.class} as IDs" unless ids.is_a?(Array)
  raise ArgumentError, "Expected IDs to be a non-empty list, got #{ids.length} IDs" if ids.empty?

  seen = Set.new
  duplicates = Set.new
  ids.each do |id|
    raise ArgumentError, "Expected ID to be a String, got #{id.inspect}" unless id.is_a?(String)

    # Set#add? returns nil when the element was already present.
    duplicates << id unless seen.add?(id)
  end
  return ids if duplicates.empty?

  repeated = duplicates.to_a
  message =
    if repeated.length < 10
      "Expected IDs to be unique, found duplicates of: #{repeated.join(', ')}"
    else
      preview = "#{repeated.first(5).join(', ')}, ..., #{repeated.last(5).join(', ')}"
      "Expected IDs to be unique, found #{repeated.length} duplicated IDs: #{preview}"
    end
  raise Chroma::DuplicateIDError, message
end
|
|
115
|
+
|
|
116
|
+
# Checks a metadata Hash: it must be non-empty, must not use the reserved key
# META_KEY_CHROMA_DOCUMENT, must have String keys, and each value must be a
# SparseVector, String, Numeric, true, false or nil.
#
# @param metadata [Hash, nil]
# @return [Hash, nil] +metadata+ itself (nil passes through unchecked)
# @raise [ArgumentError] for a non-Hash, an empty Hash, the reserved key, or an
#   unsupported value
# @raise [TypeError] when a key is not a String
def validate_metadata(metadata)
  return metadata if metadata.nil?

  unless metadata.is_a?(Hash)
    raise ArgumentError, "Expected metadata to be a Hash or nil, got #{metadata.class} as metadata"
  end
  if metadata.empty?
    raise ArgumentError, "Expected metadata to be a non-empty Hash, got #{metadata.length} metadata attributes"
  end

  metadata.each_pair do |key, value|
    if key == META_KEY_CHROMA_DOCUMENT
      raise ArgumentError, "Expected metadata to not contain the reserved key #{META_KEY_CHROMA_DOCUMENT}"
    end
    raise TypeError, "Expected metadata key to be a String, got #{key.inspect}" unless key.is_a?(String)
    next if value.is_a?(SparseVector)

    scalar = value.nil? || [String, Numeric, TrueClass, FalseClass].any? { |klass| value.is_a?(klass) }
    next if scalar

    raise ArgumentError,
          "Expected metadata value to be a String, Numeric, Boolean, SparseVector, or nil, got #{value.inspect} which is a #{value.class}"
  end
  metadata
end
|
|
138
|
+
|
|
139
|
+
# Checks a metadata Hash supplied for an update: it must be non-empty with
# String keys, and each value must be a SparseVector, String, Numeric, true,
# false or nil.
#
# @param metadata [Hash, nil]
# @return [Hash, nil] +metadata+ itself (nil passes through unchecked)
# @raise [ArgumentError] for a non-Hash, an empty Hash, a non-String key, or an
#   unsupported value
def validate_update_metadata(metadata)
  return metadata if metadata.nil?

  raise ArgumentError, "Expected metadata to be a Hash or nil, got #{metadata.class}" unless metadata.is_a?(Hash)
  raise ArgumentError, "Expected metadata to be a non-empty Hash, got #{metadata.inspect}" if metadata.empty?

  metadata.each_pair do |key, value|
    raise ArgumentError, "Expected metadata key to be a String, got #{key.inspect}" unless key.is_a?(String)
    next if value.is_a?(SparseVector)

    scalar = value.nil? || [String, Numeric, TrueClass, FalseClass].any? { |klass| value.is_a?(klass) }
    next if scalar

    raise ArgumentError,
          "Expected metadata value to be a String, Numeric, Boolean, SparseVector, or nil, got #{value.inspect}"
  end
  metadata
end
|
|
158
|
+
|
|
159
|
+
# Runs validate_metadata over every entry of a metadata list.
#
# @param metadatas [Array<Hash, nil>]
# @return [Array] +metadatas+ itself
# @raise [ArgumentError] when +metadatas+ is not an Array (entry-level errors
#   come from validate_metadata)
def validate_metadatas(metadatas)
  raise ArgumentError, "Expected metadatas to be a list, got #{metadatas.inspect}" unless metadatas.is_a?(Array)

  # Array#each returns its receiver, so the validated list is the return value.
  metadatas.each { |entry| validate_metadata(entry) }
end
|
|
166
|
+
|
|
167
|
+
# Checks the structure of a `where` filter expression.
#
# Accepted shapes, each a Hash with exactly one String key:
# * `{ "field" => scalar }` - shorthand equality; scalar is a String, Numeric
#   or Boolean
# * `{ "field" => { "$op" => operand } }` - one comparison operator
# * `{ "$and" | "$or" => [expr, expr, ...] }` - at least two nested
#   expressions, each validated recursively
#
# Booleans are accepted in the shorthand form so that `{ "flag" => true }` is
# treated the same as `{ "flag" => { "$eq" => true } }`, which was already
# accepted.
#
# @param where [Hash]
# @return [Hash] +where+ itself when valid
# @raise [ArgumentError] on any structural or type violation
def validate_where(where)
  raise ArgumentError, "Expected where to be a Hash, got #{where.inspect}" unless where.is_a?(Hash)
  raise ArgumentError, "Expected where to have exactly one operator, got #{where.inspect}" if where.length != 1

  where.each do |key, value|
    raise ArgumentError, "Expected where key to be a String, got #{key.inspect}" unless key.is_a?(String)

    if [ "$and", "$or", "$in", "$nin" ].include?(key)
      # Value shape for $and/$or is checked below.
      # NOTE(review): a top-level "$in"/"$nin" key with a non-Hash value gets no
      # further validation here - confirm whether that is intended.
    elsif !(value.is_a?(String) || value.is_a?(Numeric) || value.is_a?(TrueClass) || value.is_a?(FalseClass) || value.is_a?(Hash))
      raise ArgumentError,
            "Expected where value to be a String, Numeric, Boolean, or operator expression, got #{value.inspect}"
    end

    if key == "$and" || key == "$or"
      unless value.is_a?(Array)
        raise ArgumentError,
              "Expected where value for #{key} to be a list of where expressions, got #{value.inspect}"
      end
      if value.length <= 1
        raise ArgumentError,
              "Expected where value for #{key} to be a list with at least two expressions, got #{value.inspect}"
      end
      value.each { |expr| validate_where(expr) }
    elsif value.is_a?(Hash)
      if value.length != 1
        raise ArgumentError,
              "Expected where operator expression to have exactly one operator, got #{value.inspect}"
      end
      operator, operand = value.first
      unless [ "$eq", "$ne", "$gt", "$gte", "$lt", "$lte", "$in", "$nin", "$contains", "$not_contains", "$regex", "$not_regex" ].include?(operator)
        raise ArgumentError,
              "Expected where operator to be one of $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin, $contains, $not_contains, $regex, $not_regex, got #{operator}"
      end
      if [ "$in", "$nin" ].include?(operator)
        unless operand.is_a?(Array)
          raise ArgumentError,
                "Expected where operand for #{operator} to be a list, got #{operand.inspect}"
        end
        if operand.empty?
          raise ArgumentError,
                "Expected where operand for #{operator} to be a non-empty list"
        end
        operand.each do |item|
          unless item.is_a?(String) || item.is_a?(Numeric) || item.is_a?(TrueClass) || item.is_a?(FalseClass)
            raise ArgumentError,
                  "Expected where list items to be String, Numeric, or Boolean, got #{item.inspect}"
          end
        end
      elsif [ "$contains", "$not_contains", "$regex", "$not_regex" ].include?(operator)
        unless operand.is_a?(String)
          raise ArgumentError,
                "Expected where operand for #{operator} to be a String, got #{operand.inspect}"
        end
        raise ArgumentError, "Expected where operand for #{operator} to be a non-empty String" if operand.empty?
      else
        unless operand.is_a?(String) || operand.is_a?(Numeric) || operand.is_a?(TrueClass) || operand.is_a?(FalseClass)
          raise ArgumentError,
                "Expected where operand for #{operator} to be String, Numeric, or Boolean, got #{operand.inspect}"
        end
      end
    end
  end
end
|
|
233
|
+
|
|
234
|
+
# Checks the structure of a `where_document` filter expression.
#
# The expression is a Hash with exactly one String operator key:
# * "$contains", "$not_contains", "$regex", "$not_regex" take a non-empty String
# * "$and", "$or" take an Array of at least two nested expressions, each
#   validated recursively
#
# @param where_document [Hash]
# @return [Hash] +where_document+ itself when valid
# @raise [ArgumentError] on any structural or type violation
def validate_where_document(where_document)
  unless where_document.is_a?(Hash)
    raise ArgumentError, "Expected where document to be a Hash, got #{where_document.inspect}"
  end
  if where_document.length != 1
    raise ArgumentError, "Expected where document to have exactly one operator, got #{where_document.inspect}"
  end

  where_document.each_pair do |operator, operand|
    unless operator.is_a?(String)
      raise ArgumentError, "Expected where document key to be a String, got #{operator.inspect}"
    end

    case operator
    when "$and", "$or"
      unless operand.is_a?(Array)
        raise ArgumentError,
              "Expected document value for #{operator} to be a list of where document expressions, got #{operand.inspect}"
      end
      unless operand.length > 1
        raise ArgumentError,
              "Expected document value for #{operator} to be a list with at least two where document expressions, got #{operand.inspect}"
      end
      operand.each { |nested| validate_where_document(nested) }
    when "$contains", "$not_contains", "$regex", "$not_regex"
      unless operand.is_a?(String)
        raise ArgumentError,
              "Expected where document operand value for operator #{operator} to be a String, got #{operand.inspect}"
      end
      if operand.empty?
        raise ArgumentError,
              "Expected where document operand value for operator #{operator} to be a non-empty String"
      end
    else
      raise ArgumentError,
            "Expected where document operator to be one of $contains, $not_contains, $regex, $not_regex, $and, $or, got #{operator}"
    end
  end
end
|
|
268
|
+
|
|
269
|
+
# Checks an `include` list: every item must be a String found in
# INCLUDE_OPTIONS and, when +disallowed+ is given, must not appear in it.
#
# @param include [Array<String>]
# @param disallowed [Array<String>, nil] items rejected for this call
# @return [Array<String>] +include+ itself when valid
# @raise [ArgumentError] on a non-Array, a non-String item, an unknown item,
#   or a disallowed item
def validate_include(include, disallowed: nil)
  raise ArgumentError, "Expected include to be a list, got #{include.inspect}" unless include.is_a?(Array)

  include.each do |item|
    raise ArgumentError, "Expected include item to be a String, got #{item.inspect}" unless item.is_a?(String)

    unless INCLUDE_OPTIONS.include?(item)
      raise ArgumentError, "Expected include item to be one of #{INCLUDE_OPTIONS.join(', ')}, got #{item}"
    end

    forbidden = disallowed && disallowed.include?(item)
    next unless forbidden

    raise ArgumentError, "Include item cannot be one of #{disallowed.join(', ')}, got #{item}"
  end
end
|
|
285
|
+
|
|
286
|
+
# Checks that the requested number of results is a positive Integer.
#
# @param n_results [Integer]
# @return [Integer] +n_results+ itself when valid
# @raise [ArgumentError] when it is not an Integer or is zero or negative
def validate_n_results(n_results)
  unless n_results.is_a?(Integer)
    raise ArgumentError, "Expected requested number of results to be an Integer, got #{n_results.inspect}"
  end
  unless n_results.positive?
    raise ArgumentError, "Number of requested results #{n_results} cannot be negative or zero."
  end

  n_results
end
|
|
295
|
+
|
|
296
|
+
# Checks that +embeddings+ is a non-empty Array of non-empty Arrays whose
# elements are all Numeric.
#
# @param embeddings [Array<Array<Numeric>>]
# @return [Array<Array<Numeric>>] +embeddings+ itself when valid
# @raise [ArgumentError] on the first violation found
def validate_embeddings(embeddings)
  raise ArgumentError, "Expected embeddings to be a list, got #{embeddings.class}" unless embeddings.is_a?(Array)
  if embeddings.empty?
    raise ArgumentError,
          "Expected embeddings to be a list with at least one item, got #{embeddings.length} embeddings"
  end

  embeddings.each.with_index do |embedding, position|
    unless embedding.is_a?(Array)
      raise ArgumentError, "Expected embedding at position #{position} to be an Array, got #{embedding.class}"
    end
    raise ArgumentError, "Expected embedding at position #{position} to be non-empty" if embedding.empty?

    # Locate by index rather than value so that a nil or false element is
    # still reported.
    bad_at = embedding.index { |component| !component.is_a?(Numeric) }
    next if bad_at.nil?

    raise ArgumentError, "Expected each value in the embedding to be Numeric, got #{embedding[bad_at].inspect}"
  end
  embeddings
end
|
|
318
|
+
|
|
319
|
+
# Checks that +vectors+ is a non-empty Array of SparseVector instances.
#
# @param vectors [Array<SparseVector>]
# @return [Array<SparseVector>] +vectors+ itself when valid
# @raise [ArgumentError] on a non-Array, an empty Array, or a non-SparseVector
#   element
def validate_sparse_vectors(vectors)
  raise ArgumentError, "Expected sparse vectors to be a list, got #{vectors.class}" unless vectors.is_a?(Array)
  if vectors.empty?
    raise ArgumentError, "Expected sparse vectors to be a non-empty list, got #{vectors.length} sparse vectors"
  end

  vectors.each.with_index do |vector, position|
    next if vector.is_a?(SparseVector)

    raise ArgumentError, "Expected SparseVector instance at position #{position}, got #{vector.class}"
  end
  vectors
end
|
|
334
|
+
|
|
335
|
+
# Checks that +documents+ is a non-empty Array of Strings. With
# +nullable: true+, nil entries are also allowed.
#
# @param documents [Array<String, nil>]
# @param nullable [Boolean] whether nil entries are acceptable
# @return [Array] +documents+ itself when valid
# @raise [ArgumentError] on a non-Array, an empty Array, or an invalid entry
def validate_documents(documents, nullable: false)
  raise ArgumentError, "Expected documents to be a list, got #{documents.class}" unless documents.is_a?(Array)
  if documents.empty?
    raise ArgumentError, "Expected documents to be a non-empty list, got #{documents.length} documents"
  end

  documents.each do |doc|
    acceptable = (nullable && doc.nil?) || doc.is_a?(String)
    raise ArgumentError, "Expected document to be a String, got #{doc.inspect}" unless acceptable
  end
end
|
|
348
|
+
|
|
349
|
+
# Checks that +images+ is a non-empty Array whose entries are all Arrays.
#
# @param images [Array<Array>]
# @return [Array<Array>] +images+ itself when valid
# @raise [ArgumentError] on a non-Array, an empty Array, or a non-Array entry
def validate_images(images)
  raise ArgumentError, "Expected images to be a list, got #{images.class}" unless images.is_a?(Array)
  raise ArgumentError, "Expected images to be a non-empty list, got #{images.length} images" if images.empty?

  images.each do |img|
    next if img.is_a?(Array)

    raise ArgumentError, "Expected image to be an Array, got #{img.inspect}"
  end
end
|
|
361
|
+
|
|
362
|
+
# Validates the fields shared by every record set: checks length consistency
# first, then the :embeddings, :documents and :images fields that are present.
# Documents may contain nil entries only when embeddings are supplied.
#
# @param record_set [Hash] keyed by Symbol field names
# @raise [ArgumentError] propagated from the individual validators
def validate_base_record_set(record_set)
  validate_record_set_length_consistency(record_set)

  embeddings = record_set[:embeddings]
  validate_embeddings(embeddings) if embeddings

  documents = record_set[:documents]
  validate_documents(documents, nullable: !embeddings.nil?) if documents

  images = record_set[:images]
  validate_images(images) if images
end
|
|
369
|
+
|
|
370
|
+
# Validates a record set destined for insertion: length consistency, the base
# fields, the required :ids, and :metadatas when present.
#
# @param record_set [Hash] keyed by Symbol field names
# @raise [ArgumentError] propagated from the individual validators
def validate_insert_record_set(record_set)
  validate_record_set_length_consistency(record_set)
  validate_base_record_set(record_set)
  validate_ids(record_set[:ids])

  metadatas = record_set[:metadatas]
  validate_metadatas(metadatas) if metadatas
end
|
|
377
|
+
|
|
378
|
+
# Checks that a record set has at least one non-nil field, that no non-nil
# field is empty, and that all non-nil fields have the same length.
#
# @param record_set [Hash] field name => list (or nil)
# @return [nil]
# @raise [ArgumentError] when any of the three conditions fails
def validate_record_set_length_consistency(record_set)
  # [field, length] pairs for every field that was actually supplied.
  sizes = record_set.reject { |_field, items| items.nil? }
                    .map { |field, items| [field, items.length] }
  raise ArgumentError, "At least one of #{record_set.keys.join(', ')} must be provided" if sizes.empty?

  empty_fields = sizes.select { |_field, size| size == 0 }.map(&:first)
  raise ArgumentError, "Non-empty lists are required for #{empty_fields}" unless empty_fields.empty?

  return if sizes.map(&:last).uniq.length <= 1

  summary = sizes.map { |field, size| "#{field}: #{size}" }.join(", ")
  raise ArgumentError, "Unequal lengths for fields: #{summary}"
end
|
|
393
|
+
|
|
394
|
+
# Checks that a record set is eligible for embedding: it must not already
# carry embeddings and must supply exactly one of the embeddable fields.
#
# @param record_set [Hash] keyed by Symbol field names
# @param embeddable_fields [Array<Symbol>, nil] defaults to
#   default_embeddable_record_set_fields
# @raise [ArgumentError] when embeddings are already present (further errors
#   come from validate_record_set_contains_one)
def validate_record_set_for_embedding(record_set, embeddable_fields: nil)
  if record_set[:embeddings]
    raise ArgumentError, "Attempting to embed a record that already has embeddings."
  end

  fields = embeddable_fields || default_embeddable_record_set_fields
  validate_record_set_contains_one(record_set, fields)
end
|
|
399
|
+
|
|
400
|
+
# Checks that at least one of the named fields holds a non-nil value, after
# confirming via validate_record_set_contains that every name is a key of the
# record set.
#
# @param record_set [Hash]
# @param contains_any [Array<Symbol>]
# @return [nil]
# @raise [ArgumentError] when all named fields are nil
def validate_record_set_contains_any(record_set, contains_any)
  validate_record_set_contains(record_set, contains_any)

  all_missing = contains_any.all? { |field| record_set[field].nil? }
  raise ArgumentError, "At least one of #{contains_any.join(', ')} must be provided" if all_missing
end
|
|
406
|
+
|
|
407
|
+
# Checks that exactly one of the named fields holds a non-nil value, after
# confirming via validate_record_set_contains that every name is a key of the
# record set.
#
# @param record_set [Hash]
# @param contains_one [Array<Symbol>]
# @return [nil]
# @raise [ArgumentError] when zero or several named fields are non-nil
def validate_record_set_contains_one(record_set, contains_one)
  validate_record_set_contains(record_set, contains_one)

  provided = contains_one.reject { |field| record_set[field].nil? }
  return if provided.length == 1

  raise ArgumentError, "Exactly one of #{contains_one.join(', ')} must be provided"
end
|
|
414
|
+
|
|
415
|
+
# Checks that every name in +contains+ is a key of +record_set+ (the value may
# be nil).
#
# @param record_set [Hash]
# @param contains [Array<Symbol>]
# @return [Array<Symbol>] +contains+ itself when every name is known
# @raise [ArgumentError] when a name is not a key of +record_set+
def validate_record_set_contains(record_set, contains)
  return contains if contains.all? { |field| record_set.key?(field) }

  raise ArgumentError,
        "Invalid field in contains: #{contains.join(', ')}, available fields: #{record_set.keys.join(', ')}"
end
|
|
422
|
+
|
|
423
|
+
# Field names used by validate_record_set_for_embedding when the caller does
# not supply its own list.
#
# @return [Array<Symbol>] a frozen Array
def default_embeddable_record_set_fields
  [:documents, :images, :uris].freeze
end
|
|
426
|
+
|
|
427
|
+
# Builds the base record set Hash: embeddings go through normalize_embeddings,
# and documents, images and uris go through maybe_cast_one_to_many.
#
# @param embeddings [Array, nil]
# @param documents [Object, Array, nil]
# @param images [Object, Array, nil]
# @param uris [Object, Array, nil]
# @return [Hash] with keys :embeddings, :documents, :images, :uris in that order
def normalize_base_record_set(embeddings: nil, documents: nil, images: nil, uris: nil)
  record_set = { embeddings: normalize_embeddings(embeddings) }
  { documents: documents, images: images, uris: uris }.each do |field, value|
    record_set[field] = maybe_cast_one_to_many(value)
  end
  record_set
end
|
|
435
|
+
|
|
436
|
+
# Builds the record set Hash for an insert: ids go through
# maybe_cast_one_to_many, metadatas through normalize_metadatas, and the
# remaining fields come from normalize_base_record_set.
#
# @param ids [String, Array<String>]
# @param embeddings [Array, nil]
# @param metadatas [Hash, Array, nil]
# @param documents [Object, Array, nil]
# @param images [Object, Array, nil]
# @param uris [Object, Array, nil]
# @return [Hash] with keys :ids, :metadatas, :embeddings, :documents, :images,
#   :uris in that order
def normalize_insert_record_set(ids:, embeddings:, metadatas: nil, documents: nil, images: nil, uris: nil)
  base = normalize_base_record_set(embeddings: embeddings, documents: documents, images: images, uris: uris)

  record_set = {
    ids: maybe_cast_one_to_many(ids),
    metadatas: normalize_metadatas(metadatas)
  }
  %i[embeddings documents images uris].each { |field| record_set[field] = base[field] }
  record_set
end
|
|
453
|
+
|
|
454
|
+
# Checks that the first element of +batch+ (read below as the ID list) does not
# hold more entries than +limits[:max_batch_size]+.
#
# @param batch [Array] only element 0 is read here
# @param limits [Hash] must provide :max_batch_size
# @return [nil]
# @raise [ArgumentError] when the limit is exceeded
def validate_batch(batch, limits)
  ids = batch[0]
  limit = limits[:max_batch_size]
  return unless ids.length > limit

  raise ArgumentError, "Batch size #{ids.length} exceeds maximum batch size #{limit}"
end
|
|
462
|
+
|
|
463
|
+
# Builds a SparseVector whose entries are ordered by ascending index.
#
# +values+ (and +labels+, when given) are reordered together with +indices+.
# Lengths are checked up front because Array#zip would otherwise silently drop
# surplus values/labels or pad missing ones with nil.
#
# @param indices [Array<Integer>]
# @param values [Array<Numeric>] one value per index
# @param labels [Array, nil] optional, one label per index
# @return [SparseVector] an empty vector when +indices+ is empty
# @raise [ArgumentError] when +values+ or +labels+ differ in length from
#   +indices+
def normalize_sparse_vector(indices:, values:, labels: nil)
  # Kept first so an empty +indices+ still yields an empty vector, as before.
  return SparseVector.new(indices: [], values: [], labels: nil) if indices.empty?

  if values.length != indices.length
    raise ArgumentError,
          "Expected indices and values to have the same length, got #{indices.length} indices and #{values.length} values"
  end
  if labels && labels.length != indices.length
    raise ArgumentError,
          "Expected indices and labels to have the same length, got #{indices.length} indices and #{labels.length} labels"
  end

  rows = labels ? indices.zip(values, labels) : indices.zip(values)
  # Without labels each row has two columns, so sorted_labels is nil.
  sorted_indices, sorted_values, sorted_labels = rows.sort_by(&:first).transpose
  SparseVector.new(indices: sorted_indices, values: sorted_values, labels: sorted_labels)
end
|
|
480
|
+
end
|
|
481
|
+
|
|
482
|
+
module Encoding
|
|
483
|
+
# Largest finite IEEE 754 single-precision value, (2 - 2**-23) * 2**127,
# written with enough digits to be exact as a Float. The shorter literal
# 3.402823466e+38 is slightly smaller, so values that still fit in a float32
# compared as "too large".
F32_MAX = 3.4028234663852886e+38
# Most negative finite single-precision value.
F32_MIN = -F32_MAX
|
|
485
|
+
|
|
486
|
+
module_function
|
|
487
|
+
|
|
488
|
+
# Maps a number onto a Float for single-precision packing: NaN stays NaN,
# values above F32_MAX become +Infinity, values below F32_MIN become
# -Infinity, and everything else is converted with #to_f.
#
# @param value [Numeric]
# @return [Float]
def to_f32(value)
  if value.respond_to?(:nan?) && value.nan?
    Float::NAN
  elsif value > F32_MAX
    Float::INFINITY
  elsif value < F32_MIN
    -Float::INFINITY
  else
    value.to_f
  end
end
|
|
494
|
+
|
|
495
|
+
# Packs an embedding as little-endian single-precision floats ("e*"). If
# packing raises RangeError, each component is first passed through to_f32 and
# the packing is retried once.
#
# @param embedding [Array<Numeric>]
# @return [String] packed bytes
def pack_embedding_safely(embedding)
  begin
    embedding.pack("e*")
  rescue RangeError
    clamped = embedding.map { |component| to_f32(component) }
    clamped.pack("e*")
  end
end
|
|
500
|
+
|
|
501
|
+
# Encodes each embedding as Base64 text of its packed little-endian
# single-precision bytes. nil entries stay nil.
#
# Uses the core Array#pack "m0" directive (Base64 with no line feeds) instead
# of the Base64 constant, which this file never requires.
#
# @param embeddings [Array<Array<Numeric>, nil>, nil]
# @return [Array<String, nil>, nil] nil when +embeddings+ is nil
def embeddings_to_base64_strings(embeddings)
  return nil if embeddings.nil?

  embeddings.map do |embedding|
    embedding.nil? ? nil : [pack_embedding_safely(embedding)].pack("m0")
  end
end
|
|
508
|
+
|
|
509
|
+
# Decodes Base64 strings back into embeddings by unpacking the bytes as
# little-endian single-precision floats. nil entries stay nil.
#
# Uses the core String#unpack1 "m" directive instead of the Base64 constant,
# which this file never requires.
#
# @param b64_strings [Array<String, nil>, nil]
# @return [Array<Array<Float>, nil>, nil] nil when +b64_strings+ is nil
def base64_strings_to_embeddings(b64_strings)
  return nil if b64_strings.nil?

  b64_strings.map do |b64|
    b64.nil? ? nil : b64.unpack1("m").unpack("e*")
  end
end
|
|
517
|
+
end
|
|
518
|
+
end
|
|
519
|
+
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "types/sparse_vector"
|
|
4
|
+
require_relative "types/validation"
|
|
5
|
+
require_relative "types/results"
|
|
6
|
+
|
|
7
|
+
module Chroma
  module Types
    # Metadata key reserved by validate_metadata; user metadata may not use it.
    META_KEY_CHROMA_DOCUMENT = "chroma:document"

    # Hash key whose value tags a serialized metadata value with its type.
    TYPE_KEY = "#type"
    # TYPE_KEY value that marks a Hash as a serialized sparse vector.
    SPARSE_VECTOR_TYPE_VALUE = "sparse_vector"
  end
end
|
data/lib/chromadb.rb
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "base64"
|
|
5
|
+
|
|
6
|
+
require_relative "chromadb/version"
|
|
7
|
+
require_relative "chromadb/errors"
|
|
8
|
+
require_relative "chromadb/types"
|
|
9
|
+
require_relative "chromadb/schema"
|
|
10
|
+
require_relative "chromadb/search"
|
|
11
|
+
require_relative "chromadb/embedding_functions"
|
|
12
|
+
require_relative "chromadb/http_client"
|
|
13
|
+
require_relative "chromadb/client"
|
|
14
|
+
require_relative "chromadb/admin_client"
|
|
15
|
+
require_relative "chromadb/collection"
|