chroma-db 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,130 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Chroma
4
- module APIOperations
5
- using RubyNext
6
-
7
# Request's response value object.
#
# status  - HTTP status code. It is zero when a request fails due to network error.
# body    - Parsed JSON object or response body.
# headers - HTTP response headers.
# error   - Exception or Net::HTTPResponse object if the response is not Net::HTTPSuccess
#
# NOTE: Not supported yet by Ruby Next
# Response = Data.define(:status, :body, :headers, :error)
#
# The Struct is created anonymously and named by the constant assignment.
# Passing a String as the first argument to Struct.new would additionally
# register a global `Struct::Response` constant, which can collide with other
# code and emits "already initialized constant" warnings when reloaded.
Response = Struct.new(:status, :body, :headers, :error)
17
-
18
- # Request module provides functionality to perform HTTP requests.
19
- module Request
20
- module ClassMethods
21
- include Dry::Monads[:result]
22
-
23
# Execute an HTTP request and return a monad wrapping the response.
#
# method  - The HTTP method to use (e.g. :get, :post). Method must be a `Symbol`.
# url     - The URL to send the request to.
# params  - A Hash that is serialized as the JSON request body when it is not empty.
# options - Additional options for the request. Only :use_ssl is read; when it is
#           absent, TLS is enabled if the URL scheme is "https". The Hash is not modified.
#
# Returns whatever build_response produces: a `Dry::Monads::Result` wrapping a
# `Chroma::APIOperations::Response`. Any StandardError raised while parsing the
# URL, building the request or talking to the server is routed through
# build_response as well, so the method reports failures instead of raising.
#
# Examples
#
#   result = execute_request(:get, "https://example.com", {name: "test request"})
#   if result.success?
#     puts "Response status: #{result.success.status}"
#     puts "Response body: #{result.success.body}"
#   else
#     puts "Request failed with error: #{result.failure.error}"
#   end
def execute_request(method, url, params = {}, options = {})
  uri = URI.parse(url)

  request = build_request(method, uri, params)

  # fetch (rather than delete) leaves the caller's Hash untouched; the block
  # supplies a scheme-based default so https URLs work without extra options.
  use_ssl = options.fetch(:use_ssl) { uri.scheme == "https" } || false

  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: use_ssl) do |http|
    Chroma::Util.log_debug("Sending a request", {method: method, uri: uri, params: params})
    http.request(request)
  end

  build_response(response)
rescue => ex
  build_response(ex)
end
57
-
58
# Classify the outcome of a request, log it, and delegate to
# build_response_details to wrap it.
#
# response - A Net::HTTPResponse, or any other object (typically the exception
#            rescued by execute_request).
#
# Success and redirect responses are logged at info level, client and server
# errors at error level. Server error bodies are passed on without JSON
# parsing. Anything that is not one of those four response families is
# handled as an exception.
#
# Returns the value produced by build_response_details.
private def build_response(response)
  case response
  when Net::HTTPSuccess
    Chroma::Util.log_info("Successful response", code: response.code)
    build_response_details(response)
  when Net::HTTPRedirection
    Chroma::Util.log_info("Server redirect response", code: response.code, location: response["location"])
    build_response_details(response)
  when Net::HTTPClientError
    Chroma::Util.log_error("Client error response", code: response.code, body: response.body)
    build_response_details(response)
  when Net::HTTPServerError
    Chroma::Util.log_error("Server error response", code: response.code)
    build_response_details(response, parse_body: false)
  else
    Chroma::Util.log_error("An error happened", error: response.to_s)
    build_response_details(response, exception: true, parse_body: false)
  end
end
82
-
83
# Wrap a response (or exception) in a Response object inside a Result monad.
#
# response   - A Net::HTTPResponse, or the exception raised while performing the request.
# exception  - true when `response` is an exception rather than an HTTP response.
# parse_body - When true the HTTP body is run through body_to_json; otherwise
#              it is passed along unchanged.
#
# For exceptions the status is 0, the body is the exception's message and the
# headers are empty. (The body used to be built from the boolean `exception`
# flag, which always produced the String "true".)
#
# Returns Success(Response) for a Net::HTTPSuccess, Failure(Response) otherwise.
# On failure the original response or exception is kept in Response#error.
private def build_response_details(response, exception: false, parse_body: true)
  status, body, headers =
    if exception
      [0, response.to_s, {}]
    else
      [
        response.code.to_i,
        parse_body ? body_to_json(response.body) : response.body,
        response.each_header.to_h
      ]
    end

  succeeded = response.is_a?(Net::HTTPSuccess)
  response_data = Chroma::APIOperations::Response.new(status, body, headers, succeeded ? nil : response)

  succeeded ? Success(response_data) : Failure(response_data)
end
102
-
103
# Parse a response body as JSON, falling back to the raw content.
#
# content - The response body; a String, or nil when there is no body.
#
# Keys of parsed objects are Strings, which is what the callers in
# Chroma::Resources::Collection read (e.g. data["id"]). The previous
# `symbolize_keys: true` argument is not a JSON.parse option (the real option
# is `symbolize_names`) and had no effect, so it is omitted here.
#
# Returns the parsed JSON value, or `content` unchanged when it is not valid
# JSON or not a String.
private def body_to_json(content)
  JSON.parse(content)
rescue JSON::ParserError, TypeError
  content
end
108
-
109
# Build the Net::HTTP request object for the given verb.
#
# method - :post, :put or :delete; any other value produces a GET request.
# uri    - The URI the request is sent to.
# params - An object responding to #size and #to_json; it becomes the JSON
#          body when its size is greater than zero.
#
# Returns the request with its content type set to "application/json".
private def build_request(method, uri, params)
  request_class = {
    post: Net::HTTP::Post,
    put: Net::HTTP::Put,
    delete: Net::HTTP::Delete
  }.fetch(method, Net::HTTP::Get)

  http_request = request_class.new(uri)
  http_request.content_type = "application/json"
  http_request.body = params.to_json unless params.size.zero?
  http_request
end
123
- end
124
-
125
# Hook run when this module is included: extends the including class or
# module with ClassMethods, making those methods callable on `base` itself.
def self.included(base)
  base.extend ClassMethods
end
128
- end
129
- end
130
- end
@@ -1,412 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Chroma
4
- using RubyNext
5
-
6
- module Resources
7
- # A Collection class represents a store for your embeddings, documents, and any additional metadata.
8
- # This class can be instantiated by receiving the collection's name and metadata hash.
9
- class Collection
10
- include Chroma::APIOperations::Request
11
-
12
# Read-only accessors for the values handed to the constructor
# (name and metadata are replaced by a successful #modify).
attr_reader :id, :name, :metadata

# Build a Collection object from already-known attributes. No request is made.
#
# id       - The collection identifier.
# name     - The collection name.
# metadata - The collection metadata (optional, nil by default).
def initialize(id:, name:, metadata: nil)
  @id, @name, @metadata = id, name, metadata
end
21
-
22
# Query the collection with one or more embeddings.
#
# query_embeddings - An Array of the embeddings to use for querying the collection.
# results          - The maximum number of results to return. 10 by default.
# where            - A Hash of additional conditions to filter the query results (optional).
# where_document   - A Hash of additional conditions to filter the associated documents (optional).
# include          - An Array of the additional information to include in the query results
#                    (optional). Metadatas, documents, and distances by default.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = collection.query(query_embeddings: [[1.5, 2.9, 3.3]], results: 5)
#
# Returns an Array of Embedding built from the response; failures are raised
# through raise_failure_error.
def query(query_embeddings:, results: 10, where: {}, where_document: {}, include: %w[metadatas documents distances])
  endpoint = "#{Chroma.api_url}/collections/#{id}/query"
  body = {
    query_embeddings: query_embeddings,
    n_results: results,
    where: where,
    where_document: where_document,
    include: include
  }

  response = self.class.execute_request(:post, endpoint, body)
  return build_embeddings_response(response.success.body) if response.success?

  self.class.raise_failure_error(response)
end
54
-
55
# Get embeddings from the collection.
#
# ids            - An Array of the specific embedding IDs to retrieve (optional).
# where          - A Hash of additional conditions to filter the query results (optional).
# sort           - The sorting criteria for the query results (optional).
# limit          - The maximum number of embeddings to retrieve (optional).
# offset         - The offset for pagination (optional).
# page           - The page number for pagination (optional).
# page_size      - The page size for pagination (optional).
# where_document - A Hash of additional conditions to filter the associated documents (optional).
# include        - An Array of the additional information to include in the query results
#                  (optional). Metadatas and documents by default.
#
# When both page and page_size are given they take precedence over limit and
# offset: limit becomes page_size and offset becomes (page - 1) * page_size.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = collection.get(ids: ["Array#sort", "Array#each"])
#
# Returns an Array of Embeddings; failures are raised through raise_failure_error.
def get(ids: nil, where: {}, sort: nil, limit: nil, offset: nil, page: nil, page_size: nil, where_document: {}, include: %w[metadatas documents])
  unless page.nil? || page_size.nil?
    limit = page_size
    offset = (page - 1) * page_size
  end

  endpoint = "#{Chroma.api_url}/collections/#{id}/get"
  body = {
    ids: ids,
    where: where,
    sort: sort,
    limit: limit,
    offset: offset,
    where_document: where_document,
    include: include
  }

  response = self.class.execute_request(:post, endpoint, body)
  return build_embeddings_response(response.success.body) if response.success?

  self.class.raise_failure_error(response)
end
98
-
99
# Add one or many embeddings to the collection.
#
# embeddings - An Array of Embeddings or one Embedding to add.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.add(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
#
# Returns true on success and false (without sending a request) when there is
# nothing to add. Failures are raised through raise_failure_error.
def add(embeddings = [])
  batch = Array(embeddings)
  return false if batch.empty?

  response = self.class.execute_request(
    :post,
    "#{Chroma.api_url}/collections/#{id}/add",
    build_embeddings_payload(batch)
  )

  if response.success?
    true
  else
    self.class.raise_failure_error(response)
  end
end
121
-
122
# Delete embeddings from the collection.
#
# ids            - An Array of the specific embedding IDs to delete (optional).
# where          - A Hash of additional conditions to filter the embeddings to delete (optional).
# where_document - A Hash of additional conditions to filter the associated documents (optional).
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.delete(ids: ["Array#fetch", "Array#sort"])
#
# Returns the parsed response body on success (described upstream as an Array
# of deleted ids). Failures are raised through raise_failure_error.
def delete(ids: nil, where: {}, where_document: {})
  endpoint = "#{Chroma.api_url}/collections/#{id}/delete"
  response = self.class.execute_request(
    :post,
    endpoint,
    {ids: ids, where: where, where_document: where_document}
  )

  if response.success?
    response.success.body
  else
    self.class.raise_failure_error(response)
  end
end
147
-
148
# Update one or many embeddings of the collection.
#
# embeddings - An Array of Embeddings or one Embedding to update.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.update(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
#
# Returns true on success and false (without sending a request) when there is
# nothing to update. Failures are raised through raise_failure_error.
def update(embeddings = [])
  batch = Array(embeddings)
  return false if batch.empty?

  # The update request is sent without the :increment_index entry.
  body = build_embeddings_payload(batch)
  body.delete(:increment_index)

  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/update", body)

  if response.success?
    true
  else
    self.class.raise_failure_error(response)
  end
end
171
-
172
# Upsert (insert or update) one or many embeddings to the collection.
#
# embeddings - An Array of Embeddings or one Embedding to upsert.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = [
#     Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}),
#     Embedding.new(id: "Array#select", embedding: [5.6, 3.1, 4.7], metadata: {url: "https://..."})
#   ]
#   collection.upsert(embeddings)
#
# Returns true on success and false (without sending a request) when there is
# nothing to upsert. Failures are raised through raise_failure_error.
def upsert(embeddings = [])
  batch = Array(embeddings)
  return false if batch.empty?

  response = self.class.execute_request(
    :post,
    "#{Chroma.api_url}/collections/#{id}/upsert",
    build_embeddings_payload(batch)
  )

  if response.success?
    true
  else
    self.class.raise_failure_error(response)
  end
end
198
-
199
# Count the number of embeddings in a collection.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.count
#
# Returns the parsed response body of the count endpoint. Failures are raised
# through raise_failure_error.
def count
  response = self.class.execute_request(:get, "#{Chroma.api_url}/collections/#{id}/count")

  if response.success?
    response.success.body
  else
    self.class.raise_failure_error(response)
  end
end
214
-
215
# Modify the name and metadata of the current collection.
#
# new_name     - The new name for the collection.
# new_metadata - The new metadata Hash for the collection (optional). It is only
#                sent to the server when it is not empty; nil is treated as empty.
#
# On success the local name is updated. The local metadata is replaced only
# when new metadata was actually sent, so renaming a collection no longer
# resets #metadata to an empty Hash while the stored metadata is unchanged.
#
# Examples:
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.modify("ruby-3.2-documentation")
#
# Returns nothing. Failures are raised through raise_failure_error.
def modify(new_name, new_metadata: {})
  metadata_given = !new_metadata.nil? && new_metadata.any?

  payload = {new_name: new_name}
  payload[:new_metadata] = new_metadata if metadata_given

  result = self.class.execute_request(:put, "#{Chroma.api_url}/collections/#{id}", payload)
  self.class.raise_failure_error(result) unless result.success?

  @name = new_name
  @metadata = new_metadata if metadata_given
  nil
end
239
-
240
# Creates an index for the collection.
#
# Examples:
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.create_index
#
# Returns true on success. Failures are raised through raise_failure_error.
def create_index
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/create_index")

  if response.success?
    true
  else
    self.class.raise_failure_error(response)
  end
end
255
-
256
# Create a new collection on the database.
#
# name     - The name of the collection. Name needs to be between 3-63 characters, starts and ends
#            with an alphanumeric character, contains only alphanumeric characters, underscores or
#            hyphens (-), and contains no two consecutive periods.
# metadata - A Hash of additional metadata associated with the collection (optional).
#
# The request is sent with get_or_create set to false.
#
# Examples
#
#   collection = Chroma::Resources::Collection.create("ruby-documentation", {source: "Ruby lang website"})
#
# Returns the created collection object. Failures are raised through raise_failure_error.
def self.create(name, metadata = nil)
  response = execute_request(
    :post,
    "#{Chroma.api_url}/collections",
    {name: name, metadata: metadata, get_or_create: false}
  )
  return raise_failure_error(response) unless response.success?

  attributes = response.success.body
  new(id: attributes["id"], name: attributes["name"], metadata: attributes["metadata"])
end
280
-
281
# Retrieves a collection from the database.
#
# name - The name of the collection to retrieve.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#
# Returns the retrieved collection object. Failures (for example an unknown
# name) are raised through raise_failure_error.
def self.get(name)
  response = execute_request(:get, "#{Chroma.api_url}/collections/#{name}")
  return raise_failure_error(response) unless response.success?

  attributes = response.success.body
  new(id: attributes["id"], name: attributes["name"], metadata: attributes["metadata"])
end
300
-
301
# Get or create a collection on the database.
#
# name     - The name of the collection. Name needs to be between 3-63 characters, starts and ends
#            with an alphanumeric character, contains only alphanumeric characters, underscores or
#            hyphens (-), and contains no two consecutive periods.
# metadata - A Hash of additional metadata associated with the collection; this is used if the
#            collection is created (optional).
#
# The request is sent with get_or_create set to true.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get_or_create("ruby-documentation", {source: "Ruby lang website"})
#
# Returns the existing or newly created collection object. Failures are raised
# through raise_failure_error.
def self.get_or_create(name, metadata = nil)
  response = execute_request(
    :post,
    "#{Chroma.api_url}/collections",
    {name: name, metadata: metadata, get_or_create: true}
  )
  return raise_failure_error(response) unless response.success?

  attributes = response.success.body
  new(id: attributes["id"], name: attributes["name"], metadata: attributes["metadata"])
end
325
-
326
# Retrieves all collections in the database.
#
# Examples
#
#   collections = Chroma::Resources::Collection.list
#
# Returns an Array with one collection object per entry of the response body.
# Failures are raised through raise_failure_error.
def self.list
  response = execute_request(:get, "#{Chroma.api_url}/collections")
  return raise_failure_error(response) unless response.success?

  response.success.body.map do |attributes|
    new(id: attributes["id"], name: attributes["name"], metadata: attributes["metadata"])
  end
end
343
-
344
# Deletes a collection from the database.
#
# name - The name of the collection to delete.
#
# Examples
#
#   Chroma::Resources::Collection.delete("ruby-documentation")
#
# Returns true if the collection was successfully deleted. Failures are raised
# through raise_failure_error.
def self.delete(name)
  response = execute_request(:delete, "#{Chroma.api_url}/collections/#{name}")

  if response.success?
    true
  else
    raise_failure_error(response)
  end
end
360
-
361
# Translate a failed request result into the matching Chroma error and raise it.
#
# result - A failure Result whose value responds to #error, #body and #status.
#
# The raised class depends on what the failure carries in #error:
#
#   an Exception                  - Chroma::APIConnectionError with the exception's message.
#   a 500 Internal Server Error   - Chroma::InvalidRequestError when the response body is a String
#                                   mentioning ValueError, IndexError or TypeError, otherwise
#                                   Chroma::APIConnectionError.
#   anything else                 - Chroma::APIError.
#
# Except for the Exception case, the error is built from the failure body and
# carries the failure status and body as keyword arguments.
#
# Always raises.
def self.raise_failure_error(result)
  failure = result.failure
  cause = failure.error

  case cause
  when Exception
    raise Chroma::APIConnectionError.new(cause.message)
  when Net::HTTPInternalServerError
    invalid_request = cause.body.is_a?(String) &&
      %w[ValueError IndexError TypeError].any? { |marker| cause.body.include?(marker) }
    error_class = invalid_request ? Chroma::InvalidRequestError : Chroma::APIConnectionError

    raise error_class.new(failure.body, status: failure.status, body: failure.body)
  else
    raise Chroma::APIError.new(failure.body, status: failure.status, body: failure.body)
  end
end
375
-
376
- private
377
-
378
# Turn a list of embeddings into the column-oriented request payload.
#
# embeddings      - An Array of objects responding to #id, #embedding, #metadata and #document.
# increment_index - Value stored under :increment_index (true by default).
#
# Returns a Hash with the keys :ids, :embeddings, :metadatas, :documents and
# :increment_index. The four Arrays are parallel: position n of each belongs
# to the n-th embedding.
def build_embeddings_payload(embeddings, increment_index = true)
  {
    ids: embeddings.map(&:id),
    embeddings: embeddings.map(&:embedding),
    metadatas: embeddings.map(&:metadata),
    documents: embeddings.map(&:document),
    increment_index: increment_index
  }
end
390
-
391
# Convert a parsed query/get response body into Embedding objects.
#
# result - A Hash with an "ids" entry and optional "embeddings", "documents",
#          "metadatas" and "distances" entries. Missing or nil optional
#          entries are treated as empty.
#
# Two layouts are handled: flat Arrays, and Arrays nested one level deeper
# (one inner Array per query embedding - NOTE(review): this is how the Chroma
# query endpoint is understood to respond; confirm against the server version
# in use). Ids, documents, metadatas and distances are fully flattened.
# Embeddings are un-nested by exactly one level, and only when the ids are
# nested, so that every vector stays intact and lines up with its id.
# Previously embeddings were never un-nested, which paired each id of a query
# response with a whole group of vectors instead of its own vector.
#
# Returns an Array of Chroma::Resources::Embedding, one per id.
def build_embeddings_response(result)
  Chroma::Util.log_debug("Building embeddings from #{result.inspect}")

  raw_ids = result.fetch("ids", [])
  nested = raw_ids.first.is_a?(Array)

  result_ids = raw_ids.flatten
  result_embeddings = result.dig("embeddings") || []
  # flatten(1), not flatten: a full flatten would dissolve the vectors themselves.
  result_embeddings = result_embeddings.flatten(1) if nested
  result_documents = (result.dig("documents") || []).flatten
  result_metadatas = (result.dig("metadatas") || []).flatten
  result_distances = (result.dig("distances") || []).flatten

  Chroma::Util.log_debug("Ids #{result_ids.inspect}")
  Chroma::Util.log_debug("Embeddings #{result_embeddings.inspect}")
  Chroma::Util.log_debug("Documents #{result_documents.inspect}")
  Chroma::Util.log_debug("Metadatas #{result_metadatas.inspect}")
  Chroma::Util.log_debug("distances #{result_distances.inspect}")

  result_ids.map.with_index do |id, index|
    Chroma::Resources::Embedding.new(
      id: id,
      embedding: result_embeddings[index],
      document: result_documents[index],
      metadata: result_metadatas[index],
      distance: result_distances[index]
    )
  end
end
410
- end
411
- end
412
- end