chroma-db 0.6.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,130 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Chroma
4
- module APIOperations
5
- using RubyNext
6
-
7
# Value object describing the outcome of an HTTP request.
#
# status  - HTTP status code as an Integer. It is zero when the request failed
#           before a response was received (e.g. a network error).
# body    - Parsed JSON object, or the raw response body when it was not parsed.
# headers - Hash of HTTP response headers.
# error   - nil for a Net::HTTPSuccess response; otherwise the Exception or
#           Net::HTTPResponse object describing the failure.
#
# NOTE: Not supported yet by Ruby Next
#   Response = Data.define(:status, :body, :headers, :error)
#
# The Struct is created anonymously and named through the constant assignment.
# Passing a String name to Struct.new would additionally register a global
# Struct::Response constant, which leaks outside this namespace.
Response = Struct.new(:status, :body, :headers, :error)
17
-
18
- # Request module provides functionality to perform HTTP requests.
19
- module Request
20
- module ClassMethods
21
- include Dry::Monads[:result]
22
-
23
# Execute an HTTP request and return a monad wrapping the response.
#
# method  - The HTTP method to use as a Symbol (:get, :post, :put or :delete).
# url     - The URL to send the request to.
# params  - A Hash sent as the JSON request body when it is not empty.
# options - Additional options for the request:
#           :use_ssl - Whether to use TLS. When the key is absent it is derived
#                      from the URL scheme (true for "https").
#
# The options Hash is only read, never modified, so callers can reuse it (or
# pass a frozen Hash) across requests.
#
# Returns whatever build_response returns for the HTTP response. Any
# StandardError raised while parsing the URL or performing the request is
# passed to build_response instead of propagating.
#
# Examples
#
#   result = execute_request(:get, "https://example.com", {name: "test request"})
#   if result.success?
#     puts "Response status: #{result.success.status}"
#     puts "Response body: #{result.success.body}"
#   else
#     puts "Request failed with error: #{result.failure.error}"
#   end
def execute_request(method, url, params = {}, options = {})
  uri = URI.parse(url)
  request = build_request(method, uri, params)

  # An explicit :use_ssl wins; otherwise an https URL implies TLS. Without this
  # an https URL would be contacted in plain HTTP on port 443.
  use_ssl = options.fetch(:use_ssl) { uri.scheme == "https" } ? true : false

  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: use_ssl) do |http|
    Chroma::Util.log_debug("Sending a request", {method: method, uri: uri, params: params})
    http.request(request)
  end

  build_response(response)
rescue => ex
  build_response(ex)
end
57
-
58
# Log the outcome of a request and convert it into a response monad.
#
# response - A Net::HTTPResponse, or any other object (typically the exception
#            rescued by execute_request).
#
# Success, redirect and client-error responses have their body parsed as JSON;
# server-error responses keep the raw body; anything else is handled as an
# exception.
#
# Returns the value produced by build_response_details.
private def build_response(response)
  detail_options = {}

  case response
  when Net::HTTPSuccess
    Chroma::Util.log_info("Successful response", code: response.code)
  when Net::HTTPRedirection
    Chroma::Util.log_info("Server redirect response", code: response.code, location: response["location"])
  when Net::HTTPClientError
    Chroma::Util.log_error("Client error response", code: response.code, body: response.body)
  when Net::HTTPServerError
    Chroma::Util.log_error("Server error response", code: response.code)
    detail_options = {parse_body: false}
  else
    Chroma::Util.log_error("An error happened", error: response.to_s)
    detail_options = {exception: true, parse_body: false}
  end

  build_response_details(response, **detail_options)
end
82
-
83
# Build the Response data object and wrap it in a Success or Failure monad.
#
# response   - A Net::HTTPResponse, or the failure object when exception is true.
# exception: - true when response is not an HTTP response (status becomes 0,
#              headers become {} and the body is response.to_s).
# parse_body: - true to run the HTTP body through body_to_json, false to keep
#              it as returned by the response.
#
# Returns Success(response_data) for a Net::HTTPSuccess response and
# Failure(response_data) for everything else. The error field of the data
# object is nil on success and the response/exception object otherwise.
private def build_response_details(response, exception: false, parse_body: true)
  body =
    if exception
      # Use the failure's own description; `exception` is only a boolean flag,
      # so stringifying it would always yield "true".
      response.to_s
    elsif parse_body
      body_to_json(response.body)
    else
      response.body
    end

  successful = response.is_a?(Net::HTTPSuccess)

  response_data = Chroma::APIOperations::Response.new(
    exception ? 0 : response.code.to_i,
    body,
    exception ? {} : response.each_header.to_h,
    successful ? nil : response
  )

  successful ? Success(response_data) : Failure(response_data)
end
102
-
103
# Parse a response body as JSON, falling back to the raw content.
#
# content - The response body (normally a String; may be nil).
#
# Hash keys are left as Strings: the rest of the library reads parsed bodies
# with String keys (e.g. data["id"]). No key-conversion option is passed;
# note that JSON.parse's option for that is `symbolize_names`, and
# `symbolize_keys` is not a JSON.parse option.
#
# Returns the parsed JSON value, or content unchanged when it is not valid
# JSON or not a String.
private def body_to_json(content)
  JSON.parse(content)
rescue JSON::ParserError, TypeError
  content
end
108
-
109
# Build the Net::HTTP request object for the given method.
#
# method - :post, :put or :delete; any other value produces a GET request.
# uri    - The URI the request targets.
# params - Object serialized with to_json as the request body when its size
#          is greater than zero.
#
# Returns the Net::HTTP request with a JSON content type.
private def build_request(method, uri, params)
  request_class = {
    post: Net::HTTP::Post,
    put: Net::HTTP::Put,
    delete: Net::HTTP::Delete
  }.fetch(method, Net::HTTP::Get)

  request_class.new(uri).tap do |request|
    request.content_type = "application/json"
    request.body = params.to_json unless params.size.zero?
  end
end
123
- end
124
-
125
# Hook called when this module is included: the including class or module is
# extended with ClassMethods, so those methods become available on it directly.
#
# base - The class or module including this module.
def self.included(base)
  base.extend ClassMethods
end
128
- end
129
- end
130
- end
@@ -1,412 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Chroma
4
- using RubyNext
5
-
6
- module Resources
7
- # A Collection class represents a store for your embeddings, documents, and any additional metadata.
8
- # This class can be instantiated by receiving the collection's name and metadata hash.
9
- class Collection
10
- include Chroma::APIOperations::Request
11
-
12
- attr_reader :id
13
- attr_reader :name
14
- attr_reader :metadata
15
-
16
# Build a Collection.
#
# id       - The collection's id.
# name     - The collection's name.
# metadata - The collection's metadata (optional, nil by default).
def initialize(id:, name:, metadata: nil)
  @id, @name, @metadata = id, name, metadata
end
21
-
22
# Query the collection and return the matching embeddings.
#
# query_embeddings - An Array of the embeddings to use for querying the collection.
# results          - The maximum number of results to return. 10 by default.
# where            - A Hash of additional conditions to filter the query results (optional).
# where_document   - A Hash of additional conditions to filter the associated documents (optional).
# include          - An Array of the additional information to include in the query results
#                    (optional). Metadatas, documents, and distances by default.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = collection.query(query_embeddings: [[1.5, 2.9, 3.3]], results: 5)
#
# Returns an Array of Embedding built from the response body. A failed request
# is handed to self.class.raise_failure_error.
def query(query_embeddings:, results: 10, where: {}, where_document: {}, include: %w[metadatas documents distances])
  request_body = {
    query_embeddings: query_embeddings,
    n_results: results,
    where: where,
    where_document: where_document,
    include: include
  }
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/query", request_body)

  return self.class.raise_failure_error(response) unless response.success?

  build_embeddings_response(response.success.body)
end
54
-
55
# Get embeddings from the collection.
#
# ids            - An Array of the specific embedding IDs to retrieve (optional).
# where          - A Hash of additional conditions to filter the query results (optional).
# sort           - The sorting criteria for the query results (optional).
# limit          - The maximum number of embeddings to retrieve (optional).
# offset         - The offset for pagination (optional).
# page           - The page number for pagination (optional).
# page_size      - The page size for pagination (optional).
# where_document - A Hash of additional conditions to filter the associated documents (optional).
# include        - An Array of the additional information to include in the query results
#                  (optional). Metadatas and documents by default.
#
# When both page and page_size are given they take precedence over limit and
# offset: limit becomes page_size and offset becomes (page - 1) * page_size.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = collection.get(ids: ["Array#sort", "Array#each"])
#
# Returns an Array of Embeddings built from the response body. A failed request
# is handed to self.class.raise_failure_error.
def get(ids: nil, where: {}, sort: nil, limit: nil, offset: nil, page: nil, page_size: nil, where_document: {}, include: %w[metadatas documents])
  unless page.nil? || page_size.nil?
    limit = page_size
    offset = (page - 1) * page_size
  end

  request_body = {
    ids: ids,
    where: where,
    sort: sort,
    limit: limit,
    offset: offset,
    where_document: where_document,
    include: include
  }
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/get", request_body)

  return self.class.raise_failure_error(response) unless response.success?

  build_embeddings_response(response.success.body)
end
98
-
99
# Add one or many embeddings to the collection.
#
# embeddings - An Array of Embeddings or one Embedding to add.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.add(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
#
# Returns false without sending a request when there is nothing to add, and
# true when the request succeeds. A failed request is handed to
# self.class.raise_failure_error.
def add(embeddings = [])
  items = Array(embeddings)
  return false if items.empty?

  request_body = build_embeddings_payload(items)
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/add", request_body)

  if response.success?
    true
  else
    self.class.raise_failure_error(response)
  end
end
121
-
122
# Delete embeddings from the collection.
#
# ids            - An Array of the specific embedding IDs to delete (optional).
# where          - A Hash of additional conditions to filter the embeddings to delete (optional).
# where_document - A Hash of additional conditions to filter the associated documents (optional).
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.delete(ids: ["Array#fetch", "Array#sort"])
#
# Returns the body of the successful response. A failed request is handed to
# self.class.raise_failure_error.
def delete(ids: nil, where: {}, where_document: {})
  request_body = {ids: ids, where: where, where_document: where_document}
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/delete", request_body)

  if response.success?
    response.success.body
  else
    self.class.raise_failure_error(response)
  end
end
147
-
148
# Update one or many embeddings in the collection.
#
# embeddings - An Array of Embeddings or one Embedding to update.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.update(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
#
# Returns false without sending a request when there is nothing to update, and
# true when the request succeeds. A failed request is handed to
# self.class.raise_failure_error.
def update(embeddings = [])
  items = Array(embeddings)
  return false if items.empty?

  # The update payload is the shared embeddings payload minus :increment_index.
  request_body = build_embeddings_payload(items).tap { |body| body.delete(:increment_index) }
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/update", request_body)

  if response.success?
    true
  else
    self.class.raise_failure_error(response)
  end
end
171
-
172
# Upsert (insert or update) one or many embeddings to the collection.
#
# embeddings - An Array of Embeddings or one Embedding to upsert.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = [
#     Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}),
#     Embedding.new(id: "Array#select", embedding: [5.6, 3.1, 4.7], metadata: {url: "https://..."})
#   ]
#   collection.upsert(embeddings)
#
# Returns false without sending a request when there is nothing to upsert, and
# true when the request succeeds. A failed request is handed to
# self.class.raise_failure_error.
def upsert(embeddings = [])
  items = Array(embeddings)
  return false if items.empty?

  request_body = build_embeddings_payload(items)
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/upsert", request_body)

  if response.success?
    true
  else
    self.class.raise_failure_error(response)
  end
end
198
-
199
# Count the number of embeddings in a collection.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.count
#
# Returns the body of the successful response (the count of embeddings in the
# collection). A failed request is handed to self.class.raise_failure_error.
def count
  response = self.class.execute_request(:get, "#{Chroma.api_url}/collections/#{id}/count")

  if response.success?
    response.success.body
  else
    self.class.raise_failure_error(response)
  end
end
214
-
215
# Modify the name and metadata of the current collection.
#
# new_name     - The new name for the collection.
# new_metadata - The new metadata Hash for the collection (optional). It is only
#                sent to the server when it is not nil and not empty.
#
# On success the local name is updated. The local metadata is replaced only
# when new metadata was actually sent, so a rename alone keeps the metadata
# this object already holds instead of clobbering it with an empty Hash.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.modify("ruby-3.2-documentation")
#
# Returns nothing meaningful. A failed request is handed to
# self.class.raise_failure_error.
def modify(new_name, new_metadata: {})
  # nil is treated like "no metadata" rather than raising NoMethodError.
  metadata_given = !(new_metadata.nil? || new_metadata.empty?)

  payload = {new_name: new_name}
  payload[:new_metadata] = new_metadata if metadata_given

  result = self.class.execute_request(:put, "#{Chroma.api_url}/collections/#{id}", payload)

  return self.class.raise_failure_error(result) unless result.success?

  @name = new_name
  @metadata = new_metadata if metadata_given
end
239
-
240
# Creates an index for the collection.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.create_index
#
# Returns true on success. A failed request is handed to
# self.class.raise_failure_error.
def create_index
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/create_index")

  if response.success?
    true
  else
    self.class.raise_failure_error(response)
  end
end
255
-
256
# Create a new collection on the database.
#
# name     - The name of the collection. Name needs to be between 3-63 characters, starts and ends
#            with an alphanumeric character, contains only alphanumeric characters, underscores or
#            hyphens (-), and contains no two consecutive periods.
# metadata - A Hash of additional metadata associated with the collection.
#
# Examples
#
#   collection = Chroma::Resources::Collection.create("ruby-documentation", {source: "Ruby lang website"})
#
# Returns the created collection object. A failed request is handed to
# raise_failure_error.
def self.create(name, metadata = nil)
  request_body = {name: name, metadata: metadata, get_or_create: false}
  response = execute_request(:post, "#{Chroma.api_url}/collections", request_body)

  return raise_failure_error(response) unless response.success?

  attributes = response.success.body
  new(id: attributes["id"], name: attributes["name"], metadata: attributes["metadata"])
end
280
-
281
# Retrieves a collection from the database.
#
# name - The name of the collection to retrieve.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#
# Returns the retrieved collection object. A failed request (for instance when
# the collection is not found) is handed to raise_failure_error.
def self.get(name)
  response = execute_request(:get, "#{Chroma.api_url}/collections/#{name}")

  return raise_failure_error(response) unless response.success?

  attributes = response.success.body
  new(id: attributes["id"], name: attributes["name"], metadata: attributes["metadata"])
end
300
-
301
# Get or create a collection on the database.
#
# name     - The name of the collection. Name needs to be between 3-63 characters, starts and ends
#            with an alphanumeric character, contains only alphanumeric characters, underscores or
#            hyphens (-), and contains no two consecutive periods.
# metadata - A Hash of additional metadata associated with the collection; this is used if the
#            collection is created.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get_or_create("ruby-documentation", {source: "Ruby lang website"})
#
# Returns the existing or newly created collection object. A failed request is
# handed to raise_failure_error.
def self.get_or_create(name, metadata = nil)
  request_body = {name: name, metadata: metadata, get_or_create: true}
  response = execute_request(:post, "#{Chroma.api_url}/collections", request_body)

  return raise_failure_error(response) unless response.success?

  attributes = response.success.body
  new(id: attributes["id"], name: attributes["name"], metadata: attributes["metadata"])
end
325
-
326
# Retrieves all collections in the database.
#
# Examples
#
#   collections = Chroma::Resources::Collection.list
#
# Returns an Array with one collection object per entry of the response body.
# A failed request is handed to raise_failure_error.
def self.list
  response = execute_request(:get, "#{Chroma.api_url}/collections")

  return raise_failure_error(response) unless response.success?

  response.success.body.map do |attributes|
    new(id: attributes["id"], name: attributes["name"], metadata: attributes["metadata"])
  end
end
343
-
344
# Deletes a collection from the database.
#
# name - The name of the collection to delete.
#
# Examples
#
#   Chroma::Resources::Collection.delete("ruby-documentation")
#
# Returns true if the collection was successfully deleted. A failed request is
# handed to raise_failure_error.
def self.delete(name)
  response = execute_request(:delete, "#{Chroma.api_url}/collections/#{name}")

  if response.success?
    true
  else
    raise_failure_error(response)
  end
end
360
-
361
# Translate a failed request result into the matching Chroma error and raise it.
#
# result - A failure result whose `failure` responds to status, body and error.
#
# The raised error depends on result.failure.error:
#   * an Exception                   -> Chroma::APIConnectionError with the exception message
#   * a Net::HTTPInternalServerError -> Chroma::InvalidRequestError when its String body mentions
#                                       "ValueError", "IndexError" or "TypeError";
#                                       Chroma::APIConnectionError otherwise
#   * anything else                  -> Chroma::APIError
#
# Except for the Exception case, the error is built with the failure body as
# its message plus the failure status and body.
#
# Always raises; never returns.
def self.raise_failure_error(result)
  failure = result.failure
  cause = failure.error

  raise Chroma::APIConnectionError.new(cause.message) if cause.is_a?(Exception)

  error_class =
    if !cause.is_a?(Net::HTTPInternalServerError)
      Chroma::APIError
    elsif cause.body.is_a?(String) && %w[ValueError IndexError TypeError].any? { |marker| cause.body.include?(marker) }
      Chroma::InvalidRequestError
    else
      Chroma::APIConnectionError
    end

  raise error_class.new(failure.body, status: failure.status, body: failure.body)
end
375
-
376
- private
377
-
378
# Build the request payload shared by the add, update and upsert endpoints.
#
# embeddings      - An Array of objects responding to id, embedding, metadata and document.
# increment_index - Value stored under :increment_index (true by default).
#
# Returns a Hash with parallel :ids, :embeddings, :metadatas and :documents
# Arrays (one entry per embedding, in order) plus :increment_index.
def build_embeddings_payload(embeddings, increment_index = true)
  {
    ids: embeddings.map(&:id),
    embeddings: embeddings.map(&:embedding),
    metadatas: embeddings.map(&:metadata),
    documents: embeddings.map(&:document),
    increment_index: increment_index
  }
end
390
-
391
# Build Embedding objects from a parsed response body.
#
# result - A Hash with String keys: "ids" and, optionally, "embeddings",
#          "documents", "metadatas" and "distances". Missing or nil fields are
#          treated as empty.
#
# Ids, documents, metadatas and distances are flattened so that nested results
# line up by position. Embeddings are vectors themselves and cannot be fully
# flattened, so they are unwrapped by exactly one level, and only when the ids
# are nested as well; flat results keep their embeddings untouched.
# NOTE(review): assumes nested (query) results wrap every field in one extra
# Array per query embedding - confirm against the Chroma API.
#
# Returns an Array of Chroma::Resources::Embedding, one per id.
def build_embeddings_response(result)
  Chroma::Util.log_debug("Building embeddings from #{result.inspect}")

  raw_ids = result["ids"] || []
  nested = raw_ids.first.is_a?(Array)
  raw_embeddings = result["embeddings"] || []

  result_ids = raw_ids.flatten
  # Keep embeddings aligned with the flattened ids without destroying the vectors.
  result_embeddings = nested ? raw_embeddings.flatten(1) : raw_embeddings
  result_documents = (result["documents"] || []).flatten
  result_metadatas = (result["metadatas"] || []).flatten
  result_distances = (result["distances"] || []).flatten

  Chroma::Util.log_debug("Ids #{result_ids.inspect}")
  Chroma::Util.log_debug("Embeddings #{result_embeddings.inspect}")
  Chroma::Util.log_debug("Documents #{result_documents.inspect}")
  Chroma::Util.log_debug("Metadatas #{result_metadatas.inspect}")
  Chroma::Util.log_debug("distances #{result_distances.inspect}")

  result_ids.each_with_index.map do |id, index|
    Chroma::Resources::Embedding.new(
      id: id,
      embedding: result_embeddings[index],
      document: result_documents[index],
      metadata: result_metadatas[index],
      distance: result_distances[index]
    )
  end
end
410
- end
411
- end
412
- end