chroma-db 0.2.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,392 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ using RubyNext
5
+
6
+ module Resources
7
+ # A Collection class represents a store for your embeddings, documents, and any additional metadata.
8
+ # This class can be instantiated by receiving the collection's name and metadata hash.
9
+ class Collection
10
+ include Chroma::APIOperations::Request
11
+
12
+ attr_reader :id
13
+ attr_reader :name
14
+ attr_reader :metadata
15
+
16
# Build a collection handle from already-known attributes.
#
# id       - The collection identifier (interpolated into per-collection request URLs).
# name     - The collection name.
# metadata - A Hash of metadata attached to the collection (optional).
def initialize(id:, name:, metadata: nil)
  @id, @name, @metadata = id, name, metadata
end
21
+
22
# Query the collection and return an array of embeddings.
#
# query_embeddings - An Array of the embeddings to use for querying the collection.
# results          - The maximum number of results to return. 10 by default.
# where            - A Hash of additional conditions to filter the query results (optional).
# where_document   - A Hash of additional conditions to filter the associated documents (optional).
# include          - An Array of the additional information to include in the query results (optional).
#                    Metadatas, documents, and distances by default.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = collection.query(query_embeddings: [[1.5, 2.9, 3.3]], results: 5)
#
# Returns an Array of Embedding with query results.
# Raises the Chroma error built by the class-level failure handler when the request fails.
def query(query_embeddings:, results: 10, where: {}, where_document: {}, include: %w[metadatas documents distances])
  payload = {
    query_embeddings: query_embeddings,
    n_results: results,
    where: where,
    where_document: where_document,
    include: include
  }

  # Address the collection by id, like every other per-collection endpoint in this class.
  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/query", payload)

  return build_embeddings_response(result.success.body) if result.success?

  # The failure handler is a private class method, so it must be reached through the class.
  self.class.send(:raise_failure_error, result)
end
54
+
55
# Get embeddings from the collection.
#
# ids            - An Array of the specific embedding IDs to retrieve (optional).
# where          - A Hash of additional conditions to filter the query results (optional).
# sort           - The sorting criteria for the query results (optional).
# limit          - The maximum number of embeddings to retrieve (optional).
# offset         - The offset for pagination (optional).
# page           - The page number for pagination (optional).
# page_size      - The page size for pagination (optional).
# where_document - A Hash of additional conditions to filter the associated documents (optional).
# include        - An Array of the additional information to include in the query results (optional).
#                  Metadatas and documents by default.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = collection.get(ids: ["Array#sort", "Array#each"])
#
# Returns an Array of Embeddings.
# Raises the Chroma error built by the class-level failure handler when the request fails.
def get(ids: nil, where: {}, sort: nil, limit: nil, offset: nil, page: nil, page_size: nil, where_document: {}, include: %w[metadatas documents])
  # When both page and page_size are given they override any explicit limit/offset.
  unless page.nil? || page_size.nil?
    offset = (page - 1) * page_size
    limit = page_size
  end

  payload = {
    ids: ids,
    where: where,
    sort: sort,
    limit: limit,
    offset: offset,
    where_document: where_document,
    include: include
  }

  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/get", payload)

  return build_embeddings_response(result.success.body) if result.success?

  # The failure handler is a private class method, so it must be reached through the class.
  self.class.send(:raise_failure_error, result)
end
98
+
99
# Add one or many embeddings to the collection.
#
# embeddings - An Array of Embeddings or one Embedding to add.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.add(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
#
# Returns true on success, or false (without sending a request) when there is nothing to add.
# Raises the Chroma error built by the class-level failure handler when the request fails.
def add(embeddings = [])
  embeddings_array = Array(embeddings)
  return false if embeddings_array.empty?

  payload = build_embeddings_payload(embeddings_array)

  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/add", payload)

  return true if result.success?

  # The failure handler is a private class method, so it must be reached through the class.
  self.class.send(:raise_failure_error, result)
end
121
+
122
# Delete embeddings from the collection.
#
# ids            - An Array of the specific embedding IDs to delete (optional).
# where          - A Hash of additional conditions to filter the embeddings to delete (optional).
# where_document - A Hash of additional conditions to filter the associated documents (optional).
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.delete(ids: ["Array#fetch", "Array#sort"])
#
# Returns the response body of the delete request (documented upstream as an Array of deleted ids).
# Raises the Chroma error built by the class-level failure handler when the request fails.
def delete(ids: nil, where: {}, where_document: {})
  payload = {
    ids: ids,
    where: where,
    where_document: where_document
  }

  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/delete", payload)

  return result.success.body if result.success?

  # The failure handler is a private class method, so it must be reached through the class.
  self.class.send(:raise_failure_error, result)
end
147
+
148
# Update one or many embeddings in the collection.
#
# embeddings - An Array of Embeddings or one Embedding to update.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.update(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
#
# Returns true on success, or false (without sending a request) when there is nothing to update.
# Raises the Chroma error built by the class-level failure handler when the request fails.
def update(embeddings = [])
  embeddings_array = Array(embeddings)
  return false if embeddings_array.empty?

  payload = build_embeddings_payload(embeddings_array)
  # The update request is sent without the :increment_index key the shared builder adds.
  payload.delete(:increment_index)

  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/update", payload)

  return true if result.success?

  # The failure handler is a private class method, so it must be reached through the class.
  self.class.send(:raise_failure_error, result)
end
171
+
172
# Upsert (insert or update) one or many embeddings to the collection.
#
# embeddings - An Array of Embeddings or one Embedding to upsert.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = [
#     Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}),
#     Embedding.new(id: "Array#select", embedding: [5.6, 3.1, 4.7], metadata: {url: "https://..."})
#   ]
#   collection.upsert(embeddings)
#
# Returns true on success, or false (without sending a request) when there is nothing to upsert.
# Raises the Chroma error built by the class-level failure handler when the request fails.
def upsert(embeddings = [])
  embeddings_array = Array(embeddings)
  return false if embeddings_array.empty?

  payload = build_embeddings_payload(embeddings_array)

  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/upsert", payload)

  return true if result.success?

  # The failure handler is a private class method, so it must be reached through the class.
  self.class.send(:raise_failure_error, result)
end
198
+
199
# Count the number of embeddings in a collection.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.count
#
# Returns the body of the count response (the count of embeddings in the collection).
# Raises the Chroma error built by the class-level failure handler when the request fails.
def count
  result = self.class.execute_request(:get, "#{Chroma.api_url}/collections/#{id}/count")

  return result.success.body if result.success?

  # The failure handler is a private class method, so it must be reached through the class.
  self.class.send(:raise_failure_error, result)
end
214
+
215
# Modify the name and metadata of the current collection.
#
# new_name     - The new name for the collection.
# new_metadata - The new metadata Hash for the collection (optional). When it is nil or
#                empty no metadata is sent, and the locally held metadata is left untouched.
#
# Examples:
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.modify("ruby-3.2-documentation")
#
# Returns nothing.
# Raises the Chroma error built by the class-level failure handler when the request fails.
def modify(new_name, new_metadata: {})
  send_metadata = !(new_metadata.nil? || new_metadata.empty?)

  payload = {new_name: new_name}
  payload[:new_metadata] = new_metadata if send_metadata

  result = self.class.execute_request(:put, "#{Chroma.api_url}/collections/#{id}", payload)

  # The failure handler is a private class method, so it must be reached through the class.
  self.class.send(:raise_failure_error, result) unless result.success?

  @name = new_name
  # Only mirror metadata that was actually sent; a rename alone must not wipe the local copy.
  @metadata = new_metadata if send_metadata
end
239
+
240
# Creates an index for the collection.
#
# Examples:
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.create_index
#
# Returns true on success.
# Raises the Chroma error built by the class-level failure handler when the request fails.
def create_index
  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/create_index")

  return true if result.success?

  # The failure handler is a private class method, so it must be reached through the class.
  self.class.send(:raise_failure_error, result)
end
255
+
256
# Create a new collection on the database.
#
# name     - The name of the collection. Name needs to be between 3-63 characters, starts and ends
#            with an alphanumeric character, contains only alphanumeric characters, underscores or
#            hyphens (-), and contains no two consecutive periods.
# metadata - A Hash of additional metadata associated with the collection (optional).
#
# Examples
#
#   collection = Chroma::Resources::Collection.create("ruby-documentation", {source: "Ruby lang website"})
#
# Returns the created collection object. Raises a Chroma error on failure.
def self.create(name, metadata = nil)
  result = execute_request(
    :post,
    "#{Chroma.api_url}/collections",
    {name: name, metadata: metadata, get_or_create: false}
  )

  raise_failure_error(result) unless result.success?

  attributes = result.success.body
  new(id: attributes["id"], name: attributes["name"], metadata: attributes["metadata"])
end
280
+
281
# Retrieves a collection from the database.
#
# name - The name of the collection to retrieve.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#
# Returns the retrieved collection object. Raises a Chroma error if the request fails.
def self.get(name)
  result = execute_request(:get, "#{Chroma.api_url}/collections/#{name}")

  raise_failure_error(result) unless result.success?

  attributes = result.success.body
  new(id: attributes["id"], name: attributes["name"], metadata: attributes["metadata"])
end
300
+
301
# Retrieves all collections in the database.
#
# Examples
#
#   collections = Chroma::Resources::Collection.list
#
# Returns an Array with one collection object per entry of the response body.
# Raises a Chroma error if the request fails.
def self.list
  result = execute_request(:get, "#{Chroma.api_url}/collections")

  raise_failure_error(result) unless result.success?

  result.success.body.map do |attributes|
    new(id: attributes["id"], name: attributes["name"], metadata: attributes["metadata"])
  end
end
318
+
319
# Deletes a collection from the database.
#
# name - The name of the collection to delete.
#
# Examples
#
#   Chroma::Resources::Collection.delete("ruby-documentation")
#
# Returns true if the collection was successfully deleted. Raises a Chroma error otherwise.
def self.delete(name)
  result = execute_request(:delete, "#{Chroma.api_url}/collections/#{name}")

  raise_failure_error(result) unless result.success?

  true
end
335
+
336
# Translate a failed request result into the matching Chroma error and raise it.
#
# result - A failure result whose `failure` exposes `error`, `body` and `status`.
#
# Raises Chroma::APIConnectionError when the failure wraps a Ruby exception, or a 500 response
# whose body does not mention ValueError, IndexError or TypeError.
# Raises Chroma::InvalidRequestError for a 500 response whose body mentions one of those names.
# Raises Chroma::APIError for every other failure.
def self.raise_failure_error(result)
  error = result.failure.error

  case error
  when Exception
    raise Chroma::APIConnectionError.new(error.message)
  when Net::HTTPInternalServerError
    # The server reports request problems as 500s that name the Python exception in the body.
    invalid_request = error.body.is_a?(String) &&
      %w[ValueError IndexError TypeError].any? { |marker| error.body.include?(marker) }

    if invalid_request
      raise Chroma::InvalidRequestError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
    end

    raise Chroma::APIConnectionError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
  else
    raise Chroma::APIError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
  end
end
354
+ private_class_method :raise_failure_error
355
+
356
+ private
357
+
358
# Turn a list of embeddings into the column-oriented payload sent to the server.
#
# embeddings      - An Array of objects responding to id, embedding, metadata and document.
# increment_index - Value stored under the :increment_index key. true by default.
#
# Returns a Hash with parallel :ids, :embeddings, :metadatas and :documents Arrays
# plus the :increment_index flag.
def build_embeddings_payload(embeddings, increment_index = true)
  {
    ids: embeddings.map(&:id),
    embeddings: embeddings.map(&:embedding),
    metadatas: embeddings.map(&:metadata),
    documents: embeddings.map(&:document),
    increment_index: increment_index
  }
end
370
+
371
# Convert a get/query response body into Embedding objects.
#
# result - The parsed response Hash with "ids" and optional "embeddings", "documents",
#          "metadatas" and "distances" entries.
#
# Returns an Array of Chroma::Resources::Embedding, one per returned id. Fields missing from
# the response are nil on the built embeddings.
def build_embeddings_response(result)
  Chroma::Util.log_debug("Building embeddings from #{result.inspect}")

  raw_ids = result.fetch("ids", [])
  # Per the Chroma API, query responses wrap every field in one extra Array per query
  # embedding, while get responses are flat; the shape of "ids" tells the two apart.
  nested = raw_ids.first.is_a?(Array)

  result_ids = raw_ids.flatten
  # Each embedding is itself an Array of numbers, so strip only the per-query level;
  # a full flatten would dissolve the vectors into individual numbers.
  raw_embeddings = result.dig("embeddings") || []
  result_embeddings = nested ? raw_embeddings.flatten(1) : raw_embeddings
  result_documents = (result.dig("documents") || []).flatten
  result_metadatas = (result.dig("metadatas") || []).flatten
  result_distances = (result.dig("distances") || []).flatten

  Chroma::Util.log_debug("Ids #{result_ids.inspect}")
  Chroma::Util.log_debug("Embeddings #{result_embeddings.inspect}")
  Chroma::Util.log_debug("Documents #{result_documents.inspect}")
  Chroma::Util.log_debug("Metadatas #{result_metadatas.inspect}")
  Chroma::Util.log_debug("distances #{result_distances.inspect}")

  result_ids.map.with_index do |id, index|
    Chroma::Resources::Embedding.new(id: id, embedding: result_embeddings[index], document: result_documents[index], metadata: result_metadatas[index], distance: result_distances[index])
  end
end
390
+ end
391
+ end
392
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ module Resources
5
# The Database class provides methods for interacting with the Chroma database server.
class Database
  using RubyNext

  include Chroma::APIOperations::Request

  # Get the version of the Chroma database server.
  #
  # Returns the body of the version response. Raises a Chroma error on failure.
  def self.version
    result = execute_request(:get, "#{Chroma.api_url}/version")
    raise_failure_error(result) unless result.success?

    result.success.body
  end

  # Reset the Chroma database server. This can't be undone.
  #
  # Returns the body of the reset response. Raises a Chroma error on failure.
  def self.reset
    result = execute_request(:post, "#{Chroma.api_url}/reset")
    raise_failure_error(result) unless result.success?

    result.success.body
  end

  # Persist Chroma database data.
  #
  # Returns the body of the persist response. Raises a Chroma error on failure.
  def self.persist
    result = execute_request(:post, "#{Chroma.api_url}/persist")
    raise_failure_error(result) unless result.success?

    result.success.body
  end

  # Check the heartbeat of the Chroma database server.
  #
  # Returns the body of the heartbeat response (a Hash with a timestamp).
  # Raises a Chroma error on failure.
  def self.heartbeat
    result = execute_request(:get, "#{Chroma.api_url}/heartbeat")
    raise_failure_error(result) unless result.success?

    result.success.body
  end

  # Translate a failed request result into the matching Chroma error and raise it.
  #
  # result - A failure result whose `failure` exposes `error`, `body` and `status`.
  #
  # Raises Chroma::APIConnectionError when the failure wraps a Ruby exception, or a 500 response
  # whose body does not mention ValueError, IndexError or TypeError.
  # Raises Chroma::InvalidRequestError for a 500 response whose body mentions one of those names.
  # Raises Chroma::APIError for every other failure.
  def self.raise_failure_error(result)
    error = result.failure.error

    case error
    when Exception
      raise Chroma::APIConnectionError.new(error.message)
    when Net::HTTPInternalServerError
      invalid_request = error.body.is_a?(String) &&
        %w[ValueError IndexError TypeError].any? { |marker| error.body.include?(marker) }

      if invalid_request
        raise Chroma::InvalidRequestError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
      end

      raise Chroma::APIConnectionError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
    else
      raise Chroma::APIError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
    end
  end
  private_class_method :raise_failure_error
end
74
+ end
75
+ end
@@ -0,0 +1,130 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ module APIOperations
5
+ using RubyNext
6
+
7
# Request's response value object.
#
# status  - HTTP status code. It is zero when a request fails due to network error.
# body    - Parsed JSON object or response body.
# headers - HTTP response headers.
# error   - Exception or Net::HTTPResponse object if the response is not Net::HTTPSuccess.
#
# NOTE: `Data.define(:status, :body, :headers, :error)` is not supported yet by Ruby Next,
# so a Struct is used instead. The Struct is created anonymously and named by this constant;
# passing a String name to Struct.new would additionally register a global Struct::Response.
Response = Struct.new(:status, :body, :headers, :error)
17
+
18
+ # Request module provides functionality to perform HTTP requests.
19
+ module Request
20
+ module ClassMethods
21
+ include Dry::Monads[:result]
22
+
23
# Execute an HTTP request and return a monad wrapping the response.
#
# method  - The HTTP method to use (e.g. :get, :post). Method must be a `Symbol`.
# url     - The URL to send the request to.
# params  - The query parameters or request body. Params needs to be in a form of a Hash.
# options - Additional options to pass to the request. Only :use_ssl is read; when it is
#           absent, TLS is enabled if the URL scheme is https. The Hash is not modified.
#
# A `Dry::Monads::Result` monad wrapping the response, either a success or failure.
# The response is a `Chroma::APIOperations::Response` object. Any StandardError raised
# while building or sending the request is captured and returned as a failure.
#
# Examples
#
#   result = execute_request(:get, "https://example.com", {name: "test request"})
#   if result.success?
#     puts "Response status: #{result.success.status}"
#     puts "Response body: #{result.success.body}"
#   else
#     puts "Request failed with error: #{result.failure.error}"
#   end
def execute_request(method, url, params = {}, options = {})
  uri = URI.parse(url)

  request = build_request(method, uri, params)

  # An explicit :use_ssl wins; otherwise follow the URL scheme so https endpoints
  # work without extra configuration.
  use_ssl = options.fetch(:use_ssl) { uri.scheme == "https" } || false
  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: use_ssl) do |http|
    Chroma::Util.log_debug("Sending a request", {method: method, uri: uri, params: params})
    http.request(request)
  end

  build_response(response)
rescue => ex
  build_response(ex)
end
57
+
58
# Log the outcome of a request and wrap it into a result monad.
#
# response - A Net::HTTPResponse, or the exception rescued while performing the request.
#
# Returns whatever build_response_details returns for the response. Bodies of server-error
# responses and of non-HTTP failures are passed through without JSON parsing.
private def build_response(response)
  case response
  when Net::HTTPSuccess
    Chroma::Util.log_info("Successful response", code: response.code)

    build_response_details(response)
  when Net::HTTPRedirection
    Chroma::Util.log_info("Server redirect response", code: response.code, location: response["location"])

    build_response_details(response)
  when Net::HTTPClientError
    Chroma::Util.log_error("Client error response", code: response.code, body: response.body)

    build_response_details(response)
  when Net::HTTPServerError
    Chroma::Util.log_error("Server error response", code: response.code)

    build_response_details(response, parse_body: false)
  else
    # Anything else is treated as an exception-like failure with no HTTP status.
    Chroma::Util.log_error("An error happened", error: response.to_s)

    build_response_details(response, exception: true, parse_body: false)
  end
end
82
+
83
# Build the Response value object for a request outcome and wrap it in Success or Failure.
#
# response   - A Net::HTTPResponse, or the rescued error when `exception` is true.
# exception  - true when `response` is not an HTTP response; status becomes 0, headers
#              become empty, and the body is the error's text. false by default.
# parse_body - Whether to try parsing the HTTP body as JSON. true by default.
#
# Returns Success(response_data) for Net::HTTPSuccess responses, Failure(response_data)
# otherwise. On failures the original response/error object is kept in `error`.
private def build_response_details(response, exception: false, parse_body: true)
  status = exception ? 0 : response.code.to_i

  body =
    if exception
      # `exception` is only a flag; the failure text lives on the rescued object itself.
      response.to_s
    elsif parse_body
      body_to_json(response.body)
    else
      response.body
    end

  headers = exception ? {} : response.each_header.to_h
  succeeded = response.is_a?(Net::HTTPSuccess)

  response_data = Chroma::APIOperations::Response.new(status, body, headers, succeeded ? nil : response)

  succeeded ? Success(response_data) : Failure(response_data)
end
102
+
103
# Parse a response body as JSON, falling back to the raw content.
#
# content - The response body String (may be nil).
#
# Returns the parsed JSON value with String keys, or `content` unchanged when it is
# not valid JSON or not a String.
private def body_to_json(content)
  # Keys stay Strings: callers read them as data["id"], and JSON.parse has no
  # `symbolize_keys` option (the real one is `symbolize_names`).
  JSON.parse(content)
rescue JSON::ParserError, TypeError
  content
end
108
+
109
# Build the Net::HTTP request object for the given verb.
#
# method - :post, :put or :delete; any other value produces a GET request.
# uri    - The URI the request targets.
# params - A Hash serialized as the JSON request body when it is not empty.
#
# Returns the request with its content type set to application/json.
private def build_request(method, uri, params)
  request_classes = {post: Net::HTTP::Post, put: Net::HTTP::Put, delete: Net::HTTP::Delete}

  request = request_classes.fetch(method, Net::HTTP::Get).new(uri)
  request.content_type = "application/json"
  request.body = params.to_json if params.size.positive?

  request
end
123
+ end
124
+
125
# Hook Ruby runs when this module is included: exposes the ClassMethods helpers
# as class-level methods of the including class.
#
# base - The class or module including this module.
def self.included(base)
  base.extend ClassMethods
end
128
+ end
129
+ end
130
+ end