chroma-db 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,389 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ using RubyNext
5
+ module Resources
6
+ # A Collection class represents a store for your embeddings, documents, and any additional metadata.
7
+ # This class can be instantiated by receiving the collection's name and metadata hash.
8
+ class Collection
9
+ include Chroma::APIOperations::Request
10
+
11
+ attr_reader :name
12
+ attr_reader :metadata
13
+
14
# Build a collection handle from its name and optional metadata hash.
def initialize(name:, metadata: nil)
  @name, @metadata = name, metadata
end
18
+
19
+ # Query the collection and return an array of embeddings.
20
+ #
21
+ # query_embeddings - An array of the embeddings to use for querying the collection.
22
+ # results - The maximum number of results to return. 10 by default.
23
+ # where - A Hash of additional conditions to filter the query results (optional).
24
+ # where_document - A Hash of additional conditions to filter the associated documents (optional).
25
+ # include - An Array of the additional information to include in the query results (optional). Metadatas,
26
+ # documents, and distances by default.
27
+ #
28
+ # Examples
29
+ #
30
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
31
+ # embeddings = collection.query(query_embeddings: [[1.5, 2.9, 3.3]], results: 5)
32
+ #
33
+ # Returns an Array of Embeddings with the query results.
34
def query(query_embeddings:, results: 10, where: {}, where_document: {}, include: %w[metadatas documents distances])
  # `results` is sent to the API under the key `n_results`.
  body = {query_embeddings: query_embeddings, n_results: results, where: where, where_document: where_document, include: include}
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/query", body)

  return build_embeddings_response(response.success.body) if response.success?

  raise_failure_error(response)
end
51
+
52
+ # Get embeddings from the collection.
53
+ #
54
+ # ids - An Array of the specific embedding IDs to retrieve (optional).
55
+ # where - A Hash of additional conditions to filter the query results (optional).
56
+ # sort - The sorting criteria for the query results (optional).
57
+ # limit - The maximum number of embeddings to retrieve (optional).
58
+ # offset - The offset for pagination (optional).
59
+ # page - The page number for pagination (optional).
60
+ # page_size - The page size for pagination (optional).
61
+ # where_document - A Hash of additional conditions to filter the associated documents (optional).
62
+ # include - An Array of the additional information to include in the query results (optional). Metadatas,
63
+ # and documents by default.
64
+ #
65
+ # Examples
66
+ #
67
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
68
+ # embeddings = collection.get(ids: ["Array#sort", "Array#each"])
69
+ #
70
+ # Returns an Array of Embeddings
71
def get(ids: nil, where: {}, sort: nil, limit: nil, offset: nil, page: nil, page_size: nil, where_document: {}, include: %w[metadatas documents])
  # When both page and page_size are given they override any explicit
  # limit/offset; a page without a page_size is ignored.
  unless page.nil? || page_size.nil?
    limit = page_size
    offset = (page - 1) * page_size
  end

  body = {ids: ids, where: where, sort: sort, limit: limit, offset: offset, where_document: where_document, include: include}
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/get", body)

  return build_embeddings_response(response.success.body) if response.success?

  raise_failure_error(response)
end
95
+
96
+ # Add one or many embeddings to the collection.
97
+ #
98
+ # embeddings - An Array of Embeddings or one Embedding to add.
99
+ #
100
+ # Examples
101
+ #
102
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
103
+ # collection.add(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
104
+ #
105
+ # Returns true with success or raises a Chroma::Error on failure.
106
def add(embeddings = [])
  batch = Array(embeddings)
  # Nothing to send: skip the request and report false.
  return false if batch.empty?

  body = build_embeddings_payload(batch)
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/add", body)
  raise_failure_error(response) unless response.success?

  true
end
118
+
119
+ # Delete embeddings from the collection.
120
+ #
121
+ # ids [Array<Integer>, nil] The specific embedding IDs to delete (optional).
122
+ # where [Hash] Additional conditions to filter the embeddings to delete (optional).
123
+ # where_document [Hash] Additional conditions to filter the associated documents (optional).
124
+ #
125
+ # Examples
126
+ #
127
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
128
+ # collection.delete(ids: ["Array#fetch", "Array#sort"])
129
+ #
130
+ # Returns an Array of deleted global ids.
131
def delete(ids: nil, where: {}, where_document: {})
  body = {ids: ids, where: where, where_document: where_document}
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/delete", body)
  raise_failure_error(response) unless response.success?

  # The response body is handed back to the caller as-is.
  response.success.body
end
144
+
145
+ # Update one or many embeddings to the collection.
146
+ #
147
+ # embeddings - An Array of Embeddings or one Embedding to update.
148
+ #
149
+ # Examples
150
+ #
151
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
152
+ # collection.update(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
153
+ #
154
+ # Returns true with success or raises a Chroma::Error on failure.
155
def update(embeddings = [])
  batch = Array(embeddings)
  return false if batch.empty?

  # The update endpoint payload is sent without the :increment_index key.
  body = build_embeddings_payload(batch).tap { |fields| fields.delete(:increment_index) }
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/update", body)
  raise_failure_error(response) unless response.success?

  true
end
168
+
169
+ # Upsert (insert or update) one or many embeddings to the collection.
170
+ #
171
+ # embeddings - An Array of Embeddings or one Embedding to add.
172
+ #
173
+ # Examples
174
+ #
175
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
176
+ # embeddings = [
177
+ # Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}),
178
+ # Embedding.new(id: "Array#select", embedding: [5.6, 3.1, 4.7], metadata: {url: "https://..."})
179
+ # ]
180
+ # collection.upsert(embeddings)
181
+ #
182
+ # Returns true with success or raises a Chroma::Error on failure.
183
def upsert(embeddings = [])
  batch = Array(embeddings)
  # An empty batch is a no-op that reports false without calling the API.
  return false if batch.empty?

  body = build_embeddings_payload(batch)
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/upsert", body)

  if response.success?
    true
  else
    raise_failure_error(response)
  end
end
195
+
196
+ # Count the number of embeddings in a collection.
197
+ #
198
+ # Examples
199
+ #
200
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
201
+ # collection.count
202
+ #
203
+ # Returns the count of embeddings in the collection.
204
def count
  response = self.class.execute_request(:get, "#{Chroma.api_url}/collections/#{name}/count")
  raise_failure_error(response) unless response.success?

  # The count is whatever the endpoint returned as its body.
  response.success.body
end
211
+
212
+ # Modify the name and metadata of the current collection.
213
+ #
214
+ # new_name - The new name for the collection.
215
+ # new_metadata - The new metadata hash for the collection.
216
+ #
217
+ # Examples:
218
+ #
219
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
220
+ # collection.modify("ruby-3.2-documentation")
221
+ #
222
+ # Returns nothing.
223
# Rename the collection and, optionally, replace its metadata, then mirror
# the accepted change on this object.
#
# new_name     - The new name for the collection.
# new_metadata - The new metadata Hash (optional). It is only sent, and only
#                applied locally, when it is non-empty; nil is treated like
#                an empty Hash.
#
# Returns nothing. Failures are reported through raise_failure_error.
def modify(new_name, new_metadata: {})
  metadata_given = !new_metadata.nil? && !new_metadata.empty?

  payload = {new_name: new_name}
  payload[:new_metadata] = new_metadata if metadata_given

  # The request targets the current (old) name; @name changes only on success.
  result = self.class.execute_request(:put, "#{Chroma.api_url}/collections/#{name}", payload)
  raise_failure_error(result) unless result.success?

  @name = new_name
  # Keep the cached metadata when none was sent: a rename alone must not
  # replace the local metadata with an empty Hash.
  @metadata = new_metadata if metadata_given
  nil
end
236
+
237
+ # Creates an index for the collection.
238
+ #
239
+ # Examples:
240
+ #
241
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
242
+ # collection.create_index
243
+ #
244
+ # Returns true on success or raise a Chroma::Error on failure.
245
def create_index
  response = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/create_index")

  # Success carries no payload of interest; failures are turned into errors.
  if response.success?
    true
  else
    raise_failure_error(response)
  end
end
252
+
253
+ # Create a new collection on the database.
254
+ #
255
+ # name - The name of the collection. Name needs to be between 3-63 characters, starts and ends
256
+ # with an alphanumeric character, contains only alphanumeric characters, underscores or hyphens (-), and
257
+ # contains no two consecutive periods
258
+ # metadata - A hash of additional metadata associated with the collection.
259
+ #
260
+ # Examples
261
+ #
262
+ # collection = Chroma::Resources::Collection.create("ruby-documentation", {source: "Ruby lang website"})
263
+ #
264
+ # Returns the created collection object.
265
def self.create(name, metadata = nil)
  # get_or_create is always sent as false by this method.
  body = {name: name, metadata: metadata, get_or_create: false}
  response = execute_request(:post, "#{Chroma.api_url}/collections", body)
  raise_failure_error(response) unless response.success?

  # Build the object from the values in the response body, not the arguments.
  attributes = response.success.body
  new(name: attributes["name"], metadata: attributes["metadata"])
end
277
+
278
+ # Retrieves a collection from the database.
279
+ #
280
+ # name - The name of the collection to retrieve.
281
+ #
282
+ # Examples
283
+ #
284
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
285
+ #
286
+ # Returns The retrieved collection object. Raises Chroma::InvalidRequestError if not found.
287
def self.get(name)
  response = execute_request(:get, "#{Chroma.api_url}/collections/#{name}")
  raise_failure_error(response) unless response.success?

  attributes = response.success.body
  new(name: attributes["name"], metadata: attributes["metadata"])
end
297
+
298
+ # Retrieves all collections in the database.
299
+ #
300
+ # Examples
301
+ #
302
+ # collections = Chroma::Resources::Collection.list
303
+ #
304
+ # Returns An array of all collections in the database.
305
def self.list
  response = execute_request(:get, "#{Chroma.api_url}/collections")
  raise_failure_error(response) unless response.success?

  # One Collection object per entry in the response body.
  response.success.body.map do |attributes|
    new(name: attributes["name"], metadata: attributes["metadata"])
  end
end
315
+
316
+ # Deletes a collection from the database.
317
+ #
318
+ # name - The name of the collection to delete.
319
+ #
320
+ # Examples
321
+ #
322
+ # Chroma::Resources::Collection.delete("ruby-documentation")
323
+ #
324
+ # Returns true if the collection was successfully deleted, raise Chroma::InvalidRequestError otherwise.
325
def self.delete(name)
  response = execute_request(:delete, "#{Chroma.api_url}/collections/#{name}")

  if response.success?
    true
  else
    raise_failure_error(response)
  end
end
332
+
333
# Translate a failed request result into the matching Chroma error.
#
# result - The failure result of execute_request; `result.failure` exposes
#          `error`, `status` and `body`.
#
# Raises Chroma::APIConnectionError when the failure wraps an Exception, or
#   for a 500 response whose body does not name a known Python error.
# Raises Chroma::InvalidRequestError for a 500 response whose body mentions
#   ValueError, IndexError or TypeError.
# Raises Chroma::APIError for every other failure.
def self.raise_failure_error(result)
  failure = result.failure
  error = failure.error

  case error
  when Exception
    raise Chroma::APIConnectionError.new(error.message)
  when Net::HTTPInternalServerError
    body = error.body
    invalid_request = body.is_a?(String) && %w[ValueError IndexError TypeError].any? { |marker| body.include?(marker) }
    error_class = invalid_request ? Chroma::InvalidRequestError : Chroma::APIConnectionError

    raise error_class.new(failure.body, status: failure.status, body: failure.body)
  else
    raise Chroma::APIError.new(failure.body, status: failure.status, body: failure.body)
  end
end
private_class_method :raise_failure_error

private

# Instance-level entry point used by the instance methods of this class,
# which call `raise_failure_error(result)` without a receiver. The class
# method is private, so it is reached through `send`.
def raise_failure_error(result)
  self.class.send(:raise_failure_error, result)
end
354
+
355
# Turn a list of embedding objects into the column-oriented payload sent to
# the add/update/upsert endpoints: one parallel array per attribute.
#
# embeddings      - An Array of objects responding to id, embedding,
#                   metadata and document.
# increment_index - Value stored under :increment_index (true by default).
#
# Returns a Hash with :ids, :embeddings, :metadatas, :documents and
# :increment_index keys.
def build_embeddings_payload(embeddings, increment_index = true)
  {
    ids: embeddings.map(&:id),
    embeddings: embeddings.map(&:embedding),
    metadatas: embeddings.map(&:metadata),
    documents: embeddings.map(&:document),
    increment_index: increment_index
  }
end
367
+
368
# Convert a column-oriented API response into Embedding objects.
#
# result - A Hash with an "ids" list and optional "embeddings", "documents",
#          "metadatas" and "distances" lists. The lists may be flat or
#          wrapped in one extra level of nesting.
#
# Each embedding vector is kept as an Array. A blanket `flatten` would
# dissolve the vectors into individual numbers and leave every Embedding
# with a single float.
#
# Returns an Array of Chroma::Resources::Embedding, one per id.
def build_embeddings_response(result)
  Chroma::Util.log_debug("Building embeddings from #{result.inspect}")

  raw_ids = result["ids"] || []
  # Strip from the embeddings only as many wrapper levels as the ids carry,
  # so the vectors themselves stay intact.
  nesting = raw_ids.first.is_a?(Array) ? 1 : 0

  result_ids = raw_ids.flatten
  result_embeddings = (result["embeddings"] || []).flatten(nesting)
  result_documents = (result["documents"] || []).flatten
  result_metadatas = (result["metadatas"] || []).flatten
  result_distances = (result["distances"] || []).flatten

  Chroma::Util.log_debug("Ids #{result_ids.inspect}")
  Chroma::Util.log_debug("Embeddings #{result_embeddings.inspect}")
  Chroma::Util.log_debug("Documents #{result_documents.inspect}")
  Chroma::Util.log_debug("Metadatas #{result_metadatas.inspect}")
  Chroma::Util.log_debug("distances #{result_distances.inspect}")

  # Columns missing from the response yield nil for that attribute.
  result_ids.each_with_index.map do |id, index|
    Chroma::Resources::Embedding.new(
      id: id,
      embedding: result_embeddings[index],
      document: result_documents[index],
      metadata: result_metadatas[index],
      distance: result_distances[index]
    )
  end
end
387
+ end
388
+ end
389
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ module Resources
5
+ # The Database class provides methods for interacting with the Chroma database server.
6
+ class Database
7
+ using RubyNext
8
+
9
+ include Chroma::APIOperations::Request
10
+ # Get the version of the Chroma database server.
11
+ #
12
+ # Returns the version of the Chroma database server.
13
def self.version
  response = execute_request(:get, "#{Chroma.api_url}/version")
  raise_failure_error(response) unless response.success?

  response.success.body
end
20
+
21
+ # Reset the Chroma database server. This can't be undone.
22
+ #
23
+ # Returns true on success or raise a Chroma::Error on failure.
24
def self.reset
  response = execute_request(:post, "#{Chroma.api_url}/reset")

  # The success value is the body returned by the reset endpoint.
  if response.success?
    response.success.body
  else
    raise_failure_error(response)
  end
end
31
+
32
+ # Persist Chroma database data.
33
+ #
34
+ # Returns true on success or raises a Chroma::Error on failure.
35
def self.persist
  response = execute_request(:post, "#{Chroma.api_url}/persist")
  raise_failure_error(response) unless response.success?

  # The success value is the body returned by the persist endpoint.
  response.success.body
end
42
+
43
+ # Check the heartbeat of the Chroma database server.
44
+ #
45
+ # Return a Hash with a timestamp.
46
def self.heartbeat
  response = execute_request(:get, "#{Chroma.api_url}/heartbeat")

  if response.success?
    response.success.body
  else
    raise_failure_error(response)
  end
end
53
+
54
# Translate a failed request result into the matching Chroma error.
#
# result - The failure result of execute_request; `result.failure` exposes
#          `error`, `status` and `body`.
#
# Raises Chroma::APIConnectionError when the failure wraps an Exception, or
#   for a 500 response whose body does not name a known Python error.
# Raises Chroma::InvalidRequestError for a 500 response whose body mentions
#   ValueError, IndexError or TypeError.
# Raises Chroma::APIError for every other failure.
def self.raise_failure_error(result)
  cause = result.failure.error

  case cause
  when Exception
    raise Chroma::APIConnectionError.new(cause.message)
  when Net::HTTPInternalServerError
    if cause.body.is_a?(String) && (cause.body.include?("ValueError") || cause.body.include?("IndexError") || cause.body.include?("TypeError"))
      raise Chroma::InvalidRequestError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
    end

    raise Chroma::APIConnectionError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
  else
    raise Chroma::APIError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
  end
end
72
+ private_class_method :raise_failure_error
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,130 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ module APIOperations
5
+ # using RubyNext
6
+
7
+ # Request's response Data object.
8
+ #
9
+ # status - HTTP status code. It is zero when a request fails due to network error.
10
+ # body - Parsed JSON object or response body.
11
+ # headers - HTTP response headers.
12
+ # error - Exception or Net::HTTPResponse object if the response is not Net::HTTPSuccess
13
+ #
14
+ # NOTE: Not supported yet by Ruby Next
15
+ # Response = Data.define(:status, :body, :headers, :error)
16
# Value object describing the outcome of a request (status, body, headers,
# error). Defined as an anonymous Struct assigned to this constant: passing
# a String name to Struct.new would also register a global Struct::Response
# constant shared with every other library in the process.
Response = Struct.new(:status, :body, :headers, :error)
17
+
18
+ # Request module provides functionality to perform HTTP requests.
19
+ module Request
20
+ module ClassMethods
21
+ include Dry::Monads[:result]
22
+
23
+ # Execute an HTTP request and return a monad wrapping the response.
24
+ #
25
+ # method - The HTTP method to use (e.g. 'GET', 'POST'). Method must be a `Symbol`.
26
+ # url - The URL to send the request to.
27
+ # params - The query parameters or request body. Params needs to be in a form of a Hash.
28
+ # options - Additional options to pass to the request.
29
+ #
30
+ # A `Dry::Monads::Result` monad wrapping the response, either a success or failure.
31
+ # The response is a `Chroma::APIOperations::Response` Data object.
32
+ #
33
+ # Examples
34
+ #
35
+ # result = execute_request(:get, "https://example.com", {name: "test request"})
36
+ # if result.success?
37
+ # puts "Response status: #{result.success.status}"
38
+ # puts "Response body: #{result.success.body}"
39
+ # else
40
+ # puts "Request failed with error: #{result.failure.error}"
41
+ # end
42
# Execute an HTTP request and wrap the outcome in a result object.
#
# method  - The HTTP method as a Symbol (:get, :post, :put, :delete).
# url     - The URL to send the request to.
# params  - A Hash serialized as the JSON request body (optional).
# options - Additional options (optional):
#           :use_ssl - Force TLS on or off. Defaults to true for https
#                      URLs and false otherwise.
#
# Returns whatever build_response produces for the HTTP response. Any
# StandardError raised while building or sending the request is passed to
# build_response as well instead of propagating.
def execute_request(method, url, params = {}, options = {})
  uri = URI.parse(url)
  request = build_request(method, uri, params)

  # Read the option without mutating the caller's Hash, and follow the URL
  # scheme when the caller expressed no preference.
  use_ssl = options.fetch(:use_ssl) { uri.scheme == "https" }

  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: use_ssl) do |http|
    Chroma::Util.log_debug("Sending a request", {method: method, uri: uri, params: params})
    http.request(request)
  end

  build_response(response)
rescue => ex
  build_response(ex)
end
57
+
58
# Log the outcome of a request and convert it into a result object.
#
# response - A Net::HTTPResponse, or the exception rescued while sending.
#
# Bodies of server-error responses and of exceptions are not JSON-parsed.
private def build_response(response)
  case response
  when Net::HTTPSuccess
    Chroma::Util.log_info("Successful response", code: response.code)
    build_response_details(response)
  when Net::HTTPRedirection
    Chroma::Util.log_info("Server redirect response", code: response.code, location: response["location"])
    build_response_details(response)
  when Net::HTTPClientError
    Chroma::Util.log_error("Client error response", code: response.code, body: response.body)
    build_response_details(response)
  when Net::HTTPServerError
    Chroma::Util.log_error("Server error response", code: response.code)
    build_response_details(response, parse_body: false)
  else
    # Anything that is not one of the HTTP response classes above is
    # handled as an exception.
    Chroma::Util.log_error("An error happened", error: response.to_s)
    build_response_details(response, exception: true, parse_body: false)
  end
end
82
+
83
# Build the Response object for a request outcome and wrap it in
# Success/Failure.
#
# response   - A Net::HTTPResponse, or an exception when `exception` is true.
# exception  - true when `response` is an exception rather than an HTTP
#              response; status is then 0 and headers are empty.
# parse_body - Whether to run the HTTP body through body_to_json.
#
# Returns Success(response_data) for Net::HTTPSuccess, Failure(response_data)
# otherwise. The `error` member is nil on success and the original
# response/exception on failure.
private def build_response_details(response, exception: false, parse_body: true)
  status, body, headers =
    if exception
      # `exception` is only a flag; the error text lives on `response`.
      [0, response.to_s, {}]
    else
      [
        response.code.to_i,
        parse_body ? body_to_json(response.body) : response.body,
        response.each_header.to_h
      ]
    end

  succeeded = response.is_a?(Net::HTTPSuccess)
  response_data = Chroma::APIOperations::Response.new(status, body, headers, succeeded ? nil : response)

  succeeded ? Success(response_data) : Failure(response_data)
end
102
+
103
# Parse a response body as JSON, falling back to the raw content.
#
# content - The response body; may be nil or non-JSON text.
#
# Keys are left as Strings. `symbolize_keys:` is not an option JSON.parse
# understands, and the resource classes read String keys.
#
# Returns the parsed object, or `content` unchanged when it cannot be parsed.
private def body_to_json(content)
  JSON.parse(content)
rescue JSON::ParserError, TypeError
  content
end
108
+
109
# Build the Net::HTTP request object for the given method.
#
# method - :post, :put or :delete; anything else produces a GET request.
# uri    - The parsed URI of the endpoint.
# params - A Hash serialized as the JSON body. A nil or empty value leaves
#          the body unset.
#
# Returns the Net::HTTPRequest with a JSON content type.
private def build_request(method, uri, params)
  request_class = {
    post: Net::HTTP::Post,
    put: Net::HTTP::Put,
    delete: Net::HTTP::Delete
  }.fetch(method, Net::HTTP::Get)

  request = request_class.new(uri)
  request.content_type = "application/json"
  # Guard against nil so callers may pass "no params" either way.
  request.body = params.to_json unless params.nil? || params.empty?

  request
end
123
+ end
124
+
125
# Hook run when this module is included: extends the including class with
# ClassMethods so they become available as class-level methods.
def self.included(base)
  base.extend ClassMethods
end
128
+ end
129
+ end
130
+ end