chroma-db 0.7.0 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,400 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Chroma
4
- using RubyNext
5
-
6
- module Resources
7
- # A Collection class represents a store for your embeddings, documents, and any additional metadata.
8
- # This class can be instantiated by receiving the collection's name and metadata hash.
9
- class Collection
10
- include Chroma::APIOperations::Request
11
-
12
- attr_reader :id
13
- attr_reader :name
14
- attr_reader :metadata
15
-
16
# Build a collection handle from its attributes.
#
# id - The identifier of the collection.
# name - The name of the collection.
# metadata - A Hash of metadata associated with the collection (optional).
def initialize(id:, name:, metadata: nil)
  @id, @name, @metadata = id, name, metadata
end
21
-
22
# Query the collection for the nearest embeddings.
#
# query_embeddings - An Array of the embeddings to use for querying the collection.
# results - The maximum number of results to return. 10 by default.
# where - A Hash of additional conditions to filter the query results (optional).
# where_document - A Hash of additional conditions to filter the associated documents (optional).
# include - An Array of the additional information to include in the query results (optional).
#           Metadatas, documents, and distances by default.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = collection.query(query_embeddings: [[1.5, 2.9, 3.3]], results: 5)
#
# Returns an Array of Embedding built from the response, or raises through
# raise_failure_error when the request fails.
def query(query_embeddings:, results: 10, where: {}, where_document: {}, include: %w[metadatas documents distances])
  outcome = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/query", {
    query_embeddings: query_embeddings,
    n_results: results, # the server-side name of the `results` argument
    where: where,
    where_document: where_document,
    include: include
  })

  return build_embeddings_response(outcome.success.body) if outcome.success?

  self.class.raise_failure_error(outcome)
end
54
-
55
# Get embeddings from the collection.
#
# ids - An Array of the specific embedding IDs to retrieve (optional).
# where - A Hash of additional conditions to filter the query results (optional).
# sort - The sorting criteria for the query results (optional).
# limit - The maximum number of embeddings to retrieve (optional).
# offset - The offset for pagination (optional).
# page - The page number for pagination, starting at 1 (optional).
# page_size - The page size for pagination (optional).
# where_document - A Hash of additional conditions to filter the associated documents (optional).
# include - An Array of the additional information to include in the query results (optional).
#           Metadatas and documents by default.
#
# When both page and page_size are given they replace any limit/offset passed in.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = collection.get(ids: ["Array#sort", "Array#each"])
#
# Returns an Array of Embeddings.
def get(ids: nil, where: {}, sort: nil, limit: nil, offset: nil, page: nil, page_size: nil, where_document: {}, include: %w[metadatas documents])
  unless page.nil? || page_size.nil?
    # Translate 1-based page numbers into the limit/offset pair sent to the server.
    limit = page_size
    offset = (page - 1) * page_size
  end

  outcome = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/get", {
    ids: ids,
    where: where,
    sort: sort,
    limit: limit,
    offset: offset,
    where_document: where_document,
    include: include
  })

  return build_embeddings_response(outcome.success.body) if outcome.success?

  self.class.raise_failure_error(outcome)
end
98
-
99
# Add one or many embeddings to the collection.
#
# embeddings - An Array of Embeddings or one Embedding to add.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.add(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
#
# Returns true on success and false when there is nothing to add (no request is sent).
# Raises through raise_failure_error when the request fails.
def add(embeddings = [])
  batch = Array(embeddings)
  return false if batch.empty?

  endpoint = "#{Chroma.api_url}/collections/#{id}/add"
  outcome = self.class.execute_request(:post, endpoint, build_embeddings_payload(batch))

  outcome.success? ? true : self.class.raise_failure_error(outcome)
end
121
-
122
# Delete embeddings from the collection.
#
# ids - An Array of the specific embedding IDs to delete (optional).
# where - A Hash of additional conditions to filter the embeddings to delete (optional).
# where_document - A Hash of additional conditions to filter the associated documents (optional).
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.delete(ids: ["Array#fetch", "Array#sort"])
#
# Returns the response body of the delete request (documented upstream as the Array of deleted ids).
# Raises through raise_failure_error when the request fails.
def delete(ids: nil, where: {}, where_document: {})
  filters = {ids: ids, where: where, where_document: where_document}
  outcome = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/delete", filters)

  outcome.success? ? outcome.success.body : self.class.raise_failure_error(outcome)
end
147
-
148
# Update one or many embeddings already stored in the collection.
#
# embeddings - An Array of Embeddings or one Embedding to update.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.update(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
#
# Returns true on success and false when there is nothing to update (no request is sent).
# Raises through raise_failure_error when the request fails.
def update(embeddings = [])
  batch = Array(embeddings)
  return false if batch.empty?

  # The update request is sent without the :increment_index entry of the shared payload.
  body = build_embeddings_payload(batch).reject { |key, _value| key == :increment_index }
  outcome = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/update", body)

  outcome.success? ? true : self.class.raise_failure_error(outcome)
end
171
-
172
# Upsert (insert or update) one or many embeddings into the collection.
#
# embeddings - An Array of Embeddings or one Embedding to upsert.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = [
#     Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}),
#     Embedding.new(id: "Array#select", embedding: [5.6, 3.1, 4.7], metadata: {url: "https://..."})
#   ]
#   collection.upsert(embeddings)
#
# Returns true on success and false when there is nothing to upsert (no request is sent).
# Raises through raise_failure_error when the request fails.
def upsert(embeddings = [])
  batch = Array(embeddings)
  return false if batch.empty?

  endpoint = "#{Chroma.api_url}/collections/#{id}/upsert"
  outcome = self.class.execute_request(:post, endpoint, build_embeddings_payload(batch))

  outcome.success? ? true : self.class.raise_failure_error(outcome)
end
198
-
199
# Count the number of embeddings in the collection.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.count
#
# Returns the body of the count response (the number of embeddings in the collection).
# Raises through raise_failure_error when the request fails.
def count
  outcome = self.class.execute_request(:get, "#{Chroma.api_url}/collections/#{id}/count")

  outcome.success? ? outcome.success.body : self.class.raise_failure_error(outcome)
end
214
-
215
# Modify the name and, optionally, the metadata of the current collection.
#
# new_name - The new name for the collection.
# new_metadata - The new metadata Hash for the collection (optional). When it is
#                empty or nil no metadata is sent and the local metadata is kept.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.modify("ruby-3.2-documentation")
#   collection.modify("ruby-3.2-documentation", new_metadata: {source: "Ruby lang website"})
#
# Returns nothing. Raises through raise_failure_error when the request fails.
def modify(new_name, new_metadata: {})
  metadata_given = !(new_metadata.nil? || new_metadata.empty?)

  payload = {new_name: new_name}
  payload[:new_metadata] = new_metadata if metadata_given

  result = self.class.execute_request(:put, "#{Chroma.api_url}/collections/#{id}", payload)
  return self.class.raise_failure_error(result) unless result.success?

  @name = new_name
  # Mirror only what was actually sent: overwriting @metadata with the empty
  # default would make this object disagree with the server after a rename.
  @metadata = new_metadata if metadata_given
  nil
end
239
-
240
# Create a new collection on the database.
#
# name - The name of the collection. Name needs to be between 3-63 characters, starts and ends
#        with an alphanumeric character, contains only alphanumeric characters, underscores or
#        hyphens (-), and contains no two consecutive periods.
# metadata - A Hash of additional metadata associated with the collection (optional).
#
# Examples
#
#   collection = Chroma::Resources::Collection.create("ruby-documentation", {source: "Ruby lang website"})
#
# Returns the created collection object. Raises through raise_failure_error when the request fails.
def self.create(name, metadata = nil)
  # get_or_create: false asks the server for a plain create.
  attributes = {name: name, metadata: metadata, get_or_create: false}
  outcome = execute_request(:post, "#{Chroma.api_url}/collections", attributes)
  return raise_failure_error(outcome) unless outcome.success?

  record = outcome.success.body
  new(id: record["id"], name: record["name"], metadata: record["metadata"])
end
264
-
265
# Retrieve a collection from the database.
#
# name - The name of the collection to retrieve. It is interpolated into the
#        request path as given.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#
# Returns the retrieved collection object. Raises through raise_failure_error
# when the request fails (for instance when the collection does not exist).
def self.get(name)
  outcome = execute_request(:get, "#{Chroma.api_url}/collections/#{name}")
  return raise_failure_error(outcome) unless outcome.success?

  record = outcome.success.body
  new(id: record["id"], name: record["name"], metadata: record["metadata"])
end
284
-
285
# Get or create a collection on the database.
#
# name - The name of the collection. Name needs to be between 3-63 characters, starts and ends
#        with an alphanumeric character, contains only alphanumeric characters, underscores or
#        hyphens (-), and contains no two consecutive periods.
# metadata - A Hash of additional metadata associated with the collection; this is used if the
#            collection is created (optional).
#
# Examples
#
#   collection = Chroma::Resources::Collection.get_or_create("ruby-documentation", {source: "Ruby lang website"})
#
# Returns the existing or newly created collection object. Raises through
# raise_failure_error when the request fails.
def self.get_or_create(name, metadata = nil)
  # Same endpoint as create; get_or_create: true is the only difference in the payload.
  attributes = {name: name, metadata: metadata, get_or_create: true}
  outcome = execute_request(:post, "#{Chroma.api_url}/collections", attributes)
  return raise_failure_error(outcome) unless outcome.success?

  record = outcome.success.body
  new(id: record["id"], name: record["name"], metadata: record["metadata"])
end
309
-
310
# Retrieve all collections in the database.
#
# Examples
#
#   collections = Chroma::Resources::Collection.list
#
# Returns an Array with one collection object per entry of the response body.
# Raises through raise_failure_error when the request fails.
def self.list
  outcome = execute_request(:get, "#{Chroma.api_url}/collections")
  return raise_failure_error(outcome) unless outcome.success?

  outcome.success.body.map do |record|
    new(id: record["id"], name: record["name"], metadata: record["metadata"])
  end
end
327
-
328
# Delete a collection from the database.
#
# name - The name of the collection to delete. It is interpolated into the
#        request path as given.
#
# Examples
#
#   Chroma::Resources::Collection.delete("ruby-documentation")
#
# Returns true if the collection was successfully deleted. Raises through
# raise_failure_error otherwise.
def self.delete(name)
  outcome = execute_request(:delete, "#{Chroma.api_url}/collections/#{name}")

  outcome.success? ? true : raise_failure_error(outcome)
end
344
-
345
# Translate a failed request result into the matching Chroma error and raise it.
#
# result - A failed result whose `failure` exposes `status`, `body` and `error`
#          (an Exception or a Net::HTTPResponse).
#
# Raises Chroma::APIConnectionError when the request itself raised an exception,
# or when the server answered 500 without a recognizable Python error in its body.
# Raises Chroma::InvalidRequestError when a 500 body mentions ValueError, IndexError or TypeError.
# Raises Chroma::APIError for every other failure.
def self.raise_failure_error(result)
  failure = result.failure
  error = failure.error

  raise Chroma::APIConnectionError.new(error.message) if error.is_a?(Exception)

  details = {status: failure.status, body: failure.body}
  raise Chroma::APIError.new(failure.body, **details) unless error.is_a?(Net::HTTPInternalServerError)

  # A 500 whose body names one of these Python exceptions is reported as an invalid request.
  python_errors = %w[ValueError IndexError TypeError]
  invalid_request = error.body.is_a?(String) && python_errors.any? { |marker| error.body.include?(marker) }

  error_class = invalid_request ? Chroma::InvalidRequestError : Chroma::APIConnectionError
  raise error_class.new(failure.body, **details)
end
363
-
364
- private
365
-
366
# Build the request payload shared by the add, update and upsert requests.
#
# embeddings - An Array of objects responding to id, embedding, metadata and document.
# increment_index - Value sent under the :increment_index key. true by default.
#
# Returns a Hash of parallel Arrays (:ids, :embeddings, :metadatas, :documents),
# one entry per embedding in input order, plus :increment_index.
def build_embeddings_payload(embeddings, increment_index = true)
  {
    ids: embeddings.map(&:id),
    embeddings: embeddings.map(&:embedding),
    metadatas: embeddings.map(&:metadata),
    documents: embeddings.map(&:document),
    increment_index: increment_index
  }
end
378
-
379
# Turn the body of a query/get response into Embedding objects.
#
# result - The parsed response Hash with String keys: "ids" and, optionally,
#          "embeddings", "documents", "metadatas" and "distances". Missing or
#          null fields are treated as empty.
#
# Returns an Array of Chroma::Resources::Embedding, one per id in response order.
def build_embeddings_response(result)
  Chroma::Util.log_debug("Building embeddings from #{result.inspect}")

  raw_ids = result["ids"] || []
  # A nested ids list means every field carries one inner list per query
  # embedding (query-style response); a flat list is a get-style response.
  nested = raw_ids.first.is_a?(Array)

  result_ids = raw_ids.flatten
  # Each embedding is itself an Array, so only the outer per-query level may be
  # removed. Leaving it in place would hand the whole list to the first id.
  result_embeddings = result["embeddings"] || []
  result_embeddings = result_embeddings.flatten(1) if nested
  result_documents = (result["documents"] || []).flatten
  result_metadatas = (result["metadatas"] || []).flatten
  result_distances = (result["distances"] || []).flatten

  Chroma::Util.log_debug("Ids #{result_ids.inspect}")
  Chroma::Util.log_debug("Embeddings #{result_embeddings.inspect}")
  Chroma::Util.log_debug("Documents #{result_documents.inspect}")
  Chroma::Util.log_debug("Metadatas #{result_metadatas.inspect}")
  Chroma::Util.log_debug("distances #{result_distances.inspect}")

  result_ids.each_with_index.map do |id, index|
    Chroma::Resources::Embedding.new(
      id: id,
      embedding: result_embeddings[index],
      document: result_documents[index],
      metadata: result_metadatas[index],
      distance: result_distances[index]
    )
  end
end
398
- end
399
- end
400
- end
@@ -1,64 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Chroma
4
- module Resources
5
- # The Database class provides methods for interacting with the Chroma database server.
6
- class Database
7
- using RubyNext
8
-
9
- include Chroma::APIOperations::Request
10
# Get the version of the Chroma database server.
#
# Returns the body of the version response. Raises through raise_failure_error
# when the request fails.
def self.version
  outcome = execute_request(:get, "#{Chroma.api_url}/version")

  outcome.success? ? outcome.success.body : raise_failure_error(outcome)
end
20
-
21
# Reset the Chroma database server. This can't be undone.
#
# Returns the body of the reset response on success. Raises through
# raise_failure_error when the request fails.
def self.reset
  outcome = execute_request(:post, "#{Chroma.api_url}/reset")

  outcome.success? ? outcome.success.body : raise_failure_error(outcome)
end
31
-
32
# Check the heartbeat of the Chroma database server.
#
# Returns the body of the heartbeat response (a Hash with a timestamp).
# Raises through raise_failure_error when the request fails.
def self.heartbeat
  outcome = execute_request(:get, "#{Chroma.api_url}/heartbeat")

  outcome.success? ? outcome.success.body : raise_failure_error(outcome)
end
42
-
43
# Translate a failed request result into the matching Chroma error and raise it.
#
# result - A failed result whose `failure` exposes `status`, `body` and `error`
#          (an Exception or a Net::HTTPResponse).
#
# Raises Chroma::APIConnectionError when the request itself raised an exception,
# or when the server answered 500 without a recognizable Python error in its body.
# Raises Chroma::InvalidRequestError when a 500 body mentions ValueError, IndexError or TypeError.
# Raises Chroma::APIError for every other failure.
def self.raise_failure_error(result)
  failure = result.failure
  error = failure.error

  raise Chroma::APIConnectionError.new(error.message) if error.is_a?(Exception)

  details = {status: failure.status, body: failure.body}
  raise Chroma::APIError.new(failure.body, **details) unless error.is_a?(Net::HTTPInternalServerError)

  # A 500 whose body names one of these Python exceptions is reported as an invalid request.
  python_errors = %w[ValueError IndexError TypeError]
  invalid_request = error.body.is_a?(String) && python_errors.any? { |marker| error.body.include?(marker) }

  error_class = invalid_request ? Chroma::InvalidRequestError : Chroma::APIConnectionError
  raise error_class.new(failure.body, **details)
end
61
- private_class_method :raise_failure_error
62
- end
63
- end
64
- end
@@ -1,131 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Chroma
4
- module APIOperations
5
- using RubyNext
6
-
7
# Request's response data object.
#
# status - HTTP status code. It is zero when a request fails due to network error.
# body - Parsed JSON object or response body.
# headers - HTTP response headers.
# error - Exception or Net::HTTPResponse object if the response is not Net::HTTPSuccess
#
# NOTE: Not supported yet by Ruby Next
# Response = Data.define(:status, :body, :headers, :error)
#
# No class-name String is passed to Struct.new: that form would additionally
# register a process-wide Struct::Response constant that can clash with other
# code. The constant assignment below is what names the class.
Response = Struct.new(:status, :body, :headers, :error)
17
-
18
- # Request module provides functionality to perform HTTP requests.
19
- module Request
20
- module ClassMethods
21
- include Dry::Monads[:result]
22
-
23
# Execute an HTTP request and return a monad wrapping the response.
#
# method - The HTTP method to use as a Symbol (:get, :post, :put or :delete).
# url - The URL to send the request to.
# params - The request body as a Hash. It is JSON-encoded when not empty.
# options - Additional options for the request. Currently unused.
#
# Any error raised while building, sending or wrapping the request is rescued
# and reported as a failure instead of propagating.
#
# Examples
#
#   result = execute_request(:get, "https://example.com", {name: "test request"})
#   if result.success?
#     puts "Response status: #{result.success.status}"
#     puts "Response body: #{result.success.body}"
#   else
#     puts "Request failed with error: #{result.failure.error}"
#   end
#
# Returns a `Dry::Monads::Result` wrapping a `Chroma::APIOperations::Response`,
# either a success or a failure.
def execute_request(method, url, params = {}, options = {})
  uri = URI.parse(url)
  request = build_request(method, uri, params)

  raw_response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |connection|
    Chroma::Util.log_debug("Sending a request", {method: method, uri: uri, params: params})
    connection.request(request)
  end

  build_response(raw_response)
rescue => error
  build_response(error)
end
57
-
58
# Log the outcome of a request and wrap it into a result monad.
#
# response - The Net::HTTPResponse returned by the server, or whatever was
#            rescued while performing the request.
#
# Success, redirection and client error bodies are handed over for JSON
# parsing; server error bodies are kept raw. Anything that is none of those
# response classes is treated as an exception.
#
# Returns whatever build_response_details returns for the response.
private def build_response(response)
  case response
  when Net::HTTPSuccess
    Chroma::Util.log_info("Successful response", code: response.code)
    build_response_details(response)
  when Net::HTTPRedirection
    Chroma::Util.log_info("Server redirect response", code: response.code, location: response["location"])
    build_response_details(response)
  when Net::HTTPClientError
    Chroma::Util.log_error("Client error response", code: response.code, body: response.body)
    build_response_details(response)
  when Net::HTTPServerError
    Chroma::Util.log_error("Server error response", code: response.code)
    build_response_details(response, parse_body: false)
  else
    Chroma::Util.log_error("An error happened", error: response.to_s)
    build_response_details(response, exception: true, parse_body: false)
  end
end
82
-
83
# Build the Response data object for a request outcome and wrap it in a monad.
#
# response - A Net::HTTPResponse, or the rescued exception when `exception` is true.
# exception - true when `response` is not an HTTP response. Status is then 0,
#             headers are empty and the body is `response.to_s` (the exception message).
# parse_body - Whether the HTTP body should be run through body_to_json. true by default.
#
# Returns Success(response_data) for a Net::HTTPSuccess and Failure(response_data)
# for everything else. On failure `error` holds the original response or exception.
private def build_response_details(response, exception: false, parse_body: true)
  status, body, headers =
    if exception
      # `exception` is only a flag; the message lives on `response`. Calling
      # to_s on the flag would store the literal string "true" as the body.
      [0, response.to_s, {}]
    else
      raw_body = response.body
      [response.code.to_i, parse_body ? body_to_json(raw_body) : raw_body, response.each_header.to_h]
    end

  succeeded = response.is_a?(Net::HTTPSuccess)
  response_data = Chroma::APIOperations::Response.new(status, body, headers, succeeded ? nil : response)

  succeeded ? Success(response_data) : Failure(response_data)
end
102
-
103
# Parse a response body as JSON.
#
# content - The raw response body (a String, possibly nil).
#
# Keys are left as Strings; callers read the parsed body with String keys.
# (The `symbolize_keys:` option passed here previously is not a JSON.parse
# option; the parser's key-symbolizing switch is `symbolize_names`.)
#
# Returns the parsed object, or `content` unchanged when it is not valid JSON
# or not a String.
private def body_to_json(content)
  JSON.parse(content)
rescue JSON::ParserError, TypeError
  content
end
108
-
109
# Build the Net::HTTP request object for a call.
#
# method - The HTTP method as a Symbol. :post, :put and :delete map to their
#          request classes; anything else becomes a GET.
# uri - The parsed URI of the request. Its user and password, when present,
#       are sent as HTTP basic auth credentials.
# params - The Hash to send as JSON body. Nothing is sent when it is empty.
#
# Returns the Net::HTTPRequest with a JSON content type.
private def build_request(method, uri, params)
  request_classes = {post: Net::HTTP::Post, put: Net::HTTP::Put, delete: Net::HTTP::Delete}
  request = request_classes.fetch(method, Net::HTTP::Get).new(uri)

  request.content_type = "application/json"
  request.body = params.to_json unless params.size.zero?
  request.basic_auth(uri.user, uri.password) unless uri.user.nil?

  request
end
124
- end
125
-
126
# Hook run by Ruby when this module is included.
#
# base - The class or module including Request.
#
# Extends `base` with ClassMethods so the request helpers become class-level
# methods of the including class.
def self.included(base)
  base.extend ClassMethods
end
129
- end
130
- end
131
- end