chroma-db 0.2.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,388 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ using RubyNext
5
+
6
+ module Resources
7
+ # A Collection class represents a store for your embeddings, documents, and any additional metadata.
8
+ # This class can be instantiated by receiving the collection's name and metadata hash.
9
+ class Collection
10
+ include Chroma::APIOperations::Request
11
+
12
+ attr_reader :id
13
+ attr_reader :name
14
+ attr_reader :metadata
15
+
16
# Build an in-memory handle for a collection.
#
# id - The identifier of the collection; instance methods use it to build endpoint URLs.
# name - The name of the collection.
# metadata - The metadata associated with the collection (optional, nil by default).
def initialize(id:, name:, metadata: nil)
  @id, @name, @metadata = id, name, metadata
end
21
+
22
# Query the collection and return an array of embeddings.
#
# query_embeddings - An Array of the embeddings to use for querying the collection.
# results - The maximum number of results to return. 10 by default.
# where - A Hash of additional conditions to filter the query results (optional).
# where_document - A Hash of additional conditions to filter the associated documents (optional).
# include - An Array of the additional information to include in the query results (optional). Metadatas,
#   documents, and distances by default.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = collection.query(query_embeddings: [[1.5, 2.9, 3.3]], results: 5)
#
# Returns an Array of Embedding with query results. A failed request is handed to the
# class-level raise_failure_error handler, which raises.
def query(query_embeddings:, results: 10, where: {}, where_document: {}, include: %w[metadatas documents distances])
  payload = {
    query_embeddings: query_embeddings,
    n_results: results,
    where: where,
    where_document: where_document,
    include: include
  }

  # The collection is addressed by id, like every other instance-level endpoint.
  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/query", payload)
  return build_embeddings_response(result.success.body) if result.success?

  # raise_failure_error is a private class method, so it has to be reached through the class.
  self.class.send(:raise_failure_error, result)
end
54
+
55
# Get embeddings from the collection.
#
# ids - An Array of the specific embedding IDs to retrieve (optional).
# where - A Hash of additional conditions to filter the query results (optional).
# sort - The sorting criteria for the query results (optional).
# limit - The maximum number of embeddings to retrieve (optional).
# offset - The offset for pagination (optional).
# page - The page number for pagination (optional).
# page_size - The page size for pagination (optional).
# where_document - A Hash of additional conditions to filter the associated documents (optional).
# include - An Array of the additional information to include in the query results (optional). Metadatas,
#   and documents by default.
#
# When both page and page_size are given they replace limit and offset.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = collection.get(ids: ["Array#sort", "Array#each"])
#
# Returns an Array of Embeddings. A failed request is handed to the class-level
# raise_failure_error handler, which raises.
def get(ids: nil, where: {}, sort: nil, limit: nil, offset: nil, page: nil, page_size: nil, where_document: {}, include: %w[metadatas documents])
  # Page-based pagination only applies when both values are supplied.
  unless page.nil? || page_size.nil?
    offset = (page - 1) * page_size
    limit = page_size
  end

  payload = {
    ids: ids,
    where: where,
    sort: sort,
    limit: limit,
    offset: offset,
    where_document: where_document,
    include: include
  }

  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/get", payload)
  return build_embeddings_response(result.success.body) if result.success?

  # raise_failure_error is a private class method, so it has to be reached through the class.
  self.class.send(:raise_failure_error, result)
end
98
+
99
# Add one or many embeddings to the collection.
#
# embeddings - An Array of Embeddings or one Embedding to add.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.add(Chroma::Resources::Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
#
# Returns true on success, or false without issuing a request when there is nothing to add.
# A failed request is handed to the class-level raise_failure_error handler, which raises.
def add(embeddings = [])
  embeddings_array = Array(embeddings)
  return false if embeddings_array.empty?

  payload = build_embeddings_payload(embeddings_array)
  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/add", payload)
  return true if result.success?

  # raise_failure_error is a private class method, so it has to be reached through the class.
  self.class.send(:raise_failure_error, result)
end
121
+
122
# Delete embeddings from the collection.
#
# ids - An Array of the specific embedding IDs to delete (optional).
# where - A Hash of additional conditions to filter the embeddings to delete (optional).
# where_document - A Hash of additional conditions to filter the associated documents (optional).
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.delete(ids: ["Array#fetch", "Array#sort"])
#
# Returns the body of the successful response (documented as an Array of the deleted ids).
# A failed request is handed to the class-level raise_failure_error handler, which raises.
def delete(ids: nil, where: {}, where_document: {})
  payload = {ids: ids, where: where, where_document: where_document}

  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/delete", payload)
  return result.success.body if result.success?

  # raise_failure_error is a private class method, so it has to be reached through the class.
  self.class.send(:raise_failure_error, result)
end
147
+
148
# Update one or many embeddings in the collection.
#
# embeddings - An Array of Embeddings or one Embedding to update.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.update(Chroma::Resources::Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
#
# Returns true on success, or false without issuing a request when there is nothing to update.
# A failed request is handed to the class-level raise_failure_error handler, which raises.
def update(embeddings = [])
  embeddings_array = Array(embeddings)
  return false if embeddings_array.empty?

  payload = build_embeddings_payload(embeddings_array)
  # The update request is sent without the increment_index flag.
  payload.delete(:increment_index)

  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/update", payload)
  return true if result.success?

  # raise_failure_error is a private class method, so it has to be reached through the class.
  self.class.send(:raise_failure_error, result)
end
171
+
172
# Upsert (insert or update) one or many embeddings to the collection.
#
# embeddings - An Array of Embeddings or one Embedding to upsert.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   embeddings = [
#     Chroma::Resources::Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}),
#     Chroma::Resources::Embedding.new(id: "Array#select", embedding: [5.6, 3.1, 4.7], metadata: {url: "https://..."})
#   ]
#   collection.upsert(embeddings)
#
# Returns true on success, or false without issuing a request when there is nothing to upsert.
# A failed request is handed to the class-level raise_failure_error handler, which raises.
def upsert(embeddings = [])
  embeddings_array = Array(embeddings)
  return false if embeddings_array.empty?

  payload = build_embeddings_payload(embeddings_array)
  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/upsert", payload)
  return true if result.success?

  # raise_failure_error is a private class method, so it has to be reached through the class.
  self.class.send(:raise_failure_error, result)
end
198
+
199
# Count the number of embeddings in a collection.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.count
#
# Returns the body of the successful response, i.e. the count of embeddings in the collection.
# A failed request is handed to the class-level raise_failure_error handler, which raises.
def count
  result = self.class.execute_request(:get, "#{Chroma.api_url}/collections/#{id}/count")
  return result.success.body if result.success?

  # raise_failure_error is a private class method, so it has to be reached through the class.
  self.class.send(:raise_failure_error, result)
end
214
+
215
# Modify the name and metadata of the current collection.
#
# new_name - The new name for the collection.
# new_metadata - The new metadata hash for the collection (optional). When it is empty or nil,
#   no metadata is sent with the request and the local metadata is left as it is.
#
# Examples:
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.modify("ruby-3.2-documentation")
#
# Returns nil. A failed request is handed to the class-level raise_failure_error handler,
# which raises; the local name and metadata are not touched in that case.
def modify(new_name, new_metadata: {})
  metadata_changes = new_metadata || {}
  payload = {new_name: new_name}
  payload[:new_metadata] = metadata_changes if metadata_changes.any?

  result = self.class.execute_request(:put, "#{Chroma.api_url}/collections/#{id}", payload)
  # raise_failure_error is a private class method, so it has to be reached through the class.
  self.class.send(:raise_failure_error, result) unless result.success?

  @name = new_name
  # Only mirror metadata that was actually sent, so the local copy does not drift to {}.
  @metadata = metadata_changes if metadata_changes.any?
  nil
end
239
+
240
# Creates an index for the collection.
#
# Examples:
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#   collection.create_index
#
# Returns true on success. A failed request is handed to the class-level
# raise_failure_error handler, which raises.
def create_index
  result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/create_index")
  return true if result.success?

  # raise_failure_error is a private class method, so it has to be reached through the class.
  self.class.send(:raise_failure_error, result)
end
255
+
256
# Create a new collection on the database.
#
# name - The name of the collection. Name needs to be between 3-63 characters, starts and ends
#   with an alphanumeric character, contains only alphanumeric characters, underscores or hyphens (-), and
#   contains no two consecutive periods.
# metadata - A hash of additional metadata associated with the collection.
#
# Examples
#
#   collection = Chroma::Resources::Collection.create("ruby-documentation", {source: "Ruby lang website"})
#
# Returns the created collection object, built from the id, name and metadata of the response body.
def self.create(name, metadata = nil)
  response = execute_request(
    :post,
    "#{Chroma.api_url}/collections",
    {name: name, metadata: metadata, get_or_create: false}
  )

  if response.success?
    attributes = response.success.body
    return new(id: attributes["id"], name: attributes["name"], metadata: attributes["metadata"])
  end

  raise_failure_error(response)
end
280
+
281
# Retrieves a collection from the database.
#
# name - The name of the collection to retrieve.
#
# Examples
#
#   collection = Chroma::Resources::Collection.get("ruby-documentation")
#
# Returns the retrieved collection object. A failed request is passed to raise_failure_error.
def self.get(name)
  response = execute_request(:get, "#{Chroma.api_url}/collections/#{name}")

  if response.success?
    attributes = response.success.body
    return new(id: attributes["id"], name: attributes["name"], metadata: attributes["metadata"])
  end

  raise_failure_error(response)
end
300
+
301
# Retrieves all collections in the database.
#
# Examples
#
#   collections = Chroma::Resources::Collection.list
#
# Returns an Array with one collection object per item of the response body.
# A failed request is passed to raise_failure_error.
def self.list
  response = execute_request(:get, "#{Chroma.api_url}/collections")

  if response.success?
    rows = response.success.body
    return rows.map { |row| new(id: row["id"], name: row["name"], metadata: row["metadata"]) }
  end

  raise_failure_error(response)
end
318
+
319
# Deletes a collection from the database.
#
# name - The name of the collection to delete.
#
# Examples
#
#   Chroma::Resources::Collection.delete("ruby-documentation")
#
# Returns true if the collection was successfully deleted. A failed request is passed to
# raise_failure_error.
def self.delete(name)
  response = execute_request(:delete, "#{Chroma.api_url}/collections/#{name}")

  response.success? ? true : raise_failure_error(response)
end
335
+
336
# Translate a failed request result into a Chroma error and raise it.
#
# result - The failed result; result.failure must respond to error, status and body.
#
# Raises Chroma::APIConnectionError when the failure wraps an Exception, or when it is an
# internal server error whose body does not mention ValueError, IndexError or TypeError.
# Raises Chroma::InvalidRequestError when an internal server error body mentions one of those.
# Raises Chroma::APIError for any other failure.
def self.raise_failure_error(result)
  failure = result.failure
  cause = failure.error

  case cause
  when Exception
    raise Chroma::APIConnectionError.new(cause.message)
  when Net::HTTPInternalServerError
    text = cause.body
    invalid_request = text.is_a?(String) &&
      %w[ValueError IndexError TypeError].any? { |marker| text.include?(marker) }
    error_class = invalid_request ? Chroma::InvalidRequestError : Chroma::APIConnectionError

    raise error_class.new(failure.body, status: failure.status, body: failure.body)
  else
    raise Chroma::APIError.new(failure.body, status: failure.status, body: failure.body)
  end
end
350
+ private_class_method :raise_failure_error
351
+
352
+ private
353
+
354
# Turn a list of embedding objects into the column-oriented request payload.
#
# embeddings - A collection of objects responding to id, embedding, metadata and document.
# increment_index - The value stored under the :increment_index key (true by default).
#
# Returns a Hash with parallel :ids, :embeddings, :metadatas and :documents Arrays plus
# the :increment_index flag.
def build_embeddings_payload(embeddings, increment_index = true)
  {
    ids: embeddings.map(&:id),
    embeddings: embeddings.map(&:embedding),
    metadatas: embeddings.map(&:metadata),
    documents: embeddings.map(&:document),
    increment_index: increment_index
  }
end
366
+
367
# Build Embedding objects from the column-oriented body of a get or query response.
#
# result - A Hash with an "ids" list and optional "embeddings", "documents", "metadatas"
#   and "distances" lists. The lists are either flat (one entry per id) or grouped one
#   level deep (a list of lists, detected from the first entry of "ids").
#
# Only the grouping level is removed, so every embedding keeps its full vector instead of
# being flattened into individual numbers. Missing or null lists are treated as empty.
#
# Returns an Array of Chroma::Resources::Embedding, one per id, in response order.
def build_embeddings_response(result)
  Chroma::Util.log_debug("Building embeddings from #{result.inspect}")

  grouped = (result["ids"] || []).first.is_a?(Array)
  # Fetch one column, dropping only the per-group nesting when it is present.
  column = lambda do |key|
    values = result[key] || []
    grouped ? values.flatten(1) : values
  end

  result_ids = column.call("ids")
  result_embeddings = column.call("embeddings")
  result_documents = column.call("documents")
  result_metadatas = column.call("metadatas")
  result_distances = column.call("distances")

  Chroma::Util.log_debug("Ids #{result_ids.inspect}")
  Chroma::Util.log_debug("Embeddings #{result_embeddings.inspect}")
  Chroma::Util.log_debug("Documents #{result_documents.inspect}")
  Chroma::Util.log_debug("Metadatas #{result_metadatas.inspect}")
  Chroma::Util.log_debug("distances #{result_distances.inspect}")

  result_ids.map.with_index do |id, index|
    Chroma::Resources::Embedding.new(
      id: id,
      embedding: result_embeddings[index],
      document: result_documents[index],
      metadata: result_metadatas[index],
      distance: result_distances[index]
    )
  end
end
386
+ end
387
+ end
388
+ end
@@ -2,13 +2,18 @@
2
2
 
3
3
  module Chroma
4
4
  module APIOperations
5
+ using RubyNext
6
+
5
7
  # Request's response Data object.
6
8
  #
7
9
  # status - HTTP status code. It is zero when a request fails due to network error.
8
10
  # body - Parsed JSON object or response body.
9
11
  # headers - HTTP response headers.
10
12
  # error - Exception or Net::HTTPResponse object if the response is not Net::HTTPSuccess
11
- Response = Data.define(:status, :body, :headers, :error)
13
+ #
14
+ # NOTE: Not supported yet by Ruby Next
15
+ # Response = Data.define(:status, :body, :headers, :error)
16
+ Response = Struct.new("Response", :status, :body, :headers, :error)
12
17
 
13
18
  # Request module provides functionality to perform HTTP requests.
14
19
  module Request
@@ -77,14 +82,14 @@ module Chroma
77
82
 
78
83
  private def build_response_details(response, exception: false, parse_body: true)
79
84
  response_data = Chroma::APIOperations::Response.new(
80
- status: exception ? 0 : response.code.to_i,
81
- body: if exception
82
- exception.to_s
83
- else
84
- (parse_body ? body_to_json(response.body) : response.body)
85
- end,
86
- headers: exception ? {} : response.each_header.to_h,
87
- error: response.is_a?(Net::HTTPSuccess) ? nil : response
85
+ exception ? 0 : response.code.to_i,
86
+ if exception
87
+ exception.to_s
88
+ else
89
+ (parse_body ? body_to_json(response.body) : response.body)
90
+ end,
91
+ exception ? {} : response.each_header.to_h,
92
+ response.is_a?(Net::HTTPSuccess) ? nil : response
88
93
  )
89
94
 
90
95
  case response
@@ -1,16 +1,20 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Chroma
4
+ using RubyNext
5
+
4
6
  module Resources
5
7
  # A Collection class represents a store for your embeddings, documents, and any additional metadata.
6
8
  # This class can be instantiated by receiving the collection's name and metadata hash.
7
9
  class Collection
8
10
  include Chroma::APIOperations::Request
9
11
 
12
+ attr_reader :id
10
13
  attr_reader :name
11
14
  attr_reader :metadata
12
15
 
13
- def initialize(name:, metadata: nil)
16
+ def initialize(id:, name:, metadata: nil)
17
+ @id = id
14
18
  @name = name
15
19
  @metadata = metadata
16
20
  end
@@ -39,7 +43,7 @@ module Chroma
39
43
  include:
40
44
  }
41
45
 
42
- result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/query", payload)
46
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/query", payload)
43
47
 
44
48
  if result.success?
45
49
  build_embeddings_response(result.success.body)
@@ -83,7 +87,7 @@ module Chroma
83
87
  include:
84
88
  }
85
89
 
86
- result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/get", payload)
90
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/get", payload)
87
91
 
88
92
  if result.success?
89
93
  build_embeddings_response(result.success.body)
@@ -108,7 +112,7 @@ module Chroma
108
112
 
109
113
  payload = build_embeddings_payload(embeddings_array)
110
114
 
111
- result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/add", payload)
115
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/add", payload)
112
116
 
113
117
  return true if result.success?
114
118
 
@@ -134,7 +138,7 @@ module Chroma
134
138
  where_document:
135
139
  }
136
140
 
137
- result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/delete", payload)
141
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/delete", payload)
138
142
 
139
143
  return result.success.body if result.success?
140
144
 
@@ -158,7 +162,7 @@ module Chroma
158
162
  payload = build_embeddings_payload(embeddings_array)
159
163
  payload.delete(:increment_index)
160
164
 
161
- result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/update", payload)
165
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/update", payload)
162
166
 
163
167
  return true if result.success?
164
168
 
@@ -185,7 +189,7 @@ module Chroma
185
189
 
186
190
  payload = build_embeddings_payload(embeddings_array)
187
191
 
188
- result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/upsert", payload)
192
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/upsert", payload)
189
193
 
190
194
  return true if result.success?
191
195
 
@@ -201,7 +205,7 @@ module Chroma
201
205
  #
202
206
  # Returns the count of embeddings in the collection.
203
207
  def count
204
- result = self.class.execute_request(:get, "#{Chroma.api_url}/collections/#{name}/count")
208
+ result = self.class.execute_request(:get, "#{Chroma.api_url}/collections/#{id}/count")
205
209
 
206
210
  return result.success.body if result.success?
207
211
 
@@ -223,7 +227,7 @@ module Chroma
223
227
  payload = {new_name:}
224
228
  payload[:new_metadata] = new_metadata if new_metadata.any?
225
229
 
226
- result = self.class.execute_request(:put, "#{Chroma.api_url}/collections/#{name}", payload)
230
+ result = self.class.execute_request(:put, "#{Chroma.api_url}/collections/#{id}", payload)
227
231
 
228
232
  if result.success?
229
233
  @name = new_name
@@ -242,7 +246,7 @@ module Chroma
242
246
  #
243
247
  # Returns true on success or raise a Chroma::Error on failure.
244
248
  def create_index
245
- result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/create_index")
249
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{id}/create_index")
246
250
 
247
251
  return true if result.success?
248
252
 
@@ -268,7 +272,7 @@ module Chroma
268
272
 
269
273
  if result.success?
270
274
  data = result.success.body
271
- new(name: data["name"], metadata: data["metadata"])
275
+ new(id: data["id"], name: data["name"], metadata: data["metadata"])
272
276
  else
273
277
  raise_failure_error(result)
274
278
  end
@@ -288,7 +292,7 @@ module Chroma
288
292
 
289
293
  if result.success?
290
294
  data = result.success.body
291
- new(name: data["name"], metadata: data["metadata"])
295
+ new(id: data["id"], name: data["name"], metadata: data["metadata"])
292
296
  else
293
297
  raise_failure_error(result)
294
298
  end
@@ -306,7 +310,7 @@ module Chroma
306
310
 
307
311
  if result.success?
308
312
  data = result.success.body
309
- data.map { |item| new(name: item["name"], metadata: item["metadata"]) }
313
+ data.map { |item| new(id: item["id"], name: item["name"], metadata: item["metadata"]) }
310
314
  else
311
315
  raise_failure_error(result)
312
316
  end
@@ -4,6 +4,8 @@ module Chroma
4
4
  module Resources
5
5
  # The Database class provides methods for interacting with the Chroma database server.
6
6
  class Database
7
+ using RubyNext
8
+
7
9
  include Chroma::APIOperations::Request
8
10
  # Get the version of the Chroma database server.
9
11
  #
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Chroma
4
- VERSION = "0.2.0"
4
+ VERSION = "0.4.0"
5
5
  end
data/lib/chroma-db.rb CHANGED
@@ -6,13 +6,17 @@ require "uri"
6
6
  require "json"
7
7
  require "logger"
8
8
  require "forwardable"
9
+ require "ruby-next"
10
+ require "ruby-next/language/setup"
9
11
 
10
- require_relative "chroma/version"
11
- require_relative "chroma/util"
12
- require_relative "chroma/chroma_configuration"
13
- require_relative "chroma/chroma"
14
- require_relative "chroma/api_operations/request"
15
- require_relative "chroma/errors"
16
- require_relative "chroma/resources/embedding"
17
- require_relative "chroma/resources/collection"
18
- require_relative "chroma/resources/database"
12
+ RubyNext::Language.setup_gem_load_path(transpile: true)
13
+
14
+ require "chroma/version"
15
+ require "chroma/util"
16
+ require "chroma/chroma_configuration"
17
+ require "chroma/chroma"
18
+ require "chroma/api_operations/request"
19
+ require "chroma/errors"
20
+ require "chroma/resources/embedding"
21
+ require "chroma/resources/collection"
22
+ require "chroma/resources/database"