chroma-db 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,385 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ using RubyNext
5
+ module Resources
6
+ # A Collection class represents a store for your embeddings, documents, and any additional metadata.
7
+ # This class can be instantiated by receiving the collection's name and metadata hash.
8
+ class Collection
9
+ include Chroma::APIOperations::Request
10
+
11
+ attr_reader :name
12
+ attr_reader :metadata
13
+
14
+ def initialize(name:, metadata: nil)
15
+ @name = name
16
+ @metadata = metadata
17
+ end
18
+
19
+ # Query the collection and return an array of embeddings.
20
+ #
21
+ # query_embeddings - An array of the embeddings to use for querying the collection.
22
+ # results - The maximum number of results to return. 10 by default.
23
+ # where - A Hash of additional conditions to filter the query results (optional).
24
+ # where_document - A Hash of additional conditions to filter the associated documents (optional).
25
+ # include - An Array of the additional information to include in the query results (optional). Metadatas,
26
+ # documents, and distances by default.
27
+ #
28
+ # Examples
29
+ #
30
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
31
+ # embeddings = collection.query(query_embeddings: [[1.5, 2.9, 3.3]], results: 5)
32
+ #
33
+ # Returns an Array of Embeddings with the query results.
34
+ def query(query_embeddings:, results: 10, where: {}, where_document: {}, include: %w[metadatas documents distances])
35
+ payload = {
36
+ query_embeddings: query_embeddings,
37
+ n_results: results,
38
+ where: where,
39
+ where_document: where_document,
40
+ include: include
41
+ }
42
+
43
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/query", payload)
44
+
45
+ if result.success?
46
+ build_embeddings_response(result.success.body)
47
+ else
48
+ raise_failure_error(result)
49
+ end
50
+ end
51
+
52
+ # Get embeddings from the collection.
53
+ #
54
+ # ids - An Array of the specific embedding IDs to retrieve (optional).
55
+ # where - A Hash of additional conditions to filter the query results (optional).
56
+ # sort - The sorting criteria for the query results (optional).
57
+ # limit - The maximum number of embeddings to retrieve (optional).
58
+ # offset - The offset for pagination (optional).
59
+ # page - The page number for pagination (optional).
60
+ # page_size - The page size for pagination (optional).
61
+ # where_document - A Hash of additional conditions to filter the associated documents (optional).
62
+ # include - An Array of the additional information to include in the query results (optional). Metadatas,
63
+ # and documents by default.
64
+ #
65
+ # Examples
66
+ #
67
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
68
+ # embeddings = collection.get(ids: ["Array#sort", "Array#each"])
69
+ #
70
+ # Returns an Array of Embeddings
71
+ def get(ids: nil, where: {}, sort: nil, limit: nil, offset: nil, page: nil, page_size: nil, where_document: {}, include: %w[metadatas documents])
72
+ if !page.nil? && !page_size.nil?
73
+ offset = (page - 1) * page_size
74
+ limit = page_size
75
+ end
76
+
77
+ payload = {
78
+ ids: ids,
79
+ where: where,
80
+ sort: sort,
81
+ limit: limit,
82
+ offset: offset,
83
+ where_document: where_document,
84
+ include: include
85
+ }
86
+
87
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/get", payload)
88
+
89
+ if result.success?
90
+ build_embeddings_response(result.success.body)
91
+ else
92
+ raise_failure_error(result)
93
+ end
94
+ end
95
+
96
+ # Add one or many embeddings to the collection.
97
+ #
98
+ # embeddings - An Array of Embeddings or one Embedding to add.
99
+ #
100
+ # Examples
101
+ #
102
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
103
+ # collection.add(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
104
+ #
105
+ # Returns true on success, false when no embeddings are given, or raises a Chroma::Error on failure.
106
+ def add(embeddings = [])
107
+ embeddings_array = Array(embeddings)
108
+ return false if embeddings_array.size == 0
109
+
110
+ payload = build_embeddings_payload(embeddings_array)
111
+
112
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/add", payload)
113
+
114
+ return true if result.success?
115
+
116
+ raise_failure_error(result)
117
+ end
118
+
119
+ # Delete embeddings from the collection.
120
+ #
121
+ # ids [Array<Integer>, nil] The specific embedding IDs to delete (optional).
122
+ # where [Hash] Additional conditions to filter the embeddings to delete (optional).
123
+ # where_document [Hash] Additional conditions to filter the associated documents (optional).
124
+ #
125
+ # Examples
126
+ #
127
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
128
+ # collection.delete(ids: ["Array#fetch", "Array#sort"])
129
+ #
130
+ # Returns an Array of deleted global ids.
131
+ def delete(ids: nil, where: {}, where_document: {})
132
+ payload = {
133
+ ids: ids,
134
+ where: where,
135
+ where_document: where_document
136
+ }
137
+
138
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/delete", payload)
139
+
140
+ return result.success.body if result.success?
141
+
142
+ raise_failure_error(result)
143
+ end
144
+
145
+ # Update one or many embeddings in the collection.
146
+ #
147
+ # embeddings - An Array of Embeddings or one Embedding to update.
148
+ #
149
+ # Examples
150
+ #
151
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
152
+ # collection.update(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
153
+ #
154
+ # Returns true on success, false when no embeddings are given, or raises a Chroma::Error on failure.
155
+ def update(embeddings = [])
156
+ embeddings_array = Array(embeddings)
157
+ return false if embeddings_array.size == 0
158
+
159
+ payload = build_embeddings_payload(embeddings_array)
160
+ payload.delete(:increment_index)
161
+
162
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/update", payload)
163
+
164
+ return true if result.success?
165
+
166
+ raise_failure_error(result)
167
+ end
168
+
169
+ # Upsert (insert or update) one or many embeddings to the collection.
170
+ #
171
+ # embeddings - An Array of Embeddings or one Embedding to insert or update.
172
+ #
173
+ # Examples
174
+ #
175
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
176
+ # embeddings = [
177
+ # Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}),
178
+ # Embedding.new(id: "Array#select", embedding: [5.6, 3.1, 4.7], metadata: {url: "https://..."})
179
+ # ]
180
+ # collection.upsert(embeddings)
181
+ #
182
+ # Returns true on success, false when no embeddings are given, or raises a Chroma::Error on failure.
183
+ def upsert(embeddings = [])
184
+ embeddings_array = Array(embeddings)
185
+ return false if embeddings_array.size == 0
186
+
187
+ payload = build_embeddings_payload(embeddings_array)
188
+
189
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/upsert", payload)
190
+
191
+ return true if result.success?
192
+
193
+ raise_failure_error(result)
194
+ end
195
+
196
+ # Count the number of embeddings in a collection.
197
+ #
198
+ # Examples
199
+ #
200
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
201
+ # collection.count
202
+ #
203
+ # Returns the count of embeddings in the collection.
204
+ def count
205
+ result = self.class.execute_request(:get, "#{Chroma.api_url}/collections/#{name}/count")
206
+
207
+ return result.success.body if result.success?
208
+
209
+ raise_failure_error(result)
210
+ end
211
+
212
+ # Modify the name and metadata of the current collection.
213
+ #
214
+ # new_name - The new name for the collection.
215
+ # new_metadata - The new metadata hash for the collection.
216
+ #
217
+ # Examples:
218
+ #
219
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
220
+ # collection.modify("ruby-3.2-documentation")
221
+ #
222
+ # Returns nothing.
223
+ def modify(new_name, new_metadata: {})
224
+ payload = {new_name: new_name}
225
+ payload[:new_metadata] = new_metadata if new_metadata.any?
226
+
227
+ result = self.class.execute_request(:put, "#{Chroma.api_url}/collections/#{name}", payload)
228
+
229
+ if result.success?
230
+ @name = new_name
231
+ @metadata = new_metadata
232
+ else
233
+ raise_failure_error(result)
234
+ end
235
+ end
236
+
237
+ # Creates an index for the collection.
238
+ #
239
+ # Examples:
240
+ #
241
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
242
+ # collection.create_index
243
+ #
244
+ # Returns true on success or raises a Chroma::Error on failure.
245
+ def create_index
246
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/create_index")
247
+
248
+ return true if result.success?
249
+
250
+ raise_failure_error(result)
251
+ end
252
+
253
+ # Create a new collection on the database.
254
+ #
255
+ # name - The name of the collection. Name needs to be between 3-63 characters, starts and ends
256
+ # with an alphanumeric character, contains only alphanumeric characters, underscores or hyphens (-), and
257
+ # contains no two consecutive periods
258
+ # metadata - A hash of additional metadata associated with the collection.
259
+ #
260
+ # Examples
261
+ #
262
+ # collection = Chroma::Resources::Collection.create("ruby-documentation", {source: "Ruby lang website"})
263
+ #
264
+ # Returns the created collection object.
265
+ def self.create(name, metadata = nil)
266
+ payload = {name: name, metadata: metadata, get_or_create: false}
267
+
268
+ result = execute_request(:post, "#{Chroma.api_url}/collections", payload)
269
+
270
+ if result.success?
271
+ data = result.success.body
272
+ new(name: data["name"], metadata: data["metadata"])
273
+ else
274
+ raise_failure_error(result)
275
+ end
276
+ end
277
+
278
+ # Retrieves a collection from the database.
279
+ #
280
+ # name - The name of the collection to retrieve.
281
+ #
282
+ # Examples
283
+ #
284
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
285
+ #
286
+ # Returns the retrieved collection object. Raises Chroma::InvalidRequestError if not found.
287
+ def self.get(name)
288
+ result = execute_request(:get, "#{Chroma.api_url}/collections/#{name}")
289
+
290
+ if result.success?
291
+ data = result.success.body
292
+ new(name: data["name"], metadata: data["metadata"])
293
+ else
294
+ raise_failure_error(result)
295
+ end
296
+ end
297
+
298
+ # Retrieves all collections in the database.
299
+ #
300
+ # Examples
301
+ #
302
+ # collections = Chroma::Resources::Collection.list
303
+ #
304
+ # Returns an Array of all collections in the database.
305
+ def self.list
306
+ result = execute_request(:get, "#{Chroma.api_url}/collections")
307
+
308
+ if result.success?
309
+ data = result.success.body
310
+ data.map { |item| new(name: item["name"], metadata: item["metadata"]) }
311
+ else
312
+ raise_failure_error(result)
313
+ end
314
+ end
315
+
316
+ # Deletes a collection from the database.
317
+ #
318
+ # name - The name of the collection to delete.
319
+ #
320
+ # Examples
321
+ #
322
+ # Chroma::Resources::Collection.delete("ruby-documentation")
323
+ #
324
+ # Returns true if the collection was successfully deleted, raises Chroma::InvalidRequestError otherwise.
325
+ def self.delete(name)
326
+ result = execute_request(:delete, "#{Chroma.api_url}/collections/#{name}")
327
+
328
+ return true if result.success?
329
+
330
+ raise_failure_error(result)
331
+ end
332
+
333
+ def self.raise_failure_error(result)
334
+ case result.failure.error
335
+ in Exception => exception
336
+ raise Chroma::APIConnectionError.new(exception.message)
337
+ in Net::HTTPInternalServerError => response
338
+ if response.body.is_a?(String) && (response.body.include?("ValueError") || response.body.include?("IndexError") || response.body.include?("TypeError"))
339
+ raise Chroma::InvalidRequestError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
340
+ else
341
+ raise Chroma::APIConnectionError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
342
+ end
343
+ else
344
+ raise Chroma::APIError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
345
+ end
346
+ end
347
+ private_class_method :raise_failure_error
348
+
349
+ private
350
+
351
+ def build_embeddings_payload(embeddings, increment_index = true)
352
+ payload = {ids: [], embeddings: [], metadatas: [], documents: [], increment_index: increment_index}
353
+
354
+ embeddings.each do |embedding|
355
+ payload[:ids] << embedding.id
356
+ payload[:embeddings] << embedding.embedding
357
+ payload[:metadatas] << embedding.metadata
358
+ payload[:documents] << embedding.document
359
+ end
360
+
361
+ payload
362
+ end
363
+
364
+ def build_embeddings_response(result)
365
+ Chroma::Util.log_debug("Building embeddings from #{result.inspect}")
366
+
367
+ result_ids = result.fetch("ids", []).flatten
368
+ result_embeddings = (result.dig("embeddings") || []).flatten
369
+ result_documents = (result.dig("documents") || []).flatten
370
+ result_metadatas = (result.dig("metadatas") || []).flatten
371
+ result_distances = (result.dig("distances") || []).flatten
372
+
373
+ Chroma::Util.log_debug("Ids #{result_ids.inspect}")
374
+ Chroma::Util.log_debug("Embeddings #{result_embeddings.inspect}")
375
+ Chroma::Util.log_debug("Documents #{result_documents.inspect}")
376
+ Chroma::Util.log_debug("Metadatas #{result_metadatas.inspect}")
377
+ Chroma::Util.log_debug("distances #{result_distances.inspect}")
378
+
379
+ result_ids.map.with_index do |id, index|
380
+ Chroma::Resources::Embedding.new(id: id, embedding: result_embeddings[index], document: result_documents[index], metadata: result_metadatas[index], distance: result_distances[index])
381
+ end
382
+ end
383
+ end
384
+ end
385
+ end
@@ -2,13 +2,18 @@
2
2
 
3
3
  module Chroma
4
4
  module APIOperations
5
+ using RubyNext
6
+
5
7
  # Request's response Data object.
6
8
  #
7
9
  # status - HTTP status code. It is zero when a request fails due to network error.
8
10
  # body - Parsed JSON object or response body.
9
11
  # headers - HTTP response headers.
10
12
  # error - Exception or Net::HTTPResponse object if the response is not Net::HTTPSuccess
11
- Response = Data.define(:status, :body, :headers, :error)
13
+ #
14
+ # NOTE: Not supported yet by Ruby Next
15
+ # Response = Data.define(:status, :body, :headers, :error)
16
+ Response = Struct.new("Response", :status, :body, :headers, :error)
12
17
 
13
18
  # Request module provides functionality to perform HTTP requests.
14
19
  module Request
@@ -77,14 +82,14 @@ module Chroma
77
82
 
78
83
  private def build_response_details(response, exception: false, parse_body: true)
79
84
  response_data = Chroma::APIOperations::Response.new(
80
- status: exception ? 0 : response.code.to_i,
81
- body: if exception
82
- exception.to_s
83
- else
84
- (parse_body ? body_to_json(response.body) : response.body)
85
- end,
86
- headers: exception ? {} : response.each_header.to_h,
87
- error: response.is_a?(Net::HTTPSuccess) ? nil : response
85
+ exception ? 0 : response.code.to_i,
86
+ if exception
87
+ exception.to_s
88
+ else
89
+ (parse_body ? body_to_json(response.body) : response.body)
90
+ end,
91
+ exception ? {} : response.each_header.to_h,
92
+ response.is_a?(Net::HTTPSuccess) ? nil : response
88
93
  )
89
94
 
90
95
  case response
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Chroma
4
+ using RubyNext
4
5
  module Resources
5
6
  # A Collection class represents a store for your embeddings, documents, and any additional metadata.
6
7
  # This class can be instantiated by receiving the collection's name and metadata hash.
@@ -4,6 +4,8 @@ module Chroma
4
4
  module Resources
5
5
  # The Database class provides methods for interacting with the Chroma database server.
6
6
  class Database
7
+ using RubyNext
8
+
7
9
  include Chroma::APIOperations::Request
8
10
  # Get the version of the Chroma database server.
9
11
  #
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Chroma
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.0"
5
5
  end
data/lib/chroma-db.rb CHANGED
@@ -6,13 +6,17 @@ require "uri"
6
6
  require "json"
7
7
  require "logger"
8
8
  require "forwardable"
9
+ require "ruby-next"
10
+ require "ruby-next/language/setup"
9
11
 
10
- require_relative "chroma/version"
11
- require_relative "chroma/util"
12
- require_relative "chroma/chroma_configuration"
13
- require_relative "chroma/chroma"
14
- require_relative "chroma/api_operations/request"
15
- require_relative "chroma/errors"
16
- require_relative "chroma/resources/embedding"
17
- require_relative "chroma/resources/collection"
18
- require_relative "chroma/resources/database"
12
+ RubyNext::Language.setup_gem_load_path(transpile: true)
13
+
14
+ require "chroma/version"
15
+ require "chroma/util"
16
+ require "chroma/chroma_configuration"
17
+ require "chroma/chroma"
18
+ require "chroma/api_operations/request"
19
+ require "chroma/errors"
20
+ require "chroma/resources/embedding"
21
+ require "chroma/resources/collection"
22
+ require "chroma/resources/database"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chroma-db
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mario Alberto Chávez
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-12 00:00:00.000000000 Z
11
+ date: 2023-05-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dry-monads
@@ -24,6 +24,34 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: ruby-next-core
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.15.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.15.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: ruby-next
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 0.15.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 0.15.0
27
55
  description: Chroma is the open-source embedding database. Chroma makes it easy to
28
56
  build LLM apps by making knowledge, facts, and skills pluggable for LLMs.
29
57
  email:
@@ -32,6 +60,8 @@ executables: []
32
60
  extensions: []
33
61
  extra_rdoc_files: []
34
62
  files:
63
+ - ".rbnextrc"
64
+ - ".ruby-version"
35
65
  - CHANGELOG.md
36
66
  - CODE_OF_CONDUCT.md
37
67
  - Gemfile
@@ -39,6 +69,11 @@ files:
39
69
  - LICENSE.txt
40
70
  - README.md
41
71
  - Rakefile
72
+ - lib/.rbnext/2.7/chroma/api_operations/request.rb
73
+ - lib/.rbnext/2.7/chroma/resources/collection.rb
74
+ - lib/.rbnext/2.7/chroma/resources/database.rb
75
+ - lib/.rbnext/3.1/chroma/api_operations/request.rb
76
+ - lib/.rbnext/3.1/chroma/resources/collection.rb
42
77
  - lib/chroma-db.rb
43
78
  - lib/chroma/api_operations/request.rb
44
79
  - lib/chroma/chroma.rb
@@ -49,8 +84,6 @@ files:
49
84
  - lib/chroma/resources/embedding.rb
50
85
  - lib/chroma/util.rb
51
86
  - lib/chroma/version.rb
52
- - notebook/Chroma Gem.ipynb
53
- - notebook/ruby.txt
54
87
  - sig/chroma.rbs
55
88
  homepage: https://mariochavez.io
56
89
  licenses:
@@ -67,7 +100,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
67
100
  requirements:
68
101
  - - ">="
69
102
  - !ruby/object:Gem::Version
70
- version: 3.1.0
103
+ version: 2.7.8
71
104
  required_rubygems_version: !ruby/object:Gem::Requirement
72
105
  requirements:
73
106
  - - ">="
@@ -77,5 +110,5 @@ requirements: []
77
110
  rubygems_version: 3.4.12
78
111
  signing_key:
79
112
  specification_version: 4
80
- summary: Ruby bindings for Chroma DB.
113
+ summary: Ruby client for Chroma DB.
81
114
  test_files: []