chroma-db 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,385 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ using RubyNext
5
+ module Resources
6
+ # A Collection class represents a store for your embeddings, documents, and any additional metadata.
7
+ # This class can be instantiated by receiving the collection's name and metadata hash.
8
+ class Collection
9
+ include Chroma::APIOperations::Request
10
+
11
+ attr_reader :name
12
+ attr_reader :metadata
13
+
14
+ def initialize(name:, metadata: nil)
15
+ @name = name
16
+ @metadata = metadata
17
+ end
18
+
19
+ # Query the collection and return an array of embeddings.
20
+ #
21
+ # query_embeddings - An array of the embeddings to use for querying the collection.
22
+ # results - The maximum number of results to return. 10 by default.
23
+ # where - A Hash of additional conditions to filter the query results (optional).
24
+ # where_document - A Hash of additional conditions to filter the associated documents (optional).
25
+ # include - An Array of the additional information to include in the query results (optional). Metadatas,
26
+ # documents, and distances by default.
27
+ #
28
+ # Examples
29
+ #
30
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
31
+ # embeddings = collection.query(query_embeddings: [[1.5, 2.9, 3.3]], results: 5)
32
+ #
33
+ # Returns an Array of Embeddings with the query results.
34
+ def query(query_embeddings:, results: 10, where: {}, where_document: {}, include: %w[metadatas documents distances])
35
+ payload = {
36
+ query_embeddings: query_embeddings,
37
+ n_results: results,
38
+ where: where,
39
+ where_document: where_document,
40
+ include: include
41
+ }
42
+
43
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/query", payload)
44
+
45
+ if result.success?
46
+ build_embeddings_response(result.success.body)
47
+ else
48
+ raise_failure_error(result)
49
+ end
50
+ end
51
+
52
+ # Get embeddings from the collection.
53
+ #
54
+ # ids - An Array of the specific embedding IDs to retrieve (optional).
55
+ # where - A Hash of additional conditions to filter the query results (optional).
56
+ # sort - The sorting criteria for the query results (optional).
57
+ # limit - The maximum number of embeddings to retrieve (optional).
58
+ # offset - The offset for pagination (optional).
59
+ # page - The page number for pagination (optional).
60
+ # page_size - The page size for pagination (optional).
61
+ # where_document - A Hash of additional conditions to filter the associated documents (optional).
62
+ # include - An Array of the additional information to include in the query results (optional). Metadatas,
63
+ # and documents by default.
64
+ #
65
+ # Examples
66
+ #
67
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
68
+ # embeddings = collection.get(ids: ["Array#sort", "Array#each"])
69
+ #
70
+ # Returns an Array of Embeddings
71
+ def get(ids: nil, where: {}, sort: nil, limit: nil, offset: nil, page: nil, page_size: nil, where_document: {}, include: %w[metadatas documents])
72
+ if !page.nil? && !page_size.nil?
73
+ offset = (page - 1) * page_size
74
+ limit = page_size
75
+ end
76
+
77
+ payload = {
78
+ ids: ids,
79
+ where: where,
80
+ sort: sort,
81
+ limit: limit,
82
+ offset: offset,
83
+ where_document: where_document,
84
+ include: include
85
+ }
86
+
87
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/get", payload)
88
+
89
+ if result.success?
90
+ build_embeddings_response(result.success.body)
91
+ else
92
+ raise_failure_error(result)
93
+ end
94
+ end
95
+
96
+ # Add one or many embeddings to the collection.
97
+ #
98
+ # embeddings - An Array of Embeddings or one Embedding to add.
99
+ #
100
+ # Examples
101
+ #
102
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
103
+ # collection.add(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
104
+ #
105
+ # Returns true on success, false when no embeddings are given, or raises a Chroma::Error on failure.
106
+ def add(embeddings = [])
107
+ embeddings_array = Array(embeddings)
108
+ return false if embeddings_array.size == 0
109
+
110
+ payload = build_embeddings_payload(embeddings_array)
111
+
112
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/add", payload)
113
+
114
+ return true if result.success?
115
+
116
+ raise_failure_error(result)
117
+ end
118
+
119
+ # Delete embeddings from the collection.
120
+ #
121
+ # ids - An Array of the specific embedding IDs to delete (optional).
122
+ # where - A Hash of additional conditions to filter the embeddings to delete (optional).
123
+ # where_document - A Hash of additional conditions to filter the associated documents (optional).
124
+ #
125
+ # Examples
126
+ #
127
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
128
+ # collection.delete(ids: ["Array#fetch", "Array#sort"])
129
+ #
130
+ # Returns an Array of deleted global ids.
131
+ def delete(ids: nil, where: {}, where_document: {})
132
+ payload = {
133
+ ids: ids,
134
+ where: where,
135
+ where_document: where_document
136
+ }
137
+
138
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/delete", payload)
139
+
140
+ return result.success.body if result.success?
141
+
142
+ raise_failure_error(result)
143
+ end
144
+
145
+ # Update one or many embeddings in the collection.
146
+ #
147
+ # embeddings - An Array of Embeddings or one Embedding to update.
148
+ #
149
+ # Examples
150
+ #
151
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
152
+ # collection.update(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
153
+ #
154
+ # Returns true on success, false when no embeddings are given, or raises a Chroma::Error on failure.
155
+ def update(embeddings = [])
156
+ embeddings_array = Array(embeddings)
157
+ return false if embeddings_array.size == 0
158
+
159
+ payload = build_embeddings_payload(embeddings_array)
160
+ payload.delete(:increment_index)
161
+
162
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/update", payload)
163
+
164
+ return true if result.success?
165
+
166
+ raise_failure_error(result)
167
+ end
168
+
169
+ # Upsert (insert or update) one or many embeddings to the collection.
170
+ #
171
+ # embeddings - An Array of Embeddings or one Embedding to insert or update.
172
+ #
173
+ # Examples
174
+ #
175
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
176
+ # embeddings = [
177
+ # Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}),
178
+ # Embedding.new(id: "Array#select", embedding: [5.6, 3.1, 4.7], metadata: {url: "https://..."})
179
+ # ]
180
+ # collection.upsert(embeddings)
181
+ #
182
+ # Returns true on success, false when no embeddings are given, or raises a Chroma::Error on failure.
183
+ def upsert(embeddings = [])
184
+ embeddings_array = Array(embeddings)
185
+ return false if embeddings_array.size == 0
186
+
187
+ payload = build_embeddings_payload(embeddings_array)
188
+
189
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/upsert", payload)
190
+
191
+ return true if result.success?
192
+
193
+ raise_failure_error(result)
194
+ end
195
+
196
+ # Count the number of embeddings in a collection.
197
+ #
198
+ # Examples
199
+ #
200
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
201
+ # collection.count
202
+ #
203
+ # Returns the count of embeddings in the collection.
204
+ def count
205
+ result = self.class.execute_request(:get, "#{Chroma.api_url}/collections/#{name}/count")
206
+
207
+ return result.success.body if result.success?
208
+
209
+ raise_failure_error(result)
210
+ end
211
+
212
+ # Modify the name and metadata of the current collection.
213
+ #
214
+ # new_name - The new name for the collection.
215
+ # new_metadata - The new metadata hash for the collection.
216
+ #
217
+ # Examples:
218
+ #
219
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
220
+ # collection.modify("ruby-3.2-documentation")
221
+ #
222
+ # Returns nothing.
223
+ def modify(new_name, new_metadata: {})
224
+ payload = {new_name: new_name}
225
+ payload[:new_metadata] = new_metadata if new_metadata.any?
226
+
227
+ result = self.class.execute_request(:put, "#{Chroma.api_url}/collections/#{name}", payload)
228
+
229
+ if result.success?
230
+ @name = new_name
231
+ @metadata = new_metadata
232
+ else
233
+ raise_failure_error(result)
234
+ end
235
+ end
236
+
237
+ # Creates an index for the collection.
238
+ #
239
+ # Examples:
240
+ #
241
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
242
+ # collection.create_index
243
+ #
244
+ # Returns true on success or raises a Chroma::Error on failure.
245
+ def create_index
246
+ result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/create_index")
247
+
248
+ return true if result.success?
249
+
250
+ raise_failure_error(result)
251
+ end
252
+
253
+ # Create a new collection on the database.
254
+ #
255
+ # name - The name of the collection. Name needs to be between 3-63 characters, starts and ends
256
+ # with an alphanumeric character, contains only alphanumeric characters, underscores or hyphens (-), and
257
+ # contains no two consecutive periods
258
+ # metadata - A hash of additional metadata associated with the collection.
259
+ #
260
+ # Examples
261
+ #
262
+ # collection = Chroma::Resources::Collection.create("ruby-documentation", {source: "Ruby lang website"})
263
+ #
264
+ # Returns the created collection object.
265
+ def self.create(name, metadata = nil)
266
+ payload = {name: name, metadata: metadata, get_or_create: false}
267
+
268
+ result = execute_request(:post, "#{Chroma.api_url}/collections", payload)
269
+
270
+ if result.success?
271
+ data = result.success.body
272
+ new(name: data["name"], metadata: data["metadata"])
273
+ else
274
+ raise_failure_error(result)
275
+ end
276
+ end
277
+
278
+ # Retrieves a collection from the database.
279
+ #
280
+ # name - The name of the collection to retrieve.
281
+ #
282
+ # Examples
283
+ #
284
+ # collection = Chroma::Resources::Collection.get("ruby-documentation")
285
+ #
286
+ # Returns the retrieved collection object. Raises Chroma::InvalidRequestError if not found.
287
+ def self.get(name)
288
+ result = execute_request(:get, "#{Chroma.api_url}/collections/#{name}")
289
+
290
+ if result.success?
291
+ data = result.success.body
292
+ new(name: data["name"], metadata: data["metadata"])
293
+ else
294
+ raise_failure_error(result)
295
+ end
296
+ end
297
+
298
+ # Retrieves all collections in the database.
299
+ #
300
+ # Examples
301
+ #
302
+ # collections = Chroma::Resources::Collection.list
303
+ #
304
+ # Returns an Array of all collections in the database.
305
+ def self.list
306
+ result = execute_request(:get, "#{Chroma.api_url}/collections")
307
+
308
+ if result.success?
309
+ data = result.success.body
310
+ data.map { |item| new(name: item["name"], metadata: item["metadata"]) }
311
+ else
312
+ raise_failure_error(result)
313
+ end
314
+ end
315
+
316
+ # Deletes a collection from the database.
317
+ #
318
+ # name - The name of the collection to delete.
319
+ #
320
+ # Examples
321
+ #
322
+ # Chroma::Resources::Collection.delete("ruby-documentation")
323
+ #
324
+ # Returns true if the collection was successfully deleted, raises Chroma::InvalidRequestError otherwise.
325
+ def self.delete(name)
326
+ result = execute_request(:delete, "#{Chroma.api_url}/collections/#{name}")
327
+
328
+ return true if result.success?
329
+
330
+ raise_failure_error(result)
331
+ end
332
+
333
+ def self.raise_failure_error(result)
334
+ case result.failure.error
335
+ in Exception => exception
336
+ raise Chroma::APIConnectionError.new(exception.message)
337
+ in Net::HTTPInternalServerError => response
338
+ if response.body.is_a?(String) && (response.body.include?("ValueError") || response.body.include?("IndexError") || response.body.include?("TypeError"))
339
+ raise Chroma::InvalidRequestError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
340
+ else
341
+ raise Chroma::APIConnectionError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
342
+ end
343
+ else
344
+ raise Chroma::APIError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
345
+ end
346
+ end
347
+ private_class_method :raise_failure_error
348
+
349
+ private
350
+
351
+ def build_embeddings_payload(embeddings, increment_index = true)
352
+ payload = {ids: [], embeddings: [], metadatas: [], documents: [], increment_index: increment_index}
353
+
354
+ embeddings.each do |embedding|
355
+ payload[:ids] << embedding.id
356
+ payload[:embeddings] << embedding.embedding
357
+ payload[:metadatas] << embedding.metadata
358
+ payload[:documents] << embedding.document
359
+ end
360
+
361
+ payload
362
+ end
363
+
364
+ def build_embeddings_response(result)
365
+ Chroma::Util.log_debug("Building embeddings from #{result.inspect}")
366
+
367
+ result_ids = result.fetch("ids", []).flatten
368
+ result_embeddings = (result.dig("embeddings") || []).flatten
369
+ result_documents = (result.dig("documents") || []).flatten
370
+ result_metadatas = (result.dig("metadatas") || []).flatten
371
+ result_distances = (result.dig("distances") || []).flatten
372
+
373
+ Chroma::Util.log_debug("Ids #{result_ids.inspect}")
374
+ Chroma::Util.log_debug("Embeddings #{result_embeddings.inspect}")
375
+ Chroma::Util.log_debug("Documents #{result_documents.inspect}")
376
+ Chroma::Util.log_debug("Metadatas #{result_metadatas.inspect}")
377
+ Chroma::Util.log_debug("distances #{result_distances.inspect}")
378
+
379
+ result_ids.map.with_index do |id, index|
380
+ Chroma::Resources::Embedding.new(id: id, embedding: result_embeddings[index], document: result_documents[index], metadata: result_metadatas[index], distance: result_distances[index])
381
+ end
382
+ end
383
+ end
384
+ end
385
+ end
@@ -2,13 +2,18 @@
2
2
 
3
3
  module Chroma
4
4
  module APIOperations
5
+ using RubyNext
6
+
5
7
  # Request's response Data object.
6
8
  #
7
9
  # status - HTTP status code. It is zero when a request fails due to network error.
8
10
  # body - Parsed JSON object or response body.
9
11
  # headers - HTTP response headers.
10
12
  # error - Exception or Net::HTTPResponse object if the response is not Net::HTTPSuccess
11
- Response = Data.define(:status, :body, :headers, :error)
13
+ #
14
+ # NOTE: Not supported yet by Ruby Next
15
+ # Response = Data.define(:status, :body, :headers, :error)
16
+ Response = Struct.new("Response", :status, :body, :headers, :error)
12
17
 
13
18
  # Request module provides functionality to perform HTTP requests.
14
19
  module Request
@@ -77,14 +82,14 @@ module Chroma
77
82
 
78
83
  private def build_response_details(response, exception: false, parse_body: true)
79
84
  response_data = Chroma::APIOperations::Response.new(
80
- status: exception ? 0 : response.code.to_i,
81
- body: if exception
82
- exception.to_s
83
- else
84
- (parse_body ? body_to_json(response.body) : response.body)
85
- end,
86
- headers: exception ? {} : response.each_header.to_h,
87
- error: response.is_a?(Net::HTTPSuccess) ? nil : response
85
+ exception ? 0 : response.code.to_i,
86
+ if exception
87
+ exception.to_s
88
+ else
89
+ (parse_body ? body_to_json(response.body) : response.body)
90
+ end,
91
+ exception ? {} : response.each_header.to_h,
92
+ response.is_a?(Net::HTTPSuccess) ? nil : response
88
93
  )
89
94
 
90
95
  case response
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Chroma
4
+ using RubyNext
4
5
  module Resources
5
6
  # A Collection class represents a store for your embeddings, documents, and any additional metadata.
6
7
  # This class can be instantiated by receiving the collection's name and metadata hash.
@@ -4,6 +4,8 @@ module Chroma
4
4
  module Resources
5
5
  # The Database class provides methods for interacting with the Chroma database server.
6
6
  class Database
7
+ using RubyNext
8
+
7
9
  include Chroma::APIOperations::Request
8
10
  # Get the version of the Chroma database server.
9
11
  #
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Chroma
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.0"
5
5
  end
data/lib/chroma-db.rb CHANGED
@@ -6,13 +6,17 @@ require "uri"
6
6
  require "json"
7
7
  require "logger"
8
8
  require "forwardable"
9
+ require "ruby-next"
10
+ require "ruby-next/language/setup"
9
11
 
10
- require_relative "chroma/version"
11
- require_relative "chroma/util"
12
- require_relative "chroma/chroma_configuration"
13
- require_relative "chroma/chroma"
14
- require_relative "chroma/api_operations/request"
15
- require_relative "chroma/errors"
16
- require_relative "chroma/resources/embedding"
17
- require_relative "chroma/resources/collection"
18
- require_relative "chroma/resources/database"
12
+ RubyNext::Language.setup_gem_load_path(transpile: true)
13
+
14
+ require "chroma/version"
15
+ require "chroma/util"
16
+ require "chroma/chroma_configuration"
17
+ require "chroma/chroma"
18
+ require "chroma/api_operations/request"
19
+ require "chroma/errors"
20
+ require "chroma/resources/embedding"
21
+ require "chroma/resources/collection"
22
+ require "chroma/resources/database"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chroma-db
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mario Alberto Chávez
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-12 00:00:00.000000000 Z
11
+ date: 2023-05-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dry-monads
@@ -24,6 +24,34 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: ruby-next-core
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.15.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.15.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: ruby-next
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 0.15.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 0.15.0
27
55
  description: Chroma is the open-source embedding database. Chroma makes it easy to
28
56
  build LLM apps by making knowledge, facts, and skills pluggable for LLMs.
29
57
  email:
@@ -32,6 +60,8 @@ executables: []
32
60
  extensions: []
33
61
  extra_rdoc_files: []
34
62
  files:
63
+ - ".rbnextrc"
64
+ - ".ruby-version"
35
65
  - CHANGELOG.md
36
66
  - CODE_OF_CONDUCT.md
37
67
  - Gemfile
@@ -39,6 +69,11 @@ files:
39
69
  - LICENSE.txt
40
70
  - README.md
41
71
  - Rakefile
72
+ - lib/.rbnext/2.7/chroma/api_operations/request.rb
73
+ - lib/.rbnext/2.7/chroma/resources/collection.rb
74
+ - lib/.rbnext/2.7/chroma/resources/database.rb
75
+ - lib/.rbnext/3.1/chroma/api_operations/request.rb
76
+ - lib/.rbnext/3.1/chroma/resources/collection.rb
42
77
  - lib/chroma-db.rb
43
78
  - lib/chroma/api_operations/request.rb
44
79
  - lib/chroma/chroma.rb
@@ -49,8 +84,6 @@ files:
49
84
  - lib/chroma/resources/embedding.rb
50
85
  - lib/chroma/util.rb
51
86
  - lib/chroma/version.rb
52
- - notebook/Chroma Gem.ipynb
53
- - notebook/ruby.txt
54
87
  - sig/chroma.rbs
55
88
  homepage: https://mariochavez.io
56
89
  licenses:
@@ -67,7 +100,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
67
100
  requirements:
68
101
  - - ">="
69
102
  - !ruby/object:Gem::Version
70
- version: 3.1.0
103
+ version: 2.7.8
71
104
  required_rubygems_version: !ruby/object:Gem::Requirement
72
105
  requirements:
73
106
  - - ">="
@@ -77,5 +110,5 @@ requirements: []
77
110
  rubygems_version: 3.4.12
78
111
  signing_key:
79
112
  specification_version: 4
80
- summary: Ruby bindings for Chroma DB.
113
+ summary: Ruby client for Chroma DB.
81
114
  test_files: []