chroma-db 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,384 @@
1
+ # frozen_string_literal: true
2
+
3
module Chroma
  module Resources
    # A Collection represents a store for your embeddings, documents, and any additional metadata.
    # Instances are built from the collection's name and an optional metadata hash.
    class Collection
      include Chroma::APIOperations::Request

      attr_reader :name
      attr_reader :metadata

      def initialize(name:, metadata: nil)
        @name = name
        @metadata = metadata
      end

      # Query the collection and return the matching embeddings.
      #
      # query_embeddings - An Array of the embeddings to use for querying the collection.
      # results - The maximum number of results to return. 10 by default.
      # where - A Hash of additional conditions to filter the query results (optional).
      # where_document - A Hash of additional conditions to filter the associated documents (optional).
      # include - An Array of the additional information to include in the query results (optional). Metadatas,
      #           documents, and distances by default.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   embeddings = collection.query(query_embeddings: [[1.5, 2.9, 3.3]], results: 5)
      #
      # Returns an Array of Embedding with the query results or raises a Chroma::Error on failure.
      def query(query_embeddings:, results: 10, where: {}, where_document: {}, include: %w[metadatas documents distances])
        payload = {
          query_embeddings:,
          n_results: results,
          where:,
          where_document:,
          include:
        }

        result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/query", payload)

        if result.success?
          build_embeddings_response(result.success.body)
        else
          raise_failure_error(result)
        end
      end

      # Get embeddings from the collection.
      #
      # ids - An Array of the specific embedding IDs to retrieve (optional).
      # where - A Hash of additional conditions to filter the query results (optional).
      # sort - The sorting criteria for the query results (optional).
      # limit - The maximum number of embeddings to retrieve (optional).
      # offset - The offset for pagination (optional).
      # page - The page number for pagination, starting at 1 (optional).
      # page_size - The page size for pagination (optional).
      # where_document - A Hash of additional conditions to filter the associated documents (optional).
      # include - An Array of the additional information to include in the query results (optional). Metadatas,
      #           and documents by default.
      #
      # When both page and page_size are given they take precedence over limit and offset.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   embeddings = collection.get(ids: ["Array#sort", "Array#each"])
      #
      # Returns an Array of Embeddings or raises a Chroma::Error on failure.
      def get(ids: nil, where: {}, sort: nil, limit: nil, offset: nil, page: nil, page_size: nil, where_document: {}, include: %w[metadatas documents])
        if !page.nil? && !page_size.nil?
          # Translate 1-based page/page_size pagination into the offset/limit sent to the server.
          offset = (page - 1) * page_size
          limit = page_size
        end

        payload = {
          ids:,
          where:,
          sort:,
          limit:,
          offset:,
          where_document:,
          include:
        }

        result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/get", payload)

        if result.success?
          build_embeddings_response(result.success.body)
        else
          raise_failure_error(result)
        end
      end

      # Add one or many embeddings to the collection.
      #
      # embeddings - An Array of Embeddings or one Embedding to add.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   collection.add(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
      #
      # Returns true on success, false when there is nothing to add, or raises a Chroma::Error on failure.
      def add(embeddings = [])
        submit_embeddings("add", embeddings)
      end

      # Delete embeddings from the collection.
      #
      # ids - An Array of the specific embedding IDs to delete (optional).
      # where - A Hash of additional conditions to filter the embeddings to delete (optional).
      # where_document - A Hash of additional conditions to filter the associated documents (optional).
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   collection.delete(ids: ["Array#fetch", "Array#sort"])
      #
      # Returns the response body (expected to be the Array of deleted ids) or raises a Chroma::Error on failure.
      def delete(ids: nil, where: {}, where_document: {})
        payload = {
          ids:,
          where:,
          where_document:
        }

        result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/delete", payload)

        return result.success.body if result.success?

        raise_failure_error(result)
      end

      # Update one or many embeddings of the collection.
      #
      # embeddings - An Array of Embeddings or one Embedding to update.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   collection.update(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
      #
      # Returns true on success, false when there is nothing to update, or raises a Chroma::Error on failure.
      def update(embeddings = [])
        # The update payload is sent without the increment_index flag.
        submit_embeddings("update", embeddings, increment_index: false)
      end

      # Upsert (insert or update) one or many embeddings to the collection.
      #
      # embeddings - An Array of Embeddings or one Embedding to upsert.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   embeddings = [
      #     Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}),
      #     Embedding.new(id: "Array#select", embedding: [5.6, 3.1, 4.7], metadata: {url: "https://..."})
      #   ]
      #   collection.upsert(embeddings)
      #
      # Returns true on success, false when there is nothing to upsert, or raises a Chroma::Error on failure.
      def upsert(embeddings = [])
        submit_embeddings("upsert", embeddings)
      end

      # Count the number of embeddings in a collection.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   collection.count
      #
      # Returns the response body (the count of embeddings) or raises a Chroma::Error on failure.
      def count
        result = self.class.execute_request(:get, "#{Chroma.api_url}/collections/#{name}/count")

        return result.success.body if result.success?

        raise_failure_error(result)
      end

      # Modify the name and metadata of the current collection.
      #
      # new_name - The new name for the collection.
      # new_metadata - The new metadata hash for the collection. It is only sent, and only
      #                replaces the local metadata, when it is not empty.
      #
      # Examples:
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   collection.modify("ruby-3.2-documentation")
      #
      # Returns nothing. Raises a Chroma::Error on failure.
      def modify(new_name, new_metadata: {})
        metadata_given = !new_metadata.nil? && new_metadata.any?

        payload = {new_name:}
        payload[:new_metadata] = new_metadata if metadata_given

        result = self.class.execute_request(:put, "#{Chroma.api_url}/collections/#{name}", payload)

        raise_failure_error(result) unless result.success?

        @name = new_name
        # Keep the cached metadata when none was sent: a rename must not wipe it locally.
        @metadata = new_metadata if metadata_given
        nil
      end

      # Creates an index for the collection.
      #
      # Examples:
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   collection.create_index
      #
      # Returns true on success or raises a Chroma::Error on failure.
      def create_index
        result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/create_index")

        return true if result.success?

        raise_failure_error(result)
      end

      # Create a new collection on the database.
      #
      # name - The name of the collection. Name needs to be between 3-63 characters, starts and ends
      #        with an alphanumeric character, contains only alphanumeric characters, underscores or hyphens (-), and
      #        contains no two consecutive periods.
      # metadata - A hash of additional metadata associated with the collection.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.create("ruby-documentation", {source: "Ruby lang website"})
      #
      # Returns the created collection object or raises a Chroma::Error on failure.
      def self.create(name, metadata = nil)
        payload = {name:, metadata:, get_or_create: false}

        result = execute_request(:post, "#{Chroma.api_url}/collections", payload)

        if result.success?
          data = result.success.body
          new(name: data["name"], metadata: data["metadata"])
        else
          raise_failure_error(result)
        end
      end

      # Retrieves a collection from the database.
      #
      # name - The name of the collection to retrieve.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #
      # Returns the retrieved collection object or raises a Chroma::Error on failure.
      def self.get(name)
        result = execute_request(:get, "#{Chroma.api_url}/collections/#{name}")

        if result.success?
          data = result.success.body
          new(name: data["name"], metadata: data["metadata"])
        else
          raise_failure_error(result)
        end
      end

      # Retrieves all collections in the database.
      #
      # Examples
      #
      #   collections = Chroma::Resources::Collection.list
      #
      # Returns an Array of all collections in the database or raises a Chroma::Error on failure.
      def self.list
        result = execute_request(:get, "#{Chroma.api_url}/collections")

        if result.success?
          data = result.success.body
          data.map { |item| new(name: item["name"], metadata: item["metadata"]) }
        else
          raise_failure_error(result)
        end
      end

      # Deletes a collection from the database.
      #
      # name - The name of the collection to delete.
      #
      # Examples
      #
      #   Chroma::Resources::Collection.delete("ruby-documentation")
      #
      # Returns true if the collection was successfully deleted or raises a Chroma::Error otherwise.
      def self.delete(name)
        result = execute_request(:delete, "#{Chroma.api_url}/collections/#{name}")

        return true if result.success?

        raise_failure_error(result)
      end

      # Translate a failed request result into the matching Chroma error and raise it.
      #
      # result - The failed result returned by execute_request.
      #
      # Raises Chroma::APIConnectionError when the failure wraps an exception, or when the server
      # answered with an internal error that does not look like a request problem.
      # Raises Chroma::InvalidRequestError when the internal error body mentions ValueError,
      # IndexError or TypeError.
      # Raises Chroma::APIError for every other failure.
      def self.raise_failure_error(result)
        case result.failure.error
        in Exception => exception
          raise Chroma::APIConnectionError.new(exception.message)
        in Net::HTTPInternalServerError => response
          if response.body.is_a?(String) && (response.body.include?("ValueError") || response.body.include?("IndexError") || response.body.include?("TypeError"))
            raise Chroma::InvalidRequestError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
          else
            raise Chroma::APIConnectionError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
          end
        else
          raise Chroma::APIError.new(result.failure.body, status: result.failure.status, body: result.failure.body)
        end
      end
      private_class_method :raise_failure_error

      private

      # Instance-level bridge to the private class-level error mapper, so that instance methods
      # raise the mapped Chroma error. `send` is required because the class method is private.
      def raise_failure_error(result)
        self.class.send(:raise_failure_error, result)
      end

      # Shared implementation of add, update and upsert.
      #
      # action - The endpoint suffix: "add", "update" or "upsert".
      # embeddings - An Array of Embeddings or one Embedding.
      # increment_index - Whether the increment_index flag is kept in the payload.
      #
      # Returns true on success, false when embeddings is empty, or raises a Chroma::Error on failure.
      def submit_embeddings(action, embeddings, increment_index: true)
        embeddings_array = Array(embeddings)
        return false if embeddings_array.empty?

        payload = build_embeddings_payload(embeddings_array)
        payload.delete(:increment_index) unless increment_index

        result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/#{action}", payload)

        return true if result.success?

        raise_failure_error(result)
      end

      # Turn a list of Embedding objects into the column-oriented payload sent to the server
      # (parallel ids, embeddings, metadatas and documents arrays).
      def build_embeddings_payload(embeddings, increment_index = true)
        payload = {ids: [], embeddings: [], metadatas: [], documents: [], increment_index: increment_index}

        embeddings.each do |embedding|
          payload[:ids] << embedding.id
          payload[:embeddings] << embedding.embedding
          payload[:metadatas] << embedding.metadata
          payload[:documents] << embedding.document
        end

        payload
      end

      # Build Embedding objects from the column-oriented body of a get or query response.
      #
      # A response whose "ids" entries are themselves Arrays is treated as grouped per query
      # embedding (the shape the query endpoint is expected to return) and is un-nested by
      # exactly one level. A flat response is used as is. Embedding vectors are never
      # flattened into their individual numbers.
      def build_embeddings_response(result)
        Chroma::Util.log_debug("Building embeddings from #{result.inspect}")

        raw_ids = result["ids"] || []
        grouped = raw_ids.first.is_a?(Array)
        ungroup = ->(values) { grouped ? values.flatten(1) : values }

        result_ids = ungroup.call(raw_ids)
        result_embeddings = ungroup.call(result["embeddings"] || [])
        result_documents = ungroup.call(result["documents"] || [])
        result_metadatas = ungroup.call(result["metadatas"] || [])
        result_distances = ungroup.call(result["distances"] || [])

        Chroma::Util.log_debug("Ids #{result_ids.inspect}")
        Chroma::Util.log_debug("Embeddings #{result_embeddings.inspect}")
        Chroma::Util.log_debug("Documents #{result_documents.inspect}")
        Chroma::Util.log_debug("Metadatas #{result_metadatas.inspect}")
        Chroma::Util.log_debug("distances #{result_distances.inspect}")

        result_ids.map.with_index do |id, index|
          Chroma::Resources::Embedding.new(id: id, embedding: result_embeddings[index], document: result_documents[index], metadata: result_metadatas[index], distance: result_distances[index])
        end
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
module Chroma
  module Resources
    # The Database class groups the server-level operations of the Chroma database server.
    class Database
      include Chroma::APIOperations::Request

      # Get the version of the Chroma database server.
      #
      # Returns the body of the version response or raises a Chroma::Error on failure.
      def self.version
        request_body(:get, "version")
      end

      # Reset the Chroma database server. This can't be undone.
      #
      # Returns the body of the reset response or raises a Chroma::Error on failure.
      def self.reset
        request_body(:post, "reset")
      end

      # Persist Chroma database data.
      #
      # Returns the body of the persist response or raises a Chroma::Error on failure.
      def self.persist
        request_body(:post, "persist")
      end

      # Check the heartbeat of the Chroma database server.
      #
      # Returns the body of the heartbeat response (expected to be a Hash with a timestamp)
      # or raises a Chroma::Error on failure.
      def self.heartbeat
        request_body(:get, "heartbeat")
      end

      # Run a request against a top-level server endpoint and unwrap its body.
      #
      # http_method - The HTTP verb as a Symbol (:get or :post).
      # endpoint - The path segment appended to Chroma.api_url.
      #
      # Returns the response body on success; failures are raised through raise_failure_error.
      def self.request_body(http_method, endpoint)
        outcome = execute_request(http_method, "#{Chroma.api_url}/#{endpoint}")

        if outcome.success?
          outcome.success.body
        else
          raise_failure_error(outcome)
        end
      end
      private_class_method :request_body

      # Translate a failed request result into the matching Chroma error and raise it.
      #
      # A wrapped exception becomes Chroma::APIConnectionError. An internal server error becomes
      # Chroma::InvalidRequestError when its body mentions ValueError, IndexError or TypeError,
      # and Chroma::APIConnectionError otherwise. Anything else becomes Chroma::APIError.
      def self.raise_failure_error(result)
        failure = result.failure
        error = failure.error

        case error
        when Exception
          raise Chroma::APIConnectionError.new(error.message)
        when Net::HTTPInternalServerError
          response_body = error.body
          request_problem = response_body.is_a?(String) &&
            %w[ValueError IndexError TypeError].any? { |marker| response_body.include?(marker) }
          error_class = request_problem ? Chroma::InvalidRequestError : Chroma::APIConnectionError

          raise error_class.new(failure.body, status: failure.status, body: failure.body)
        else
          raise Chroma::APIError.new(failure.body, status: failure.status, body: failure.body)
        end
      end
      private_class_method :raise_failure_error
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Chroma
  module Resources
    # An Embedding is a plain value holder for one record of a collection: its id, the
    # embedding itself, the metadata, the document and, when available, a distance.
    # Collection builds and consumes instances of this class.
    class Embedding
      attr_reader :id, :embedding, :metadata, :document, :distance

      # id - The identifier of the embedding (required).
      # embedding - The embedding value (optional).
      # metadata - The metadata attached to the embedding (optional).
      # document - The document attached to the embedding (optional).
      # distance - The distance attached to the embedding (optional).
      def initialize(id:, embedding: nil, metadata: nil, document: nil, distance: nil)
        @id = id
        @distance = distance
        @document = document
        @metadata = metadata
        @embedding = embedding
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
module Chroma
  # The Util module provides utility methods for logging messages and data at different levels of severity.
  #
  # Every method accepts an optional :config entry in its data Hash. When present, that
  # configuration (its logger and log level) is used instead of Chroma.config, and the entry
  # itself is left out of the logged data.
  module Util
    # Logs an error message with the given data.
    #
    # message - A String message to be logged.
    # data - A Hash of additional data to be included in the log entry.
    #
    # Examples
    #
    #   Util.log_error("An error occurred", { user_id: 123, error_code: "404" })
    #
    # Returns nothing.
    def self.log_error(message, data = {})
      log_at(Chroma::LEVEL_ERROR, message, data)
    end

    # Logs a debug message with the given data.
    #
    # message - A String message to be logged.
    # data - A Hash of additional data to be included in the log entry.
    #
    # Examples
    #
    #   Util.log_debug("Debugging information", { user_id: 123, action: "update" })
    #
    # Returns nothing.
    def self.log_debug(message, data = {})
      log_at(Chroma::LEVEL_DEBUG, message, data)
    end

    # Logs an informational message with the given data.
    #
    # message - A String message to be logged.
    # data - A Hash of additional data to be included in the log entry.
    #
    # Examples
    #
    #   Util.log_info("Processing request", { request_id: "abc123", route: "/users" })
    #
    # Returns nothing.
    def self.log_info(message, data = {})
      log_at(Chroma::LEVEL_INFO, message, data)
    end

    # Shared implementation of the public log_* methods.
    #
    # level - The severity of the entry (one of the Chroma::LEVEL_* constants).
    # message - A String message to be logged.
    # data - A Hash of additional data; it is read but never modified.
    #
    # Nothing is logged when no logger is available, when the configuration has no log level,
    # or when the configured log level is above the entry's level.
    #
    # Returns nothing.
    def self.log_at(level, message, data)
      config = data[:config] || Chroma.config
      logger = config.logger || Chroma.logger
      threshold = config.log_level
      return if logger.nil? || threshold.nil? || threshold > level

      # Log through the resolved logger so a per-call config's logger is honoured.
      fields = data.reject { |key, _value| key == :config }
      log_internal(message, fields, level: level, logger: logger)
      nil
    end
    private_class_method :log_at

    # Formats the entry as "message=<message> key=value ..." (nil values are skipped) and
    # writes it to the logger at the given level.
    def self.log_internal(message, data = {}, level:, logger:)
      data_str = data.compact.map { |key, value| "#{key}=#{value}" }.join(" ")

      logger&.log(level, "message=#{message} #{data_str}".strip)
    end
    private_class_method :log_internal
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Chroma
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.0"
5
5
  end
data/lib/chroma-db.rb ADDED
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "dry-monads"
4
+ require "net/http"
5
+ require "uri"
6
+ require "json"
7
+ require "logger"
8
+ require "forwardable"
9
+
10
+ require_relative "chroma/version"
11
+ require_relative "chroma/util"
12
+ require_relative "chroma/chroma_configuration"
13
+ require_relative "chroma/chroma"
14
+ require_relative "chroma/api_operations/request"
15
+ require_relative "chroma/errors"
16
+ require_relative "chroma/resources/embedding"
17
+ require_relative "chroma/resources/collection"
18
+ require_relative "chroma/resources/database"