chroma-db 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,384 @@
1
+ # frozen_string_literal: true
2
+
3
module Chroma
  module Resources
    # A Collection class represents a store for your embeddings, documents, and any additional metadata.
    # This class can be instantiated by receiving the collection's name and metadata hash.
    #
    # Every public method performs an HTTP request through the class-level `execute_request`
    # and raises a Chroma error (see `raise_failure_error`) when the request does not succeed.
    class Collection
      include Chroma::APIOperations::Request

      attr_reader :name
      attr_reader :metadata

      # Build a local handle for a collection. No request is performed.
      #
      # name     - The String name of the collection.
      # metadata - A Hash of metadata associated with the collection (optional).
      def initialize(name:, metadata: nil)
        @name = name
        @metadata = metadata
      end

      # Query the collection and return an array of embeddings.
      #
      # query_embeddings - An array of the embeddings to use for querying the collection.
      # results - The maximum number of results to return. 10 by default.
      # where - A Hash of additional conditions to filter the query results (optional).
      # where_document - A Hash of additional conditions to filter the associated documents (optional).
      # include - An Array of the additional information to include in the query results (optional). Metadatas,
      #   documents, and distances by default.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   embeddings = collection.query(query_embeddings: [[1.5, 2.9, 3.3]], results: 5)
      #
      # Returns an Array of Embedding with query results. Raises a Chroma error on failure.
      def query(query_embeddings:, results: 10, where: {}, where_document: {}, include: %w[metadatas documents distances])
        payload = {
          query_embeddings:,
          n_results: results,
          where:,
          where_document:,
          include:
        }

        result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/query", payload)
        raise_failure_error(result) unless result.success?

        build_embeddings_response(result.success.body)
      end

      # Get embeddings from the collection.
      #
      # ids - An Array of the specific embedding IDs to retrieve (optional).
      # where - A Hash of additional conditions to filter the query results (optional).
      # sort - The sorting criteria for the query results (optional).
      # limit - The maximum number of embeddings to retrieve (optional).
      # offset - The offset for pagination (optional).
      # page - The page number for pagination (optional). Only used together with page_size.
      # page_size - The page size for pagination (optional). Only used together with page.
      # where_document - A Hash of additional conditions to filter the associated documents (optional).
      # include - An Array of the additional information to include in the query results (optional). Metadatas,
      #   and documents by default.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   embeddings = collection.get(ids: ["Array#sort", "Array#each"])
      #
      # Returns an Array of Embeddings. Raises a Chroma error on failure.
      def get(ids: nil, where: {}, sort: nil, limit: nil, offset: nil, page: nil, page_size: nil, where_document: {}, include: %w[metadatas documents])
        # When both pagination arguments are given they take precedence over limit/offset.
        if !page.nil? && !page_size.nil?
          offset = (page - 1) * page_size
          limit = page_size
        end

        payload = {
          ids:,
          where:,
          sort:,
          limit:,
          offset:,
          where_document:,
          include:
        }

        result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/get", payload)
        raise_failure_error(result) unless result.success?

        build_embeddings_response(result.success.body)
      end

      # Add one or many embeddings to the collection.
      #
      # embeddings - An Array of Embeddings or one Embedding to add.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   collection.add(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
      #
      # Returns true with success, false when there is nothing to add, or raises a Chroma error on failure.
      def add(embeddings = [])
        batch = Array(embeddings)
        return false if batch.empty?

        post_embeddings("add", build_embeddings_payload(batch))
      end

      # Delete embeddings from the collection.
      #
      # ids - An Array of the specific embedding IDs to delete (optional).
      # where - A Hash of additional conditions to filter the embeddings to delete (optional).
      # where_document - A Hash of additional conditions to filter the associated documents (optional).
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   collection.delete(ids: ["Array#fetch", "Array#sort"])
      #
      # Returns the body of the successful response (documented upstream as an Array of deleted ids).
      # Raises a Chroma error on failure.
      def delete(ids: nil, where: {}, where_document: {})
        payload = {
          ids:,
          where:,
          where_document:
        }

        result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/delete", payload)
        return result.success.body if result.success?

        raise_failure_error(result)
      end

      # Update one or many embeddings in the collection.
      #
      # embeddings - An Array of Embeddings or one Embedding to update.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   collection.update(Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}))
      #
      # Returns true with success, false when there is nothing to update, or raises a Chroma error on failure.
      def update(embeddings = [])
        batch = Array(embeddings)
        return false if batch.empty?

        # The update request is sent without the increment_index flag.
        payload = build_embeddings_payload(batch)
        payload.delete(:increment_index)

        post_embeddings("update", payload)
      end

      # Upsert (insert or update) one or many embeddings to the collection.
      #
      # embeddings - An Array of Embeddings or one Embedding to upsert.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   embeddings = [
      #     Embedding.new(id: "Array#fetch", embedding: [9.8, 2.3, 2.9], metadata: {url: "https://..."}),
      #     Embedding.new(id: "Array#select", embedding: [5.6, 3.1, 4.7], metadata: {url: "https://..."})
      #   ]
      #   collection.upsert(embeddings)
      #
      # Returns true with success, false when there is nothing to upsert, or raises a Chroma error on failure.
      def upsert(embeddings = [])
        batch = Array(embeddings)
        return false if batch.empty?

        post_embeddings("upsert", build_embeddings_payload(batch))
      end

      # Count the number of embeddings in a collection.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   collection.count
      #
      # Returns the body of the successful response (the count of embeddings in the collection).
      # Raises a Chroma error on failure.
      def count
        result = self.class.execute_request(:get, "#{Chroma.api_url}/collections/#{name}/count")
        return result.success.body if result.success?

        raise_failure_error(result)
      end

      # Modify the name and metadata of the current collection.
      #
      # new_name - The new name for the collection.
      # new_metadata - The new metadata hash for the collection. It is only sent, and only
      #   replaces the local metadata, when it is not empty.
      #
      # Examples:
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   collection.modify("ruby-3.2-documentation")
      #
      # Returns nothing. Raises a Chroma error on failure.
      def modify(new_name, new_metadata: {})
        metadata_given = new_metadata&.any?

        payload = {new_name:}
        payload[:new_metadata] = new_metadata if metadata_given

        result = self.class.execute_request(:put, "#{Chroma.api_url}/collections/#{name}", payload)
        raise_failure_error(result) unless result.success?

        @name = new_name
        # Keep the current metadata when none was sent; the request did not touch it.
        @metadata = new_metadata if metadata_given
      end

      # Creates an index for the collection.
      #
      # Examples:
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #   collection.create_index
      #
      # Returns true on success or raises a Chroma error on failure.
      def create_index
        result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/create_index")
        return true if result.success?

        raise_failure_error(result)
      end

      # Create a new collection on the database.
      #
      # name - The name of the collection. Name needs to be between 3-63 characters, starts and ends
      #   with an alphanumeric character, contains only alphanumeric characters, underscores or hyphens (-), and
      #   contains no two consecutive periods
      # metadata - A hash of additional metadata associated with the collection.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.create("ruby-documentation", {source: "Ruby lang website"})
      #
      # Returns the created collection object. Raises a Chroma error on failure.
      def self.create(name, metadata = nil)
        payload = {name:, metadata:, get_or_create: false}

        result = execute_request(:post, "#{Chroma.api_url}/collections", payload)
        raise_failure_error(result) unless result.success?

        build_from(result.success.body)
      end

      # Retrieves a collection from the database.
      #
      # name - The name of the collection to retrieve.
      #
      # Examples
      #
      #   collection = Chroma::Resources::Collection.get("ruby-documentation")
      #
      # Returns the retrieved collection object. Raises a Chroma error when the request fails.
      def self.get(name)
        result = execute_request(:get, "#{Chroma.api_url}/collections/#{name}")
        raise_failure_error(result) unless result.success?

        build_from(result.success.body)
      end

      # Retrieves all collections in the database.
      #
      # Examples
      #
      #   collections = Chroma::Resources::Collection.list
      #
      # Returns an Array of all collections in the database. Raises a Chroma error on failure.
      def self.list
        result = execute_request(:get, "#{Chroma.api_url}/collections")
        raise_failure_error(result) unless result.success?

        result.success.body.map { |item| build_from(item) }
      end

      # Deletes a collection from the database.
      #
      # name - The name of the collection to delete.
      #
      # Examples
      #
      #   Chroma::Resources::Collection.delete("ruby-documentation")
      #
      # Returns true if the collection was successfully deleted, raises a Chroma error otherwise.
      def self.delete(name)
        result = execute_request(:delete, "#{Chroma.api_url}/collections/#{name}")
        return true if result.success?

        raise_failure_error(result)
      end

      # Internal: Build a Collection from a response Hash with "name" and "metadata" keys.
      def self.build_from(data)
        new(name: data["name"], metadata: data["metadata"])
      end
      private_class_method :build_from

      # Internal: Translate a failed request result into a raised Chroma error.
      #
      # - the failure wraps an Exception: Chroma::APIConnectionError with its message.
      # - the failure wraps a Net::HTTPInternalServerError whose String body mentions
      #   ValueError, IndexError or TypeError: Chroma::InvalidRequestError.
      # - any other Net::HTTPInternalServerError: Chroma::APIConnectionError.
      # - anything else: Chroma::APIError.
      #
      # Never returns.
      def self.raise_failure_error(result)
        failure = result.failure

        case failure.error
        in Exception => exception
          raise Chroma::APIConnectionError.new(exception.message)
        in Net::HTTPInternalServerError => response
          body = response.body
          invalid_request = body.is_a?(String) && %w[ValueError IndexError TypeError].any? { |marker| body.include?(marker) }
          error_class = invalid_request ? Chroma::InvalidRequestError : Chroma::APIConnectionError

          raise error_class.new(failure.body, status: failure.status, body: failure.body)
        else
          raise Chroma::APIError.new(failure.body, status: failure.status, body: failure.body)
        end
      end
      private_class_method :raise_failure_error

      private

      # Internal: Instance-level entry point for the class-level error translation.
      # The class method is private, so it has to be reached through `send`; without
      # this delegator every failure path of the instance methods ends in NoMethodError.
      def raise_failure_error(result)
        self.class.send(:raise_failure_error, result)
      end

      # Internal: POST an embeddings payload to one of the collection's write endpoints.
      #
      # action  - The String endpoint name ("add", "update" or "upsert").
      # payload - The Hash built by build_embeddings_payload.
      #
      # Returns true on success or raises a Chroma error on failure.
      def post_embeddings(action, payload)
        result = self.class.execute_request(:post, "#{Chroma.api_url}/collections/#{name}/#{action}", payload)
        return true if result.success?

        raise_failure_error(result)
      end

      # Internal: Turn a list of Embedding objects into the column-oriented Hash sent to the server
      # (parallel ids/embeddings/metadatas/documents arrays plus the increment_index flag).
      def build_embeddings_payload(embeddings, increment_index = true)
        payload = {ids: [], embeddings: [], metadatas: [], documents: [], increment_index: increment_index}

        embeddings.each do |embedding|
          payload[:ids] << embedding.id
          payload[:embeddings] << embedding.embedding
          payload[:metadatas] << embedding.metadata
          payload[:documents] << embedding.document
        end

        payload
      end

      # Internal: Turn a column-oriented response Hash into an Array of Embedding objects,
      # pairing the entries of every column by position.
      def build_embeddings_response(result)
        Chroma::Util.log_debug("Building embeddings from #{result.inspect}")

        raw_ids = result["ids"] || []
        # Each embedding is itself an Array, so it must not be flattened away. Only unwrap
        # as many levels as the ids are nested.
        # NOTE(review): assumes ids are scalars and "embeddings" is nested exactly as deeply
        # as "ids" - confirm against the server responses for /get and /query.
        grouping_depth = raw_ids.first.is_a?(Array) ? 1 : 0

        result_ids = raw_ids.flatten
        result_embeddings = (result["embeddings"] || []).flatten(grouping_depth)
        result_documents = (result["documents"] || []).flatten
        result_metadatas = (result["metadatas"] || []).flatten
        result_distances = (result["distances"] || []).flatten

        Chroma::Util.log_debug("Ids #{result_ids.inspect}")
        Chroma::Util.log_debug("Embeddings #{result_embeddings.inspect}")
        Chroma::Util.log_debug("Documents #{result_documents.inspect}")
        Chroma::Util.log_debug("Metadatas #{result_metadatas.inspect}")
        Chroma::Util.log_debug("distances #{result_distances.inspect}")

        result_ids.each_with_index.map do |id, index|
          Chroma::Resources::Embedding.new(
            id: id,
            embedding: result_embeddings[index],
            document: result_documents[index],
            metadata: result_metadatas[index],
            distance: result_distances[index]
          )
        end
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
module Chroma
  module Resources
    # The Database class groups the server-wide operations of a Chroma database server.
    # All operations are class methods; each one performs a single request and hands
    # back the body of the successful response.
    class Database
      include Chroma::APIOperations::Request

      # Get the version of the Chroma database server.
      #
      # Returns the body of the successful response or raises a Chroma error on failure.
      def self.version
        body_or_raise(execute_request(:get, "#{Chroma.api_url}/version"))
      end

      # Reset the Chroma database server. This can't be undone.
      #
      # Returns the body of the successful response or raises a Chroma error on failure.
      def self.reset
        body_or_raise(execute_request(:post, "#{Chroma.api_url}/reset"))
      end

      # Persist Chroma database data.
      #
      # Returns the body of the successful response or raises a Chroma error on failure.
      def self.persist
        body_or_raise(execute_request(:post, "#{Chroma.api_url}/persist"))
      end

      # Check the heartbeat of the Chroma database server.
      #
      # Returns the body of the successful response (documented upstream as a Hash with a
      # timestamp) or raises a Chroma error on failure.
      def self.heartbeat
        body_or_raise(execute_request(:get, "#{Chroma.api_url}/heartbeat"))
      end

      # Internal: Unwrap a request result.
      #
      # Returns the response body when the result is a success; raises otherwise.
      def self.body_or_raise(result)
        if result.success?
          result.success.body
        else
          raise_failure_error(result)
        end
      end
      private_class_method :body_or_raise

      # Internal: Translate a failed request result into a raised Chroma error.
      #
      # An wrapped Exception becomes Chroma::APIConnectionError; a Net::HTTPInternalServerError
      # becomes Chroma::InvalidRequestError when its String body mentions ValueError, IndexError
      # or TypeError and Chroma::APIConnectionError otherwise; anything else becomes Chroma::APIError.
      def self.raise_failure_error(result)
        failure = result.failure

        case failure.error
        in Exception => exception
          raise Chroma::APIConnectionError.new(exception.message)
        in Net::HTTPInternalServerError => response
          payload = response.body
          caller_fault = payload.is_a?(String) && %w[ValueError IndexError TypeError].any? { |marker| payload.include?(marker) }

          if caller_fault
            raise Chroma::InvalidRequestError.new(failure.body, status: failure.status, body: failure.body)
          end

          raise Chroma::APIConnectionError.new(failure.body, status: failure.status, body: failure.body)
        else
          raise Chroma::APIError.new(failure.body, status: failure.status, body: failure.body)
        end
      end
      private_class_method :raise_failure_error
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Chroma
  module Resources
    # An Embedding is a read-only value object holding one entry of a collection:
    # its id plus the optional embedding, metadata, document and distance.
    # This class is used by Collection class.
    class Embedding
      attr_reader :id, :embedding, :metadata, :document, :distance

      # id        - The identifier of the embedding (required).
      # embedding - The embedding value (optional, nil by default).
      # metadata  - The metadata attached to the embedding (optional, nil by default).
      # document  - The document attached to the embedding (optional, nil by default).
      # distance  - The distance reported for the embedding (optional, nil by default).
      def initialize(id:, embedding: nil, metadata: nil, document: nil, distance: nil)
        @id, @embedding, @metadata = id, embedding, metadata
        @document, @distance = document, distance
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
module Chroma
  # The Util module provides utility methods for logging messages and data at different levels of severity.
  #
  # Every log_* method resolves its settings the same way:
  # - config is data[:config] when present, otherwise Chroma.config.
  # - the logger is config.logger when set, otherwise Chroma.logger.
  # - the entry is written only when a logger is available, config.log_level is set and
  #   config.log_level is less than or equal to the level of the method being called.
  module Util
    # Logs an error message with the given data.
    #
    # message - A String message to be logged.
    # data - A Hash of additional data to be included in the log entry. A :config entry,
    #   when present, selects the configuration to use and is not logged. The Hash is not modified.
    #
    # Examples
    #
    #   Util.log_error("An error occurred", { user_id: 123, error_code: "404" })
    #
    # Returns nothing.
    def self.log_error(message, data = {})
      log_at(Chroma::LEVEL_ERROR, message, data)
    end

    # Logs a debug message with the given data.
    #
    # message - A String message to be logged.
    # data - A Hash of additional data to be included in the log entry. A :config entry,
    #   when present, selects the configuration to use and is not logged. The Hash is not modified.
    #
    # Examples
    #
    #   Util.log_debug("Debugging information", { user_id: 123, action: "update" })
    #
    # Returns nothing.
    def self.log_debug(message, data = {})
      log_at(Chroma::LEVEL_DEBUG, message, data)
    end

    # Logs an informational message with the given data.
    #
    # message - A String message to be logged.
    # data - A Hash of additional data to be included in the log entry. A :config entry,
    #   when present, selects the configuration to use and is not logged. The Hash is not modified.
    #
    # Examples
    #
    #   Util.log_info("Processing request", { request_id: "abc123", route: "/users" })
    #
    # Returns nothing.
    def self.log_info(message, data = {})
      log_at(Chroma::LEVEL_INFO, message, data)
    end

    # Internal: Shared implementation of the log_* methods.
    #
    # level   - One of the Chroma::LEVEL_* values identifying the severity of the entry.
    # message - A String message to be logged.
    # data    - The Hash received by the public method.
    #
    # Returns nothing.
    def self.log_at(level, message, data)
      config = data[:config] || Chroma.config
      logger = config.logger || Chroma.logger
      threshold = config.log_level

      # Without a logger there is nowhere to write; without a threshold there is nothing
      # to compare the level against (comparing nil would raise NoMethodError).
      return if logger.nil? || threshold.nil? || threshold > level

      # Drop :config on a copy so the caller's Hash (possibly frozen or reused) stays intact.
      fields = data.reject { |key, _value| key == :config }

      # Write to the resolved logger so a logger set on the config is honoured.
      log_internal(message, fields, level:, logger:)
      nil
    end
    private_class_method :log_at

    # Internal: Format and write one entry as "message=<message> key=value ...",
    # skipping data entries whose value is nil. Does nothing when logger is nil.
    def self.log_internal(message, data = {}, level:, logger:)
      data_str = data.reject { |_k, v| v.nil? }.map { |(k, v)| "#{k}=#{v}" }.join(" ")

      logger&.log(level, "message=#{message} #{data_str}".strip)
    end
    private_class_method :log_internal
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Chroma
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.0"
5
5
  end
data/lib/chroma-db.rb ADDED
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "dry-monads"
4
+ require "net/http"
5
+ require "uri"
6
+ require "json"
7
+ require "logger"
8
+ require "forwardable"
9
+
10
+ require_relative "chroma/version"
11
+ require_relative "chroma/util"
12
+ require_relative "chroma/chroma_configuration"
13
+ require_relative "chroma/chroma"
14
+ require_relative "chroma/api_operations/request"
15
+ require_relative "chroma/errors"
16
+ require_relative "chroma/resources/embedding"
17
+ require_relative "chroma/resources/collection"
18
+ require_relative "chroma/resources/database"