chromadb-experimental 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. checksums.yaml +7 -0
  2. data/lib/chromadb/admin_client.rb +6 -0
  3. data/lib/chromadb/client.rb +317 -0
  4. data/lib/chromadb/collection.rb +573 -0
  5. data/lib/chromadb/embedding_functions/chroma_bm25.rb +459 -0
  6. data/lib/chromadb/embedding_functions/chroma_cloud_qwen.rb +139 -0
  7. data/lib/chromadb/embedding_functions/chroma_cloud_splade.rb +121 -0
  8. data/lib/chromadb/embedding_functions.rb +121 -0
  9. data/lib/chromadb/errors.rb +120 -0
  10. data/lib/chromadb/http_client.rb +142 -0
  11. data/lib/chromadb/openapi/lib/chromadb/api/default_api.rb +2349 -0
  12. data/lib/chromadb/openapi/lib/chromadb/api_client.rb +392 -0
  13. data/lib/chromadb/openapi/lib/chromadb/api_error.rb +58 -0
  14. data/lib/chromadb/openapi/lib/chromadb/configuration.rb +295 -0
  15. data/lib/chromadb/openapi/lib/chromadb/models/add_collection_records_payload.rb +260 -0
  16. data/lib/chromadb/openapi/lib/chromadb/models/attach_function_request.rb +250 -0
  17. data/lib/chromadb/openapi/lib/chromadb/models/attach_function_response.rb +235 -0
  18. data/lib/chromadb/openapi/lib/chromadb/models/attached_function_api_response.rb +361 -0
  19. data/lib/chromadb/openapi/lib/chromadb/models/attached_function_info.rb +240 -0
  20. data/lib/chromadb/openapi/lib/chromadb/models/bool_inverted_index_type.rb +229 -0
  21. data/lib/chromadb/openapi/lib/chromadb/models/bool_value_type.rb +221 -0
  22. data/lib/chromadb/openapi/lib/chromadb/models/checklist_response.rb +245 -0
  23. data/lib/chromadb/openapi/lib/chromadb/models/collection.rb +315 -0
  24. data/lib/chromadb/openapi/lib/chromadb/models/collection_configuration.rb +240 -0
  25. data/lib/chromadb/openapi/lib/chromadb/models/create_collection_payload.rb +260 -0
  26. data/lib/chromadb/openapi/lib/chromadb/models/create_database_payload.rb +220 -0
  27. data/lib/chromadb/openapi/lib/chromadb/models/create_tenant_payload.rb +220 -0
  28. data/lib/chromadb/openapi/lib/chromadb/models/database.rb +240 -0
  29. data/lib/chromadb/openapi/lib/chromadb/models/detach_function_request.rb +221 -0
  30. data/lib/chromadb/openapi/lib/chromadb/models/detach_function_response.rb +220 -0
  31. data/lib/chromadb/openapi/lib/chromadb/models/embedding_function_new_configuration.rb +230 -0
  32. data/lib/chromadb/openapi/lib/chromadb/models/error_response.rb +230 -0
  33. data/lib/chromadb/openapi/lib/chromadb/models/float_inverted_index_type.rb +229 -0
  34. data/lib/chromadb/openapi/lib/chromadb/models/float_list_value_type.rb +221 -0
  35. data/lib/chromadb/openapi/lib/chromadb/models/float_value_type.rb +221 -0
  36. data/lib/chromadb/openapi/lib/chromadb/models/fork_collection_payload.rb +220 -0
  37. data/lib/chromadb/openapi/lib/chromadb/models/fts_index_type.rb +229 -0
  38. data/lib/chromadb/openapi/lib/chromadb/models/get_attached_function_response.rb +224 -0
  39. data/lib/chromadb/openapi/lib/chromadb/models/get_response.rb +270 -0
  40. data/lib/chromadb/openapi/lib/chromadb/models/get_tenant_response.rb +230 -0
  41. data/lib/chromadb/openapi/lib/chromadb/models/get_user_identity_response.rb +246 -0
  42. data/lib/chromadb/openapi/lib/chromadb/models/heartbeat_response.rb +235 -0
  43. data/lib/chromadb/openapi/lib/chromadb/models/hnsw_configuration.rb +330 -0
  44. data/lib/chromadb/openapi/lib/chromadb/models/hnsw_index_config.rb +371 -0
  45. data/lib/chromadb/openapi/lib/chromadb/models/include.rb +210 -0
  46. data/lib/chromadb/openapi/lib/chromadb/models/int_inverted_index_type.rb +229 -0
  47. data/lib/chromadb/openapi/lib/chromadb/models/int_value_type.rb +221 -0
  48. data/lib/chromadb/openapi/lib/chromadb/models/query_response.rb +280 -0
  49. data/lib/chromadb/openapi/lib/chromadb/models/raw_where_fields.rb +230 -0
  50. data/lib/chromadb/openapi/lib/chromadb/models/schema.rb +258 -0
  51. data/lib/chromadb/openapi/lib/chromadb/models/search_payload.rb +256 -0
  52. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_filter.rb +230 -0
  53. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_group_by.rb +230 -0
  54. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_limit.rb +230 -0
  55. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_select.rb +220 -0
  56. data/lib/chromadb/openapi/lib/chromadb/models/search_request_payload.rb +220 -0
  57. data/lib/chromadb/openapi/lib/chromadb/models/search_response.rb +270 -0
  58. data/lib/chromadb/openapi/lib/chromadb/models/space.rb +210 -0
  59. data/lib/chromadb/openapi/lib/chromadb/models/spann_configuration.rb +420 -0
  60. data/lib/chromadb/openapi/lib/chromadb/models/spann_index_config.rb +536 -0
  61. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector.rb +244 -0
  62. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_config.rb +242 -0
  63. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_type.rb +234 -0
  64. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_value_type.rb +221 -0
  65. data/lib/chromadb/openapi/lib/chromadb/models/string_inverted_index_type.rb +229 -0
  66. data/lib/chromadb/openapi/lib/chromadb/models/string_value_type.rb +231 -0
  67. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_configuration.rb +240 -0
  68. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_payload.rb +240 -0
  69. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_records_payload.rb +260 -0
  70. data/lib/chromadb/openapi/lib/chromadb/models/update_hnsw_configuration.rb +345 -0
  71. data/lib/chromadb/openapi/lib/chromadb/models/update_spann_configuration.rb +260 -0
  72. data/lib/chromadb/openapi/lib/chromadb/models/update_tenant_payload.rb +220 -0
  73. data/lib/chromadb/openapi/lib/chromadb/models/upsert_collection_records_payload.rb +260 -0
  74. data/lib/chromadb/openapi/lib/chromadb/models/value_types.rb +271 -0
  75. data/lib/chromadb/openapi/lib/chromadb/models/vector_index_config.rb +261 -0
  76. data/lib/chromadb/openapi/lib/chromadb/models/vector_index_type.rb +234 -0
  77. data/lib/chromadb/openapi/lib/chromadb/version.rb +15 -0
  78. data/lib/chromadb/openapi/lib/chromadb.rb +102 -0
  79. data/lib/chromadb/openapi.rb +6 -0
  80. data/lib/chromadb/schema.rb +744 -0
  81. data/lib/chromadb/schemas/chroma-cloud-qwen.json +61 -0
  82. data/lib/chromadb/schemas/chroma-cloud-splade.json +31 -0
  83. data/lib/chromadb/schemas/chroma_bm25.json +37 -0
  84. data/lib/chromadb/search/key.rb +94 -0
  85. data/lib/chromadb/search/limit.rb +41 -0
  86. data/lib/chromadb/search/rank.rb +425 -0
  87. data/lib/chromadb/search/search.rb +73 -0
  88. data/lib/chromadb/search/select.rb +54 -0
  89. data/lib/chromadb/search/where.rb +157 -0
  90. data/lib/chromadb/search.rb +8 -0
  91. data/lib/chromadb/types/results.rb +96 -0
  92. data/lib/chromadb/types/sparse_vector.rb +86 -0
  93. data/lib/chromadb/types/validation.rb +519 -0
  94. data/lib/chromadb/types.rb +13 -0
  95. data/lib/chromadb/version.rb +5 -0
  96. data/lib/chromadb.rb +15 -0
  97. metadata +233 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: fae99389435684425baadb4d7c94dcd59b4ff2b1b304b157ce63c1a35b541fd8
4
+ data.tar.gz: d231d2cdc468b5eb1301470e6025d032fde0743f7127a1b21cb679f7f717b2e1
5
+ SHA512:
6
+ metadata.gz: 752dfdd6d4ef1b99bc8412910d6c8cf1add8f6fe346d91ffbfc53086b69186aa6d91caed18281b9e6b0f1662a6e308eb69fc53fc7bc05e6e8d4446561f8c0597
7
+ data.tar.gz: 740a7ff83040ce0c8c8c7db33d4d568858d83a3c21113ec79b008cba7a8b7eb5c63ca1d1b3c12c1b4818395c3632b7685da9bc8ccc30fca973984813558bd086
data/lib/chromadb/admin_client.rb ADDED
@@ -0,0 +1,6 @@
# frozen_string_literal: true

module Chroma
  # Subclass of Chroma::Client that declares no methods of its own in this
  # file; its whole interface is inherited from Client.
  class AdminClient < Client
  end
end
data/lib/chromadb/client.rb ADDED
@@ -0,0 +1,317 @@
# frozen_string_literal: true

module Chroma
  # Tenant name used when neither an argument nor CHROMA_TENANT supplies one.
  DEFAULT_TENANT = "default_tenant"
  # Database name used when neither an argument nor CHROMA_DATABASE supplies one.
  DEFAULT_DATABASE = "default_database"

  # Client for a Chroma server's HTTP API.
  #
  # Every request is sent through a Chroma::HttpTransport created in
  # #initialize. Collection-level calls are scoped to the client's current
  # tenant and database (see #tenant_database_path).
  class Client
    attr_reader :tenant, :database, :transport, :headers

    # Builds the transport and resolves the initial tenant/database.
    #
    # Tenant and database are each taken from the explicit argument first,
    # then from the CHROMA_TENANT / CHROMA_DATABASE environment variables,
    # and finally (only when +use_defaults+ is true) from DEFAULT_TENANT /
    # DEFAULT_DATABASE. With +use_defaults+ false they may remain nil.
    #
    # @param host [String] forwarded to the transport
    # @param port [Integer] forwarded to the transport
    # @param ssl [Boolean] forwarded to the transport
    # @param path [String] forwarded to the transport
    # @param headers [Hash, nil] forwarded to the transport
    # @param tenant [String, nil] explicit tenant name
    # @param database [String, nil] explicit database name
    # @param ssl_verify [Boolean] forwarded to the transport
    # @param timeout [Numeric, nil] forwarded to the transport
    # @param use_defaults [Boolean] whether to fall back to the DEFAULT_* names
    def initialize(host: "localhost", port: 8000, ssl: false, path: Chroma::HttpTransport::DEFAULT_API_PATH,
                   headers: nil, tenant: nil, database: nil, ssl_verify: true, timeout: nil, use_defaults: true)
      @host = host
      @port = port
      @ssl = ssl
      @path = path

      @transport = Chroma::HttpTransport.new(
        host: host,
        port: port,
        ssl: ssl,
        path: path,
        headers: headers,
        ssl_verify: ssl_verify,
        timeout: timeout,
      )
      # Expose the transport's view of the headers rather than the raw argument.
      @headers = @transport.headers

      @tenant = tenant || ENV["CHROMA_TENANT"]
      @database = database || ENV["CHROMA_DATABASE"]
      if use_defaults
        @tenant ||= DEFAULT_TENANT
        @database ||= DEFAULT_DATABASE
      end

      register_cloud_api_key_from_headers
    end

    # Issues GET /heartbeat.
    #
    # @return [Object] the "nanosecond heartbeat" (or "nanosecond_heartbeat")
    #   entry when the response is a Hash containing one; otherwise the
    #   response itself
    def heartbeat
      response = @transport.request(:get, "/heartbeat")
      # Only a Hash can be looked up by key; indexing a String or Integer with
      # a String key would return a substring or raise instead.
      return response unless response.is_a?(Hash)

      response["nanosecond heartbeat"] || response["nanosecond_heartbeat"] || response
    end

    # Issues GET /version and returns the transport's response.
    def get_version
      @transport.request(:get, "/version")
    end

    # @return [Hash] the connection values given to #initialize plus the
    #   currently selected tenant and database
    def get_settings
      {
        host: @host,
        port: @port,
        ssl: @ssl,
        path: @path,
        tenant: @tenant,
        database: @database
      }
    end

    # Issues GET /auth/identity and returns the transport's response.
    def get_user_identity
      @transport.request(:get, "/auth/identity")
    end

    # Delegates to the transport's +max_batch_size+.
    def get_max_batch_size
      @transport.max_batch_size
    end

    # Delegates to the transport's +supports_base64_encoding?+.
    def supports_base64_encoding?
      @transport.supports_base64_encoding?
    end

    # Issues POST /reset and returns the transport's response.
    def reset
      @transport.request(:post, "/reset")
    end

    # Switches the client to +tenant+ after checking that the tenant and the
    # target database can be fetched.
    #
    # @param tenant [String]
    # @param database [String, nil] defaults to the current database, then to
    #   DEFAULT_DATABASE
    # @raise [ArgumentError, Chroma::ChromaError] when validation fails
    def set_tenant(tenant, database: nil)
      db = database || @database || DEFAULT_DATABASE
      validate_tenant_database(tenant, db)
      @tenant = tenant
      @database = db
    end

    # Switches the client to +database+ after checking that it can be fetched
    # under the current tenant (or DEFAULT_TENANT when none is set).
    #
    # @param database [String]
    # @raise [ArgumentError, Chroma::ChromaError] when validation fails
    def set_database(database)
      validate_tenant_database(@tenant || DEFAULT_TENANT, database)
      @database = database
    end

    # Issues POST /tenants/<tenant>/databases with the database name.
    #
    # @param name [String]
    # @param tenant [String, nil] resolved via #resolve_tenant_name when nil
    # @return [nil]
    def create_database(name, tenant: nil)
      tenant = resolve_tenant_name(tenant)
      @transport.request(:post, "/tenants/#{tenant}/databases", json: { "name" => name })
      nil
    end

    # Issues GET /tenants/<tenant>/databases/<name> and returns the response.
    #
    # @param name [String]
    # @param tenant [String, nil] resolved via #resolve_tenant_name when nil
    def get_database(name, tenant: nil)
      tenant = resolve_tenant_name(tenant)
      @transport.request(:get, "/tenants/#{tenant}/databases/#{name}")
    end

    # Issues DELETE /tenants/<tenant>/databases/<name>.
    #
    # @param name [String]
    # @param tenant [String, nil] resolved via #resolve_tenant_name when nil
    # @return [nil]
    def delete_database(name, tenant: nil)
      tenant = resolve_tenant_name(tenant)
      @transport.request(:delete, "/tenants/#{tenant}/databases/#{name}")
      nil
    end

    # Issues GET /tenants/<tenant>/databases and returns the response.
    #
    # @param limit [Integer, nil]
    # @param offset [Integer, nil]
    # @param tenant [String, nil] resolved via #resolve_tenant_name when nil
    def list_databases(limit: nil, offset: nil, tenant: nil)
      tenant = resolve_tenant_name(tenant)
      # nil entries are dropped so only pagination options that were actually
      # provided are handed to the transport.
      @transport.request(:get, "/tenants/#{tenant}/databases", params: { limit: limit, offset: offset }.compact)
    end

    # Issues POST /tenants with the tenant name.
    #
    # @param name [String]
    # @return [nil]
    def create_tenant(name)
      @transport.request(:post, "/tenants", json: { "name" => name })
      nil
    end

    # Issues GET /tenants/<name> and returns the response.
    #
    # @param name [String, nil] defaults to the client's current tenant
    # @raise [ArgumentError] when no non-empty tenant name is available
    def get_tenant(name = nil)
      tenant = name || @tenant
      raise ArgumentError, "Tenant name must be provided" if tenant.nil? || tenant.to_s.empty?

      @transport.request(:get, "/tenants/#{tenant}")
    end

    # Lists the collections of the current tenant/database.
    #
    # @param limit [Integer, nil]
    # @param offset [Integer, nil]
    # @return [Array<Collection>] one Collection per entry in the response
    def list_collections(limit: nil, offset: nil)
      scope = tenant_database_path
      response = @transport.request(
        :get,
        "/tenants/#{scope[:tenant]}/databases/#{scope[:database]}/collections",
        # nil entries are dropped so only provided options reach the transport.
        params: { limit: limit, offset: offset }.compact,
      )
      response.map { |collection| build_collection(collection) }
    end

    # Issues GET .../collections_count for the current tenant/database and
    # returns the transport's response.
    def count_collections
      scope = tenant_database_path
      @transport.request(
        :get,
        "/tenants/#{scope[:tenant]}/databases/#{scope[:database]}/collections_count",
      )
    end

    # Creates a collection in the current tenant/database.
    #
    # The caller's +configuration+ object is never modified; the embedding
    # function config is added to a copy of it.
    #
    # @param name [String]
    # @param metadata [Hash, nil] serialized with
    #   Chroma::Types::Validation.serialize_metadata when given
    # @param embedding_function [Object, nil] when given, its prepared config is
    #   sent under configuration["embedding_function"] and the instance is
    #   attached to the returned Collection
    # @param configuration [#to_h, nil]
    # @param schema [Schema, Object, nil] a Schema is serialized with
    #   +serialize_to_json+; any other truthy value is sent as-is
    # @param get_or_create [Boolean] sent as the "get_or_create" payload field
    # @return [Collection]
    def create_collection(name:, metadata: nil, embedding_function: nil, configuration: nil, schema: nil, get_or_create: false)
      scope = tenant_database_path

      payload = { "name" => name, "get_or_create" => get_or_create }
      payload["metadata"] = Chroma::Types::Validation.serialize_metadata(metadata) if metadata

      config_payload = configuration_to_payload(configuration)
      if embedding_function
        config_payload["embedding_function"] = EmbeddingFunctions.prepare_embedding_function_config(embedding_function)
      end
      payload["configuration"] = config_payload unless config_payload.empty?

      if schema
        payload["schema"] = schema.is_a?(Schema) ? schema.serialize_to_json : schema
      end

      response = @transport.request(
        :post,
        "/tenants/#{scope[:tenant]}/databases/#{scope[:database]}/collections",
        json: payload,
      )

      build_collection(response, embedding_function: embedding_function, schema_override: schema)
    end

    # Fetches a collection from the current tenant/database.
    #
    # @param name [String, nil]
    # @param id [String, nil] takes precedence over +name+ when both are given
    # @return [Collection]
    # @raise [ArgumentError] when neither +name+ nor +id+ is given
    def get_collection(name: nil, id: nil)
      identifier = id || name
      raise ArgumentError, "Collection name or id must be provided" unless identifier

      scope = tenant_database_path
      response = @transport.request(
        :get,
        "/tenants/#{scope[:tenant]}/databases/#{scope[:database]}/collections/#{identifier}",
      )
      build_collection(response)
    end

    # Same as #create_collection with +get_or_create: true+.
    #
    # @return [Collection]
    def get_or_create_collection(name:, metadata: nil, embedding_function: nil, configuration: nil, schema: nil)
      create_collection(
        name: name,
        metadata: metadata,
        embedding_function: embedding_function,
        configuration: configuration,
        schema: schema,
        get_or_create: true,
      )
    end

    # Issues DELETE for a collection in the current tenant/database.
    #
    # @param name [String, nil]
    # @param id [String, nil] takes precedence over +name+ when both are given
    # @return [nil]
    # @raise [ArgumentError] when neither +name+ nor +id+ is given
    def delete_collection(name: nil, id: nil)
      identifier = id || name
      raise ArgumentError, "Collection name or id must be provided" unless identifier

      scope = tenant_database_path
      @transport.request(
        :delete,
        "/tenants/#{scope[:tenant]}/databases/#{scope[:database]}/collections/#{identifier}",
      )
      nil
    end

    # Issues PUT for a collection, sending only the fields that were given.
    #
    # @param id [String]
    # @param name [String, nil]
    # @param metadata [Hash, nil]
    # @param configuration [#to_h, nil]
    # @return [nil]
    def update_collection(id:, name: nil, metadata: nil, configuration: nil)
      scope = tenant_database_path
      payload = {}
      payload["name"] = name if name
      payload["metadata"] = Chroma::Types::Validation.serialize_metadata(metadata) if metadata
      config_payload = configuration_to_payload(configuration)
      payload["configuration"] = config_payload unless config_payload.empty?

      @transport.request(
        :put,
        "/tenants/#{scope[:tenant]}/databases/#{scope[:database]}/collections/#{id}",
        json: payload,
      )
      nil
    end

    # Public entry point to the private tenant/database resolution.
    #
    # @return [Hash] +{ tenant:, database: }+
    def tenant_database_path
      resolve_tenant_database
    end

    private

    # Returns the current tenant/database pair, looking it up from
    # /auth/identity (and caching it) when either one is unset.
    #
    # @raise [Chroma::AuthorizationError] when the identity lists no databases
    # @raise [Chroma::InvalidArgumentError] when the identity lists more than
    #   one database or the wildcard "*"
    def resolve_tenant_database
      return { tenant: @tenant, database: @database } if @tenant && @database

      identity = get_user_identity
      tenant = identity["tenant"]
      databases = Array(identity["databases"]).uniq
      if databases.empty?
        raise Chroma::AuthorizationError, "Your API key does not have access to any databases for tenant #{tenant}"
      end
      if databases.length > 1 || databases.first == "*"
        raise Chroma::InvalidArgumentError,
              "Your API key is scoped to more than 1 DB. Please provide a DB name to the CloudClient constructor"
      end

      @tenant = tenant
      @database = databases.first
      { tenant: @tenant, database: @database }
    end

    # Picks the tenant for a database-level call: the explicit argument, then
    # the client's tenant, then the tenant reported by /auth/identity (cached
    # on success), and finally DEFAULT_TENANT.
    def resolve_tenant_name(tenant)
      return tenant if tenant
      return @tenant if @tenant

      begin
        identity = get_user_identity
        resolved = identity.is_a?(Hash) ? identity["tenant"] : nil
        @tenant = resolved if resolved
      rescue StandardError
        # Best effort: ignore and fall back to the default tenant.
      end

      @tenant || DEFAULT_TENANT
    end

    # Converts a configuration argument into a Hash payload.
    #
    # A copy is returned because Hash#to_h returns the receiver itself, and
    # callers of this helper add keys to the result.
    def configuration_to_payload(configuration)
      return {} if configuration.nil?
      return configuration.to_h.dup if configuration.respond_to?(:to_h)

      configuration
    end

    # Wraps a collection response in a Collection.
    #
    # @param model [Hash] collection data returned by the transport
    # @param embedding_function [Object, nil] used as-is when given; otherwise
    #   one is built from the model's embedding function config
    # @param schema_override [Schema, Object, nil] used only when it is a
    #   Schema; otherwise the model's "schema" entry is deserialized
    def build_collection(model, embedding_function: nil, schema_override: nil)
      schema =
        if schema_override.is_a?(Schema)
          schema_override
        else
          Schema.deserialize_from_json(model["schema"], client: self)
        end
      config_json = model["configuration_json"] || model["configuration"] || {}
      ef_instance = embedding_function ||
                    EmbeddingFunctions.build_embedding_function(config_json["embedding_function"], client: self)

      Collection.new(
        client: self,
        model: model,
        embedding_function: ef_instance,
        schema: schema,
      )
    end

    # Checks that both the tenant and the database can be fetched.
    #
    # Chroma errors propagate unchanged; connection failures and any other
    # StandardError are reported as ArgumentError.
    def validate_tenant_database(tenant, database)
      get_tenant(tenant)
      get_database(database, tenant: tenant)
    rescue Faraday::ConnectionFailed
      raise ArgumentError, "Could not connect to a Chroma server. Are you sure it is running?"
    rescue Chroma::ChromaError
      raise
    rescue StandardError
      raise ArgumentError, "Could not connect to tenant #{tenant}. Are you sure it exists?"
    end

    # Hands the x-chroma-token header value (nil when absent) to
    # Chroma::SharedState.register_cloud_api_key.
    def register_cloud_api_key_from_headers
      token = @headers["x-chroma-token"] || @headers["X-Chroma-Token"]
      Chroma::SharedState.register_cloud_api_key(token)
    end
  end

  # Subclass of Client that adds no behavior of its own in this file.
  class HttpClient < Client
  end

  # Client preconfigured for a hosted endpoint that authenticates with an API
  # key sent in the "x-chroma-token" header.
  class CloudClient < Client
    # @param cloud_host [String] passed to Client as +host+
    # @param cloud_port [Integer] passed to Client as +port+
    # @param enable_ssl [Boolean] passed to Client as +ssl+
    # @param headers [Hash, nil] extra headers; copied, never modified. An
    #   existing "x-chroma-token" entry is kept in preference to +api_key+
    # @param tenant [String, nil]
    # @param database [String, nil]
    # @param api_key [String, nil] defaults to ENV["CHROMA_API_KEY"]
    # @param ssl_verify [Boolean]
    # @param timeout [Numeric, nil]
    # @raise [ArgumentError] when no non-empty API key is available
    def initialize(cloud_host: "api.trychroma.com", cloud_port: 443, enable_ssl: true, headers: nil,
                   tenant: nil, database: nil, api_key: nil, ssl_verify: true, timeout: nil)
      api_key ||= ENV["CHROMA_API_KEY"]
      raise ArgumentError, "CHROMA_API_KEY is required for CloudClient" if api_key.nil? || api_key.empty?

      merged_headers = (headers || {}).dup
      merged_headers["x-chroma-token"] ||= api_key

      # use_defaults is false so that an unset tenant/database stays nil and is
      # resolved later by Client instead of falling back to the DEFAULT_* names.
      super(
        host: cloud_host,
        port: cloud_port,
        ssl: enable_ssl,
        headers: merged_headers,
        tenant: tenant,
        database: database,
        ssl_verify: ssl_verify,
        timeout: timeout,
        use_defaults: false,
      )
    end
  end
end