chromadb-experimental 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. checksums.yaml +7 -0
  2. data/lib/chromadb/admin_client.rb +6 -0
  3. data/lib/chromadb/client.rb +317 -0
  4. data/lib/chromadb/collection.rb +573 -0
  5. data/lib/chromadb/embedding_functions/chroma_bm25.rb +459 -0
  6. data/lib/chromadb/embedding_functions/chroma_cloud_qwen.rb +139 -0
  7. data/lib/chromadb/embedding_functions/chroma_cloud_splade.rb +121 -0
  8. data/lib/chromadb/embedding_functions.rb +121 -0
  9. data/lib/chromadb/errors.rb +120 -0
  10. data/lib/chromadb/http_client.rb +142 -0
  11. data/lib/chromadb/openapi/lib/chromadb/api/default_api.rb +2349 -0
  12. data/lib/chromadb/openapi/lib/chromadb/api_client.rb +392 -0
  13. data/lib/chromadb/openapi/lib/chromadb/api_error.rb +58 -0
  14. data/lib/chromadb/openapi/lib/chromadb/configuration.rb +295 -0
  15. data/lib/chromadb/openapi/lib/chromadb/models/add_collection_records_payload.rb +260 -0
  16. data/lib/chromadb/openapi/lib/chromadb/models/attach_function_request.rb +250 -0
  17. data/lib/chromadb/openapi/lib/chromadb/models/attach_function_response.rb +235 -0
  18. data/lib/chromadb/openapi/lib/chromadb/models/attached_function_api_response.rb +361 -0
  19. data/lib/chromadb/openapi/lib/chromadb/models/attached_function_info.rb +240 -0
  20. data/lib/chromadb/openapi/lib/chromadb/models/bool_inverted_index_type.rb +229 -0
  21. data/lib/chromadb/openapi/lib/chromadb/models/bool_value_type.rb +221 -0
  22. data/lib/chromadb/openapi/lib/chromadb/models/checklist_response.rb +245 -0
  23. data/lib/chromadb/openapi/lib/chromadb/models/collection.rb +315 -0
  24. data/lib/chromadb/openapi/lib/chromadb/models/collection_configuration.rb +240 -0
  25. data/lib/chromadb/openapi/lib/chromadb/models/create_collection_payload.rb +260 -0
  26. data/lib/chromadb/openapi/lib/chromadb/models/create_database_payload.rb +220 -0
  27. data/lib/chromadb/openapi/lib/chromadb/models/create_tenant_payload.rb +220 -0
  28. data/lib/chromadb/openapi/lib/chromadb/models/database.rb +240 -0
  29. data/lib/chromadb/openapi/lib/chromadb/models/detach_function_request.rb +221 -0
  30. data/lib/chromadb/openapi/lib/chromadb/models/detach_function_response.rb +220 -0
  31. data/lib/chromadb/openapi/lib/chromadb/models/embedding_function_new_configuration.rb +230 -0
  32. data/lib/chromadb/openapi/lib/chromadb/models/error_response.rb +230 -0
  33. data/lib/chromadb/openapi/lib/chromadb/models/float_inverted_index_type.rb +229 -0
  34. data/lib/chromadb/openapi/lib/chromadb/models/float_list_value_type.rb +221 -0
  35. data/lib/chromadb/openapi/lib/chromadb/models/float_value_type.rb +221 -0
  36. data/lib/chromadb/openapi/lib/chromadb/models/fork_collection_payload.rb +220 -0
  37. data/lib/chromadb/openapi/lib/chromadb/models/fts_index_type.rb +229 -0
  38. data/lib/chromadb/openapi/lib/chromadb/models/get_attached_function_response.rb +224 -0
  39. data/lib/chromadb/openapi/lib/chromadb/models/get_response.rb +270 -0
  40. data/lib/chromadb/openapi/lib/chromadb/models/get_tenant_response.rb +230 -0
  41. data/lib/chromadb/openapi/lib/chromadb/models/get_user_identity_response.rb +246 -0
  42. data/lib/chromadb/openapi/lib/chromadb/models/heartbeat_response.rb +235 -0
  43. data/lib/chromadb/openapi/lib/chromadb/models/hnsw_configuration.rb +330 -0
  44. data/lib/chromadb/openapi/lib/chromadb/models/hnsw_index_config.rb +371 -0
  45. data/lib/chromadb/openapi/lib/chromadb/models/include.rb +210 -0
  46. data/lib/chromadb/openapi/lib/chromadb/models/int_inverted_index_type.rb +229 -0
  47. data/lib/chromadb/openapi/lib/chromadb/models/int_value_type.rb +221 -0
  48. data/lib/chromadb/openapi/lib/chromadb/models/query_response.rb +280 -0
  49. data/lib/chromadb/openapi/lib/chromadb/models/raw_where_fields.rb +230 -0
  50. data/lib/chromadb/openapi/lib/chromadb/models/schema.rb +258 -0
  51. data/lib/chromadb/openapi/lib/chromadb/models/search_payload.rb +256 -0
  52. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_filter.rb +230 -0
  53. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_group_by.rb +230 -0
  54. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_limit.rb +230 -0
  55. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_select.rb +220 -0
  56. data/lib/chromadb/openapi/lib/chromadb/models/search_request_payload.rb +220 -0
  57. data/lib/chromadb/openapi/lib/chromadb/models/search_response.rb +270 -0
  58. data/lib/chromadb/openapi/lib/chromadb/models/space.rb +210 -0
  59. data/lib/chromadb/openapi/lib/chromadb/models/spann_configuration.rb +420 -0
  60. data/lib/chromadb/openapi/lib/chromadb/models/spann_index_config.rb +536 -0
  61. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector.rb +244 -0
  62. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_config.rb +242 -0
  63. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_type.rb +234 -0
  64. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_value_type.rb +221 -0
  65. data/lib/chromadb/openapi/lib/chromadb/models/string_inverted_index_type.rb +229 -0
  66. data/lib/chromadb/openapi/lib/chromadb/models/string_value_type.rb +231 -0
  67. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_configuration.rb +240 -0
  68. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_payload.rb +240 -0
  69. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_records_payload.rb +260 -0
  70. data/lib/chromadb/openapi/lib/chromadb/models/update_hnsw_configuration.rb +345 -0
  71. data/lib/chromadb/openapi/lib/chromadb/models/update_spann_configuration.rb +260 -0
  72. data/lib/chromadb/openapi/lib/chromadb/models/update_tenant_payload.rb +220 -0
  73. data/lib/chromadb/openapi/lib/chromadb/models/upsert_collection_records_payload.rb +260 -0
  74. data/lib/chromadb/openapi/lib/chromadb/models/value_types.rb +271 -0
  75. data/lib/chromadb/openapi/lib/chromadb/models/vector_index_config.rb +261 -0
  76. data/lib/chromadb/openapi/lib/chromadb/models/vector_index_type.rb +234 -0
  77. data/lib/chromadb/openapi/lib/chromadb/version.rb +15 -0
  78. data/lib/chromadb/openapi/lib/chromadb.rb +102 -0
  79. data/lib/chromadb/openapi.rb +6 -0
  80. data/lib/chromadb/schema.rb +744 -0
  81. data/lib/chromadb/schemas/chroma-cloud-qwen.json +61 -0
  82. data/lib/chromadb/schemas/chroma-cloud-splade.json +31 -0
  83. data/lib/chromadb/schemas/chroma_bm25.json +37 -0
  84. data/lib/chromadb/search/key.rb +94 -0
  85. data/lib/chromadb/search/limit.rb +41 -0
  86. data/lib/chromadb/search/rank.rb +425 -0
  87. data/lib/chromadb/search/search.rb +73 -0
  88. data/lib/chromadb/search/select.rb +54 -0
  89. data/lib/chromadb/search/where.rb +157 -0
  90. data/lib/chromadb/search.rb +8 -0
  91. data/lib/chromadb/types/results.rb +96 -0
  92. data/lib/chromadb/types/sparse_vector.rb +86 -0
  93. data/lib/chromadb/types/validation.rb +519 -0
  94. data/lib/chromadb/types.rb +13 -0
  95. data/lib/chromadb/version.rb +5 -0
  96. data/lib/chromadb.rb +15 -0
  97. metadata +233 -0
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json_schemer"
4
+ require "porter2stemmer"
5
+
6
module Chroma
  # Registry of embedding-function classes plus helpers that turn an embedding
  # function into a serialisable config hash and back again.
  module EmbeddingFunctions
    # name => class registries. Left unfrozen on purpose: the register_*
    # methods below add entries to them at load time.
    KNOWN_DENSE = {}
    KNOWN_SPARSE = {}

    module_function

    # Registers a dense embedding function class under +name+.
    #
    # @raise [ArgumentError] if +name+ is already present in KNOWN_DENSE
    def register_embedding_function(name, klass)
      raise ArgumentError, "Embedding function #{name} already registered" if KNOWN_DENSE.key?(name)

      KNOWN_DENSE[name] = klass
    end

    # Registers a sparse embedding function class under +name+.
    #
    # @raise [ArgumentError] if +name+ is already present in KNOWN_SPARSE
    def register_sparse_embedding_function(name, klass)
      raise ArgumentError, "Sparse embedding function #{name} already registered" if KNOWN_SPARSE.key?(name)

      KNOWN_SPARSE[name] = klass
    end

    # Returns +fn.name+ when +fn+ responds to +name+ and the value is a String,
    # otherwise nil.
    def resolve_name(fn)
      return nil unless fn && fn.respond_to?(:name)

      value = fn.name
      value.is_a?(String) ? value : nil
    end

    # Builds the config hash describing +fn+.
    #
    # Returns <tt>{ "type" => "legacy" }</tt> when +fn+ is nil, has no String
    # name, does not respond to +get_config+, or its class does not respond to
    # +build_from_config+. Otherwise the config is validated through
    # +validate_config+ (instance-level first, then class-level, if either
    # exists) and a "known" hash carrying the name and config is returned.
    def prepare_embedding_function_config(fn)
      name = resolve_name(fn)
      serializable = name && fn.respond_to?(:get_config) && fn.class.respond_to?(:build_from_config)
      return { "type" => "legacy" } unless serializable

      config = fn.get_config
      validator = [fn, fn.class].find { |candidate| candidate.respond_to?(:validate_config) }
      validator&.validate_config(config)

      { "type" => "known", "name" => name, "config" => config }
    end

    # Instantiates a dense embedding function from a "known" config hash.
    #
    # @return the object built by the registered class, or nil when the config
    #   is not a "known" Hash, the name is not registered in KNOWN_DENSE, or
    #   the class lacks +build_from_config+
    def build_embedding_function(ef_config, client: nil)
      build_from_registry(KNOWN_DENSE, ef_config, client: client)
    end

    # Sparse counterpart of #build_embedding_function; looks up KNOWN_SPARSE.
    def build_sparse_embedding_function(ef_config, client: nil)
      build_from_registry(KNOWN_SPARSE, ef_config, client: client)
    end

    # Validates +config+ against the JSON schema named +schema_name+.
    #
    # @return [nil] when the config is valid
    # @raise [ArgumentError] listing the distinct validation messages
    def validate_config_schema(config, schema_name)
      errors = schema_for(schema_name).validate(config).to_a
      return if errors.empty?

      messages = errors.map { |err| schema_error_message(err) }.uniq
      raise ArgumentError, "Invalid configuration for #{schema_name}: #{messages.join('; ')}"
    end

    # Returns the (memoised) schemer for +schema_name+, loading
    # schemas/<schema_name>.json relative to this file on first use.
    def schema_for(schema_name)
      @schemers ||= {}
      @schemers[schema_name] ||= begin
        schema_path = File.expand_path("schemas/#{schema_name}.json", __dir__)
        JSONSchemer.schema(JSON.parse(File.read(schema_path)))
      end
    end

    # Shared lookup/build logic behind both build_* entry points.
    def build_from_registry(registry, ef_config, client: nil)
      return nil unless ef_config.is_a?(Hash) && ef_config["type"] == "known"

      klass = registry[ef_config["name"]]
      return nil unless klass && klass.respond_to?(:build_from_config)

      klass.build_from_config(ef_config["config"] || {}, client: client)
    end

    # Extracts human-readable text from one validation error hash.
    # json_schemer reports the text under "error"; "message" is still honoured
    # for compatibility, and a type/pointer summary is the last resort so the
    # raised ArgumentError never ends up with an empty reason.
    def schema_error_message(err)
      text = err["error"] || err["message"]
      return text if text

      pointer = err["data_pointer"].to_s
      "#{err['type']} violation at #{pointer.empty? ? '/' : pointer}"
    end

    private_class_method :build_from_registry, :schema_error_message
  end

  # Module-level holder for a cloud API key.
  module SharedState
    @cloud_api_key = nil

    module_function

    # Stores +key+ unless it is nil or its string form is empty; an already
    # stored key is left untouched in that case.
    def register_cloud_api_key(key)
      @cloud_api_key = key if key && !key.to_s.empty?
    end

    # Returns the stored key, or nil when none has been registered.
    def cloud_api_key
      @cloud_api_key
    end
  end
end
118
+
119
+ require_relative "embedding_functions/chroma_cloud_qwen"
120
+ require_relative "embedding_functions/chroma_cloud_splade"
121
+ require_relative "embedding_functions/chroma_bm25"
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ class ChromaError < StandardError
5
+ attr_accessor :trace_id
6
+
7
+ def self.name
8
+ "ChromaError"
9
+ end
10
+ end
11
+
12
+ class InvalidDimensionError < ChromaError
13
+ def self.name
14
+ "InvalidDimension"
15
+ end
16
+ end
17
+
18
+ class IDAlreadyExistsError < ChromaError
19
+ def self.name
20
+ "IDAlreadyExists"
21
+ end
22
+ end
23
+
24
+ class ChromaAuthError < ChromaError
25
+ def self.name
26
+ "AuthError"
27
+ end
28
+ end
29
+
30
+ class DuplicateIDError < ChromaError
31
+ def self.name
32
+ "DuplicateID"
33
+ end
34
+ end
35
+
36
+ class InvalidArgumentError < ChromaError
37
+ def self.name
38
+ "InvalidArgument"
39
+ end
40
+ end
41
+
42
+ class InvalidUUIDError < ChromaError
43
+ def self.name
44
+ "InvalidUUID"
45
+ end
46
+ end
47
+
48
+ class InvalidHTTPVersionError < ChromaError
49
+ def self.name
50
+ "InvalidHTTPVersion"
51
+ end
52
+ end
53
+
54
+ class AuthorizationError < ChromaError
55
+ def self.name
56
+ "AuthorizationError"
57
+ end
58
+ end
59
+
60
+ class NotFoundError < ChromaError
61
+ def self.name
62
+ "NotFoundError"
63
+ end
64
+ end
65
+
66
+ class UniqueConstraintError < ChromaError
67
+ def self.name
68
+ "UniqueConstraintError"
69
+ end
70
+ end
71
+
72
+ class BatchSizeExceededError < ChromaError
73
+ def self.name
74
+ "BatchSizeExceededError"
75
+ end
76
+ end
77
+
78
+ class VersionMismatchError < ChromaError
79
+ def self.name
80
+ "VersionMismatchError"
81
+ end
82
+ end
83
+
84
+ class InternalError < ChromaError
85
+ def self.name
86
+ "InternalError"
87
+ end
88
+ end
89
+
90
+ class RateLimitError < ChromaError
91
+ def self.name
92
+ "RateLimitError"
93
+ end
94
+ end
95
+
96
+ class QuotaError < ChromaError
97
+ def self.name
98
+ "QuotaError"
99
+ end
100
+ end
101
+
102
+ ERROR_TYPES = {
103
+ "InvalidDimension" => InvalidDimensionError,
104
+ "InvalidArgumentError" => InvalidArgumentError,
105
+ "IDAlreadyExists" => IDAlreadyExistsError,
106
+ "DuplicateID" => DuplicateIDError,
107
+ "InvalidUUID" => InvalidUUIDError,
108
+ "InvalidHTTPVersion" => InvalidHTTPVersionError,
109
+ "AuthorizationError" => AuthorizationError,
110
+ "NotFoundError" => NotFoundError,
111
+ "BatchSizeExceededError" => BatchSizeExceededError,
112
+ "VersionMismatchError" => VersionMismatchError,
113
+ "RateLimitError" => RateLimitError,
114
+ "AuthError" => ChromaAuthError,
115
+ "UniqueConstraintError" => UniqueConstraintError,
116
+ "QuotaError" => QuotaError,
117
+ "InternalError" => InternalError,
118
+ "ChromaError" => ChromaError
119
+ }.freeze
120
+ end
@@ -0,0 +1,142 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "faraday"
4
+ require "uri"
5
+
6
module Chroma
  # HTTP transport for the Chroma API: owns a Faraday connection rooted at the
  # resolved base URL, sends JSON requests and converts error responses into
  # Chroma error classes.
  class HttpTransport
    DEFAULT_API_PATH = "/api/v2"

    attr_reader :base_url, :headers

    # @param host [String] bare host name, or a full http(s):// URL
    # @param port [Integer] port used only when +host+ carries no scheme
    # @param ssl [Boolean] selects https when +host+ carries no scheme
    # @param path [String] API path suffix appended to the base URL
    # @param headers [Hash, nil] extra request headers (copied, not mutated)
    # @param ssl_verify [Boolean] forwarded to Faraday's ssl verify option
    # @param timeout [Numeric, nil] applied to both read and open timeouts
    def initialize(host:, port: 8000, ssl: false, path: DEFAULT_API_PATH, headers: nil, ssl_verify: true, timeout: nil)
      @base_url = self.class.resolve_url(host: host, port: port, ssl: ssl, default_api_path: path)
      @headers = (headers || {}).dup
      @headers["Content-Type"] ||= "application/json"
      @headers["User-Agent"] ||= "Chroma Ruby Client v#{Chroma::VERSION} (https://github.com/chroma-core/chroma)"
      @preflight_checks = nil

      @connection = Faraday.new(url: @base_url, ssl: { verify: ssl_verify }) do |builder|
        if timeout
          builder.options.timeout = timeout
          builder.options.open_timeout = timeout
        end
      end
    end

    # Performs one request relative to the base URL.
    #
    # @param method [Symbol, String] HTTP verb
    # @param path [String] request path; a single leading "/" is stripped
    # @param params [Hash, nil] query parameters; nil values are omitted
    # @param json [Object, nil] payload serialised with JSON.generate
    # @return parsed JSON, the raw body when it is not JSON, or nil when empty
    # @raise [Chroma::ChromaError] (or a subclass) on a non-success response
    def request(method, path, params: nil, json: nil)
      relative_path = path.delete_prefix("/")
      response = @connection.run_request(method.to_sym, relative_path, nil, @headers) do |req|
        req.params.update(clean_params(params)) if params
        req.body = JSON.generate(json) if json
      end

      raise_chroma_error(response) unless response.success?

      parse_response(response.body)
    end

    # Fetches "/pre-flight-checks" once and memoises the result.
    def get_pre_flight_checks
      @preflight_checks ||= request(:get, "/pre-flight-checks")
    end

    # True only when the pre-flight checks are a Hash whose
    # "supports_base64_encoding" entry is exactly true.
    def supports_base64_encoding?
      checks = get_pre_flight_checks
      checks.is_a?(Hash) && checks["supports_base64_encoding"] == true
    end

    # Returns the "max_batch_size" entry of the pre-flight checks, or -1 when
    # the checks are not a Hash or the entry is absent.
    def max_batch_size
      checks = get_pre_flight_checks
      return -1 unless checks.is_a?(Hash)

      checks.fetch("max_batch_size", -1)
    end

    # Returns a copy of the headers sent with every request.
    def request_headers
      @headers.dup
    end

    # Builds the base URL for the API.
    #
    # When +host+ starts with http:// or https:// its own scheme, port and path
    # are used and +port+ / +ssl+ are ignored. Otherwise +host+ is treated as a
    # bare host name (even if it merely begins with the letters "http", e.g.
    # "httpbin.org"), +ssl+ picks the scheme and +port+ is appended.
    # +default_api_path+ is appended unless the path already ends with it; nil
    # is treated like an empty string.
    #
    # @raise [ArgumentError] see .validate_host
    def self.resolve_url(host:, port:, ssl:, default_api_path: DEFAULT_API_PATH)
      validate_host(host)

      has_scheme = host.start_with?("http://", "https://")
      parsed = URI.parse(has_scheme ? host : "http://#{host}")
      scheme = has_scheme ? parsed.scheme : (ssl ? "https" : "http")

      netloc = parsed.host || parsed.path
      netloc = "#{netloc}:#{parsed.port}" if has_scheme && parsed.port
      port_segment = has_scheme ? "" : ":#{port}"

      api_path = default_api_path.to_s
      path = parsed.path.to_s
      path = api_path if path.empty? || path == netloc
      path = path.chomp("/")
      path += api_path unless api_path.empty? || path.end_with?(api_path)
      path = "/#{path}" unless path.start_with?("/")
      path = path.gsub(%r{//+}, "/") # collapse accidental duplicate slashes

      "#{scheme}://#{netloc}#{port_segment}#{path}"
    end

    # Rejects hosts that contain a "/" but carry no scheme, or a scheme other
    # than http / https.
    #
    # @raise [ArgumentError] with an explanatory message in either case
    def self.validate_host(host)
      parsed = URI.parse(host)
      if host.include?("/") && parsed.scheme.nil?
        raise ArgumentError,
              "Invalid URL. Seems that you are trying to pass URL as a host but without specifying the protocol. Please add http:// or https:// to the host."
      end
      if host.include?("/") && !%w[http https].include?(parsed.scheme)
        raise ArgumentError, "Invalid URL. Unrecognized protocol - #{parsed.scheme}."
      end
    end

    private

    # Drops query parameters whose value is nil.
    def clean_params(params)
      params.reject { |_k, v| v.nil? }
    end

    # Parses a response body as JSON; blank bodies become nil and bodies that
    # are not valid JSON are returned unchanged.
    def parse_response(body)
      return nil if body.nil? || body.to_s.strip.empty?

      JSON.parse(body)
    rescue JSON::ParserError
      body
    end

    # Raises the Chroma error matching an unsuccessful response.
    #
    # A JSON body with an "error" entry is mapped through Chroma::ERROR_TYPES
    # (falling back to Chroma::ChromaError) using its "message". Anything else
    # raises Chroma::ChromaError with the raw body, or "HTTP <status>" when the
    # body is blank. The "chroma-trace-id" response header is attached to the
    # error in both cases.
    def raise_chroma_error(response)
      body = response.body
      trace_id = response.headers["chroma-trace-id"]

      payload = begin
        # to_s keeps a nil body on the ParserError path instead of a TypeError.
        JSON.parse(body.to_s)
      rescue JSON::ParserError
        nil
      end

      if payload.is_a?(Hash) && payload["error"]
        error_class = Chroma::ERROR_TYPES[payload["error"]] || Chroma::ChromaError
        error = error_class.new(payload["message"])
        error.trace_id = trace_id if error.respond_to?(:trace_id=)
        raise error
      end

      message = body.to_s
      message = "HTTP #{response.status}" if message.strip.empty? && response.respond_to?(:status)
      message = "#{message} (trace ID: #{trace_id})" if trace_id
      error = Chroma::ChromaError.new(message)
      error.trace_id = trace_id if error.respond_to?(:trace_id=)
      raise error
    end
  end
end