chromadb-experimental 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. checksums.yaml +7 -0
  2. data/lib/chromadb/admin_client.rb +6 -0
  3. data/lib/chromadb/client.rb +317 -0
  4. data/lib/chromadb/collection.rb +573 -0
  5. data/lib/chromadb/embedding_functions/chroma_bm25.rb +459 -0
  6. data/lib/chromadb/embedding_functions/chroma_cloud_qwen.rb +139 -0
  7. data/lib/chromadb/embedding_functions/chroma_cloud_splade.rb +121 -0
  8. data/lib/chromadb/embedding_functions.rb +121 -0
  9. data/lib/chromadb/errors.rb +120 -0
  10. data/lib/chromadb/http_client.rb +142 -0
  11. data/lib/chromadb/openapi/lib/chromadb/api/default_api.rb +2349 -0
  12. data/lib/chromadb/openapi/lib/chromadb/api_client.rb +392 -0
  13. data/lib/chromadb/openapi/lib/chromadb/api_error.rb +58 -0
  14. data/lib/chromadb/openapi/lib/chromadb/configuration.rb +295 -0
  15. data/lib/chromadb/openapi/lib/chromadb/models/add_collection_records_payload.rb +260 -0
  16. data/lib/chromadb/openapi/lib/chromadb/models/attach_function_request.rb +250 -0
  17. data/lib/chromadb/openapi/lib/chromadb/models/attach_function_response.rb +235 -0
  18. data/lib/chromadb/openapi/lib/chromadb/models/attached_function_api_response.rb +361 -0
  19. data/lib/chromadb/openapi/lib/chromadb/models/attached_function_info.rb +240 -0
  20. data/lib/chromadb/openapi/lib/chromadb/models/bool_inverted_index_type.rb +229 -0
  21. data/lib/chromadb/openapi/lib/chromadb/models/bool_value_type.rb +221 -0
  22. data/lib/chromadb/openapi/lib/chromadb/models/checklist_response.rb +245 -0
  23. data/lib/chromadb/openapi/lib/chromadb/models/collection.rb +315 -0
  24. data/lib/chromadb/openapi/lib/chromadb/models/collection_configuration.rb +240 -0
  25. data/lib/chromadb/openapi/lib/chromadb/models/create_collection_payload.rb +260 -0
  26. data/lib/chromadb/openapi/lib/chromadb/models/create_database_payload.rb +220 -0
  27. data/lib/chromadb/openapi/lib/chromadb/models/create_tenant_payload.rb +220 -0
  28. data/lib/chromadb/openapi/lib/chromadb/models/database.rb +240 -0
  29. data/lib/chromadb/openapi/lib/chromadb/models/detach_function_request.rb +221 -0
  30. data/lib/chromadb/openapi/lib/chromadb/models/detach_function_response.rb +220 -0
  31. data/lib/chromadb/openapi/lib/chromadb/models/embedding_function_new_configuration.rb +230 -0
  32. data/lib/chromadb/openapi/lib/chromadb/models/error_response.rb +230 -0
  33. data/lib/chromadb/openapi/lib/chromadb/models/float_inverted_index_type.rb +229 -0
  34. data/lib/chromadb/openapi/lib/chromadb/models/float_list_value_type.rb +221 -0
  35. data/lib/chromadb/openapi/lib/chromadb/models/float_value_type.rb +221 -0
  36. data/lib/chromadb/openapi/lib/chromadb/models/fork_collection_payload.rb +220 -0
  37. data/lib/chromadb/openapi/lib/chromadb/models/fts_index_type.rb +229 -0
  38. data/lib/chromadb/openapi/lib/chromadb/models/get_attached_function_response.rb +224 -0
  39. data/lib/chromadb/openapi/lib/chromadb/models/get_response.rb +270 -0
  40. data/lib/chromadb/openapi/lib/chromadb/models/get_tenant_response.rb +230 -0
  41. data/lib/chromadb/openapi/lib/chromadb/models/get_user_identity_response.rb +246 -0
  42. data/lib/chromadb/openapi/lib/chromadb/models/heartbeat_response.rb +235 -0
  43. data/lib/chromadb/openapi/lib/chromadb/models/hnsw_configuration.rb +330 -0
  44. data/lib/chromadb/openapi/lib/chromadb/models/hnsw_index_config.rb +371 -0
  45. data/lib/chromadb/openapi/lib/chromadb/models/include.rb +210 -0
  46. data/lib/chromadb/openapi/lib/chromadb/models/int_inverted_index_type.rb +229 -0
  47. data/lib/chromadb/openapi/lib/chromadb/models/int_value_type.rb +221 -0
  48. data/lib/chromadb/openapi/lib/chromadb/models/query_response.rb +280 -0
  49. data/lib/chromadb/openapi/lib/chromadb/models/raw_where_fields.rb +230 -0
  50. data/lib/chromadb/openapi/lib/chromadb/models/schema.rb +258 -0
  51. data/lib/chromadb/openapi/lib/chromadb/models/search_payload.rb +256 -0
  52. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_filter.rb +230 -0
  53. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_group_by.rb +230 -0
  54. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_limit.rb +230 -0
  55. data/lib/chromadb/openapi/lib/chromadb/models/search_payload_select.rb +220 -0
  56. data/lib/chromadb/openapi/lib/chromadb/models/search_request_payload.rb +220 -0
  57. data/lib/chromadb/openapi/lib/chromadb/models/search_response.rb +270 -0
  58. data/lib/chromadb/openapi/lib/chromadb/models/space.rb +210 -0
  59. data/lib/chromadb/openapi/lib/chromadb/models/spann_configuration.rb +420 -0
  60. data/lib/chromadb/openapi/lib/chromadb/models/spann_index_config.rb +536 -0
  61. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector.rb +244 -0
  62. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_config.rb +242 -0
  63. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_type.rb +234 -0
  64. data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_value_type.rb +221 -0
  65. data/lib/chromadb/openapi/lib/chromadb/models/string_inverted_index_type.rb +229 -0
  66. data/lib/chromadb/openapi/lib/chromadb/models/string_value_type.rb +231 -0
  67. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_configuration.rb +240 -0
  68. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_payload.rb +240 -0
  69. data/lib/chromadb/openapi/lib/chromadb/models/update_collection_records_payload.rb +260 -0
  70. data/lib/chromadb/openapi/lib/chromadb/models/update_hnsw_configuration.rb +345 -0
  71. data/lib/chromadb/openapi/lib/chromadb/models/update_spann_configuration.rb +260 -0
  72. data/lib/chromadb/openapi/lib/chromadb/models/update_tenant_payload.rb +220 -0
  73. data/lib/chromadb/openapi/lib/chromadb/models/upsert_collection_records_payload.rb +260 -0
  74. data/lib/chromadb/openapi/lib/chromadb/models/value_types.rb +271 -0
  75. data/lib/chromadb/openapi/lib/chromadb/models/vector_index_config.rb +261 -0
  76. data/lib/chromadb/openapi/lib/chromadb/models/vector_index_type.rb +234 -0
  77. data/lib/chromadb/openapi/lib/chromadb/version.rb +15 -0
  78. data/lib/chromadb/openapi/lib/chromadb.rb +102 -0
  79. data/lib/chromadb/openapi.rb +6 -0
  80. data/lib/chromadb/schema.rb +744 -0
  81. data/lib/chromadb/schemas/chroma-cloud-qwen.json +61 -0
  82. data/lib/chromadb/schemas/chroma-cloud-splade.json +31 -0
  83. data/lib/chromadb/schemas/chroma_bm25.json +37 -0
  84. data/lib/chromadb/search/key.rb +94 -0
  85. data/lib/chromadb/search/limit.rb +41 -0
  86. data/lib/chromadb/search/rank.rb +425 -0
  87. data/lib/chromadb/search/search.rb +73 -0
  88. data/lib/chromadb/search/select.rb +54 -0
  89. data/lib/chromadb/search/where.rb +157 -0
  90. data/lib/chromadb/search.rb +8 -0
  91. data/lib/chromadb/types/results.rb +96 -0
  92. data/lib/chromadb/types/sparse_vector.rb +86 -0
  93. data/lib/chromadb/types/validation.rb +519 -0
  94. data/lib/chromadb/types.rb +13 -0
  95. data/lib/chromadb/version.rb +5 -0
  96. data/lib/chromadb.rb +15 -0
  97. metadata +233 -0
@@ -0,0 +1,61 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "Chroma Cloud Qwen Embedding Function Schema",
4
+ "description": "Schema for the Chroma Cloud Qwen embedding function configuration",
5
+ "version": "1.0.0",
6
+ "type": "object",
7
+ "properties": {
8
+ "model": {
9
+ "type": "string",
10
+ "enum": [
11
+ "Qwen/Qwen3-Embedding-0.6B"
12
+ ],
13
+ "description": "The specific Qwen model to use for embeddings"
14
+ },
15
+ "task": {
16
+ "type": [
17
+ "string",
18
+ "null"
19
+ ],
20
+ "description": "The task for which embeddings are being generated. If null or empty, empty instructions will be used."
21
+ },
22
+ "instructions": {
23
+ "type": "object",
24
+ "description": "A mapping of tasks to instructions for targets (documents/queries)",
25
+ "properties": {
26
+ "nl_to_code": {
27
+ "type": "object",
28
+ "properties": {
29
+ "documents": {
30
+ "type": "string",
31
+ "description": "Instructions for embedding documents"
32
+ },
33
+ "query": {
34
+ "type": "string",
35
+ "description": "Instructions for embedding queries"
36
+ }
37
+ },
38
+ "required": [
39
+ "documents",
40
+ "query"
41
+ ],
42
+ "additionalProperties": false
43
+ }
44
+ },
45
+ "required": [
46
+ "nl_to_code"
47
+ ],
48
+ "additionalProperties": false
49
+ },
50
+ "api_key_env_var": {
51
+ "type": "string",
52
+ "description": "Environment variable name that contains your API key for the Chroma Embedding API",
53
+ "default": "CHROMA_API_KEY"
54
+ }
55
+ },
56
+ "required": [
57
+ "model",
58
+ "task"
59
+ ],
60
+ "additionalProperties": false
61
+ }
@@ -0,0 +1,31 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "Chroma Cloud Splade Embedding Function Schema",
4
+ "description": "Schema for the Chroma Cloud Splade sparse embedding function configuration",
5
+ "version": "1.0.0",
6
+ "type": "object",
7
+ "properties": {
8
+ "model": {
9
+ "type": "string",
10
+ "enum": [
11
+ "prithivida/Splade_PP_en_v1"
12
+ ],
13
+ "description": "The specific Splade model to use for sparse embeddings"
14
+ },
15
+ "api_key_env_var": {
16
+ "type": "string",
17
+ "description": "Environment variable name that contains your API key for the Chroma Embedding API",
18
+ "default": "CHROMA_API_KEY"
19
+ },
20
+ "include_tokens": {
21
+ "type": "boolean",
22
+ "description": "Whether to store token labels in the sparse vector output",
23
+ "default": false
24
+ }
25
+ },
26
+ "required": [
27
+ "api_key_env_var",
28
+ "model"
29
+ ],
30
+ "additionalProperties": false
31
+ }
@@ -0,0 +1,37 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "Chroma BM25 Embedding Function Schema",
4
+ "description": "Schema for the Chroma BM25 sparse embedding function configuration",
5
+ "version": "1.0.0",
6
+ "type": "object",
7
+ "properties": {
8
+ "k": {
9
+ "type": "number",
10
+ "description": "BM25 saturation parameter controlling term frequency scaling"
11
+ },
12
+ "b": {
13
+ "type": "number",
14
+ "description": "BM25 length normalization parameter"
15
+ },
16
+ "avg_doc_length": {
17
+ "type": "number",
18
+ "description": "Average document length in tokens used for normalization"
19
+ },
20
+ "token_max_length": {
21
+ "type": "number",
22
+ "description": "Maximum token length allowed before filtering"
23
+ },
24
+ "stopwords": {
25
+ "type": "array",
26
+ "description": "Optional custom stopword list (in lowercase) to override the defaults",
27
+ "items": {
28
+ "type": "string"
29
+ }
30
+ },
31
+ "include_tokens": {
32
+ "type": "boolean",
33
+ "description": "Whether to store token strings in the sparse vectors (default: true)"
34
+ }
35
+ },
36
+ "additionalProperties": false
37
+ }
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ module Search
5
# Builder for comparisons against one named field.
#
# Every public operator method returns whatever
# WhereExpression.create_comparison returns for (name, operator, operand).
class Key
  attr_reader :name

  # @param name the field name handed to every comparison built from this key
  def initialize(name)
    @name = name
  end

  def eq(value)
    compare("$eq", value)
  end

  def ne(value)
    compare("$ne", value)
  end

  def gt(value)
    compare("$gt", value)
  end

  def gte(value)
    compare("$gte", value)
  end

  def lt(value)
    compare("$lt", value)
  end

  def lte(value)
    compare("$lte", value)
  end

  # @raise [ArgumentError] when +values+ converts to an empty array
  def is_in(values)
    compare("$in", list_operand(values, "$in requires at least one value"))
  end

  # @raise [ArgumentError] when +values+ converts to an empty array
  def not_in(values)
    compare("$nin", list_operand(values, "$nin requires at least one value"))
  end

  # @raise [TypeError] when +value+ is not a String
  def contains(value)
    compare("$contains", string_operand(value, "$contains requires a string value"))
  end

  # @raise [TypeError] when +value+ is not a String
  def not_contains(value)
    compare("$not_contains", string_operand(value, "$not_contains requires a string value"))
  end

  # @raise [TypeError] when +pattern+ is not a String
  def regex(pattern)
    compare("$regex", string_operand(pattern, "$regex requires a string pattern"))
  end

  # @raise [TypeError] when +pattern+ is not a String
  def not_regex(pattern)
    compare("$not_regex", string_operand(pattern, "$not_regex requires a string pattern"))
  end

  private

  # Single funnel into WhereExpression for every operator.
  def compare(operator, operand)
    WhereExpression.create_comparison(@name, operator, operand)
  end

  # Converts +values+ with to_a when available (Array() otherwise) and
  # rejects an empty result.
  def list_operand(values, message)
    items = values.respond_to?(:to_a) ? values.to_a : Array(values)
    raise ArgumentError, message if items.empty?

    items
  end

  # Returns +value+ unchanged when it is a String; raises otherwise.
  def string_operand(value, message)
    return value if value.is_a?(String)

    raise TypeError, message
  end
end
79
+
80
# Shorthand for obtaining Key instances: K[name] builds a Key for any name,
# and the constants below are prebuilt keys for the "#"-prefixed names.
module K
  module_function

  # @param name the name handed to Key.new
  # @return [Key]
  def [](name)
    Key.new(name)
  end

  ID = Key.new("#id")
  DOCUMENT = Key.new("#document")
  EMBEDDING = Key.new("#embedding")
  METADATA = Key.new("#metadata")
  SCORE = Key.new("#score")
end
93
+ end
94
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ module Search
5
# Pagination window: a non-negative integer offset plus an optional positive
# integer limit.
class Limit
  attr_reader :offset, :limit

  # @param offset [Integer] must be >= 0
  # @param limit [Integer, nil] must be > 0 when given; nil means "no limit key"
  # @raise [TypeError] when either value fails validation
  def initialize(offset: 0, limit: nil)
    unless offset.is_a?(Integer) && offset >= 0
      raise TypeError, "Limit offset must be a non-negative integer"
    end
    unless limit.nil? || (limit.is_a?(Integer) && limit > 0)
      raise TypeError, "Limit must be a positive integer when provided"
    end

    @offset = offset
    @limit = limit
  end

  # Coerces +input+ into a new Limit.
  #
  # * Limit   - copied (offset_override is not consulted)
  # * Numeric - used as the limit after to_i (fractional part is truncated);
  #             offset is offset_override, or 0 when that is nil/false
  # * nil     - default Limit (offset 0, no limit)
  # * Hash    - :offset / :limit read under Symbol or String keys
  #
  # @raise [TypeError] for any other input, for values the constructor
  #   rejects, and for non-finite numbers
  def self.from(input, offset_override = nil)
    case input
    when Limit
      Limit.new(offset: input.offset, limit: input.limit)
    when Numeric
      # NaN / Infinity cannot be converted with to_i (FloatDomainError);
      # report them with the same TypeError used for every other bad limit.
      unless input.finite?
        raise TypeError, "Limit must be a positive integer when provided"
      end

      Limit.new(limit: input.to_i, offset: offset_override || 0)
    when nil
      Limit.new
    when Hash
      Limit.new(offset: input[:offset] || input["offset"] || 0,
                limit: input[:limit] || input["limit"])
    else
      raise TypeError, "Invalid limit input"
    end
  end

  # @return [Hash] always has "offset"; has "limit" only when a limit is set
  def to_h
    result = { "offset" => @offset }
    result["limit"] = @limit if @limit
    result
  end
end
40
+ end
41
+ end
@@ -0,0 +1,425 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Chroma
4
+ module Search
5
# Abstract base for rank expressions. Subclasses supply #to_h; the methods
# here combine the receiver with further operands into composite expressions.
class RankExpression
  # Subclasses must override.
  def to_h
    raise NotImplementedError
  end

  # @return [RankExpression] self when called without operands
  def add(*others)
    others.empty? ? self : SumRankExpression.create(with_coerced(others, "add"))
  end

  def subtract(other)
    right = ::Chroma::Search.require_rank(other, "subtract operand")
    SubRankExpression.new(self, right)
  end

  # @return [RankExpression] self when called without operands
  def multiply(*others)
    others.empty? ? self : MulRankExpression.create(with_coerced(others, "multiply"))
  end

  def divide(other)
    right = ::Chroma::Search.require_rank(other, "divide operand")
    DivRankExpression.new(self, right)
  end

  # Expressed as a multiplication by -1.
  def negate
    multiply(-1)
  end

  def abs
    AbsRankExpression.new(self)
  end

  def exp
    ExpRankExpression.new(self)
  end

  def log
    LogRankExpression.new(self)
  end

  # @return [RankExpression] self when called without operands
  def max(*others)
    others.empty? ? self : MaxRankExpression.create(with_coerced(others, "max"))
  end

  # @return [RankExpression] self when called without operands
  def min(*others)
    others.empty? ? self : MinRankExpression.create(with_coerced(others, "min"))
  end

  # Coerces +input+ into a rank expression.
  #
  # @return [RankExpression, nil] the input itself for a RankExpression, nil
  #   for nil, a ValueRankExpression for a Numeric, a RawRankExpression for
  #   a Hash
  # @raise [TypeError] for any other input
  def self.from(input)
    case input
    when RankExpression
      input
    when nil
      nil
    when Numeric
      ValueRankExpression.new(::Chroma::Search.require_number(input, "Val requires a numeric value"))
    when Hash
      RawRankExpression.new(input)
    else
      raise TypeError, "Rank input must be a RankExpression, number, or Hash"
    end
  end

  private

  # Returns [self, *others] with every extra operand passed through
  # require_rank; +verb+ labels the operand in the error context.
  def with_coerced(others, verb)
    coerced = others.each_with_index.map do |item, idx|
      ::Chroma::Search.require_rank(item, "#{verb} operand #{idx}")
    end
    [self] + coerced
  end
end
70
+
71
# Rank expression backed by a caller-supplied raw value (RankExpression.from
# uses it for Hash input).
class RawRankExpression < RankExpression
  # @param raw the value to serialize; kept by reference
  def initialize(raw)
    @raw = raw
  end

  # @return a deep copy of the raw value, produced by a Marshal round-trip,
  #   so callers cannot mutate the stored original through the result
  def to_h
    Marshal.load(Marshal.dump(@raw))
  end
end
86
+
87
# Constant rank expression, serialized as { "$val" => value }.
class ValueRankExpression < RankExpression
  # @param value the constant to emit (stored as given)
  def initialize(value)
    @value = value
  end

  # @return [Hash] single-entry hash keyed by "$val"
  def to_h
    constant = @value
    { "$val" => constant }
  end
end
96
+
97
# Sum of rank expressions, serialized as { "$sum" => [operand hashes] }.
class SumRankExpression < RankExpression
  # @param ranks [Array] operands; each must respond to #to_h
  def initialize(ranks)
    @ranks = ranks
  end

  # Builds a sum, splicing the operands of any nested SumRankExpression into
  # the top-level list. A single remaining operand is returned as-is.
  def self.create(ranks)
    flattened = ranks.flat_map do |rank|
      rank.is_a?(SumRankExpression) ? rank.operands : [rank]
    end
    flattened.length == 1 ? flattened[0] : SumRankExpression.new(flattened)
  end

  # @return [Array] shallow copy of the operand list
  def operands
    @ranks.dup
  end

  def to_h
    serialized = @ranks.map { |rank| rank.to_h }
    { "$sum" => serialized }
  end
end
123
+
124
# Difference of two rank expressions, serialized as
# { "$sub" => { "left" => ..., "right" => ... } }.
class SubRankExpression < RankExpression
  # @param left operand serialized under "left"
  # @param right operand serialized under "right"
  def initialize(left, right)
    @left = left
    @right = right
  end

  def to_h
    sides = { "left" => @left.to_h, "right" => @right.to_h }
    { "$sub" => sides }
  end
end
134
+
135
# Product of rank expressions, serialized as { "$mul" => [operand hashes] }.
class MulRankExpression < RankExpression
  # @param ranks [Array] operands; each must respond to #to_h
  def initialize(ranks)
    @ranks = ranks
  end

  # Builds a product, splicing the operands of any nested MulRankExpression
  # into the top-level list. A single remaining operand is returned as-is.
  def self.create(ranks)
    flattened = ranks.flat_map do |rank|
      rank.is_a?(MulRankExpression) ? rank.operands : [rank]
    end
    flattened.length == 1 ? flattened[0] : MulRankExpression.new(flattened)
  end

  # @return [Array] shallow copy of the operand list
  def operands
    @ranks.dup
  end

  def to_h
    serialized = @ranks.map { |rank| rank.to_h }
    { "$mul" => serialized }
  end
end
161
+
162
# Quotient of two rank expressions, serialized as
# { "$div" => { "left" => ..., "right" => ... } }.
class DivRankExpression < RankExpression
  # @param left operand serialized under "left"
  # @param right operand serialized under "right"
  def initialize(left, right)
    @left = left
    @right = right
  end

  def to_h
    sides = { "left" => @left.to_h, "right" => @right.to_h }
    { "$div" => sides }
  end
end
172
+
173
# Wraps one operand, serialized as { "$abs" => operand hash }.
class AbsRankExpression < RankExpression
  # @param operand must respond to #to_h
  def initialize(operand)
    @operand = operand
  end

  def to_h
    inner = @operand.to_h
    { "$abs" => inner }
  end
end
182
+
183
# Wraps one operand, serialized as { "$exp" => operand hash }.
class ExpRankExpression < RankExpression
  # @param operand must respond to #to_h
  def initialize(operand)
    @operand = operand
  end

  def to_h
    inner = @operand.to_h
    { "$exp" => inner }
  end
end
192
+
193
# Wraps one operand, serialized as { "$log" => operand hash }.
class LogRankExpression < RankExpression
  # @param operand must respond to #to_h
  def initialize(operand)
    @operand = operand
  end

  def to_h
    inner = @operand.to_h
    { "$log" => inner }
  end
end
202
+
203
# Maximum over rank expressions, serialized as { "$max" => [operand hashes] }.
class MaxRankExpression < RankExpression
  # @param ranks [Array] operands; each must respond to #to_h
  def initialize(ranks)
    @ranks = ranks
  end

  # Builds a max, splicing the operands of any nested MaxRankExpression into
  # the top-level list. A single remaining operand is returned as-is.
  def self.create(ranks)
    flattened = ranks.flat_map do |rank|
      rank.is_a?(MaxRankExpression) ? rank.operands : [rank]
    end
    flattened.length == 1 ? flattened[0] : MaxRankExpression.new(flattened)
  end

  # @return [Array] shallow copy of the operand list
  def operands
    @ranks.dup
  end

  def to_h
    serialized = @ranks.map { |rank| rank.to_h }
    { "$max" => serialized }
  end
end
229
+
230
# Minimum over rank expressions, serialized as { "$min" => [operand hashes] }.
class MinRankExpression < RankExpression
  # @param ranks [Array] operands; each must respond to #to_h
  def initialize(ranks)
    @ranks = ranks
  end

  # Builds a min, splicing the operands of any nested MinRankExpression into
  # the top-level list. A single remaining operand is returned as-is.
  def self.create(ranks)
    flattened = ranks.flat_map do |rank|
      rank.is_a?(MinRankExpression) ? rank.operands : [rank]
    end
    flattened.length == 1 ? flattened[0] : MinRankExpression.new(flattened)
  end

  # @return [Array] shallow copy of the operand list
  def operands
    @ranks.dup
  end

  def to_h
    serialized = @ranks.map { |rank| rank.to_h }
    { "$min" => serialized }
  end
end
256
+
257
# Rank expression serialized under "$knn" from a symbol-keyed config hash.
class KnnRankExpression < RankExpression
  # @param config [Hash] reads :query, :key, :limit and, when present,
  #   :default_value and :return_rank
  def initialize(config)
    @config = config
  end

  # @return [Hash] { "$knn" => payload }. "query", "key" and "limit" are
  #   always present; "default" appears only when the config has a
  #   :default_value key, and "return_rank" (always true) only when
  #   :return_rank is truthy.
  def to_h
    payload = {}
    %i[query key limit].each { |field| payload[field.to_s] = @config[field] }
    payload["default"] = @config[:default_value] if @config.key?(:default_value)
    payload["return_rank"] = true if @config[:return_rank]
    { "$knn" => payload }
  end
end
273
+
274
# Builds a constant rank expression.
#
# @param value a finite Numeric (validated and converted by require_number)
# @return [ValueRankExpression]
# @raise [TypeError] when require_number rejects the value
def self.Val(value)
  number = require_number(value, "Val requires a numeric value")
  ValueRankExpression.new(number)
end
277
+
278
# Builds a "$knn" rank expression from raw options.
#
# @param options [Hash] passed through normalize_knn_options
# @return [KnnRankExpression]
def self.Knn(options)
  config = normalize_knn_options(options)
  KnnRankExpression.new(config)
end
281
+
282
# Fuses several rank expressions into one:
#
#   -( w_0 / (rank_0 + k)  +  w_1 / (rank_1 + k)  +  ... )
#
# @param ranks [Array] non-empty list of rank inputs (each coerced with
#   require_rank)
# @param k [Integer] positive constant added to every rank
# @param weights [Array<Numeric>, nil] one non-negative weight per rank;
#   defaults to 1 for every rank
# @param normalize [Boolean] when truthy, weights are divided by their sum
# @return [RankExpression]
# @raise [TypeError] for an invalid k, ranks, or weight value
# @raise [ArgumentError] when the weight count differs from the rank count,
#   or when normalize is set and the weights do not sum to a positive value
def self.Rrf(ranks:, k: 60, weights: nil, normalize: false)
  raise TypeError, "Rrf k must be a positive integer" unless k.is_a?(Integer) && k > 0
  if !ranks.is_a?(Array) || ranks.empty?
    raise TypeError, "Rrf requires at least one rank expression"
  end

  operands = ranks.each_with_index.map do |rank, position|
    require_rank(rank, "ranks[#{position}]")
  end

  coefficients = weights ? weights.dup : Array.new(operands.length, 1)
  unless coefficients.length == operands.length
    raise ArgumentError, "Number of weights must match number of ranks"
  end
  unless coefficients.all? { |coefficient| coefficient.is_a?(Numeric) && !coefficient.negative? }
    raise TypeError, "Weights must be non-negative numbers"
  end

  if normalize
    total = coefficients.reduce(0.0) { |sum, coefficient| sum + coefficient }
    raise ArgumentError, "Weights must sum to a positive value when normalize=true" if total <= 0

    coefficients = coefficients.map { |coefficient| coefficient / total }
  end

  # One weight / (rank + k) term per operand, summed and then negated.
  terms = operands.zip(coefficients).map do |rank, coefficient|
    Val(coefficient).divide(rank.add(k))
  end
  terms.reduce { |acc, term| acc.add(term) }.negate
end
316
+
317
# Builds a sum of the given rank inputs via SumRankExpression.create.
#
# @param inputs one or more values accepted by require_rank
# @raise [ArgumentError] when called without inputs
def self.Sum(*inputs)
  raise ArgumentError, "Sum requires at least one rank expression" if inputs.empty?

  operands = inputs.each_with_index.map do |input, position|
    require_rank(input, "Sum operand #{position}")
  end
  SumRankExpression.create(operands)
end
322
+
323
# Builds left - right after coercing both sides with require_rank.
#
# @return [SubRankExpression]
def self.Sub(left, right)
  minuend = require_rank(left, "Sub left")
  subtrahend = require_rank(right, "Sub right")
  SubRankExpression.new(minuend, subtrahend)
end
326
+
327
# Builds a product of the given rank inputs via MulRankExpression.create.
#
# @param inputs one or more values accepted by require_rank
# @raise [ArgumentError] when called without inputs
def self.Mul(*inputs)
  raise ArgumentError, "Mul requires at least one rank expression" if inputs.empty?

  operands = inputs.each_with_index.map do |input, position|
    require_rank(input, "Mul operand #{position}")
  end
  MulRankExpression.create(operands)
end
332
+
333
# Builds left / right after coercing both sides with require_rank.
#
# @return [DivRankExpression]
def self.Div(left, right)
  dividend = require_rank(left, "Div left")
  divisor = require_rank(right, "Div right")
  DivRankExpression.new(dividend, divisor)
end
336
+
337
# Coerces +input+ with require_rank and returns the result of calling #abs
# on it.
def self.Abs(input)
  operand = require_rank(input, "Abs")
  operand.abs
end
340
+
341
# Coerces +input+ with require_rank and returns the result of calling #exp
# on it.
def self.Exp(input)
  operand = require_rank(input, "Exp")
  operand.exp
end
344
+
345
# Coerces +input+ with require_rank and returns the result of calling #log
# on it.
def self.Log(input)
  operand = require_rank(input, "Log")
  operand.log
end
348
+
349
# Builds a maximum over the given rank inputs via MaxRankExpression.create.
#
# @param inputs one or more values accepted by require_rank
# @raise [ArgumentError] when called without inputs
def self.Max(*inputs)
  raise ArgumentError, "Max requires at least one rank expression" if inputs.empty?

  operands = inputs.each_with_index.map do |input, position|
    require_rank(input, "Max operand #{position}")
  end
  MaxRankExpression.create(operands)
end
354
+
355
# Builds a minimum over the given rank inputs via MinRankExpression.create.
#
# @param inputs one or more values accepted by require_rank
# @raise [ArgumentError] when called without inputs
def self.Min(*inputs)
  raise ArgumentError, "Min requires at least one rank expression" if inputs.empty?

  operands = inputs.each_with_index.map do |input, position|
    require_rank(input, "Min operand #{position}")
  end
  MinRankExpression.create(operands)
end
360
+
361
# Coerces +input+ with RankExpression.from and insists on a result.
#
# @param context [String] prefix for the error message
# @return the coerced expression
# @raise [TypeError] when RankExpression.from returns nil (or false)
def self.require_rank(input, context)
  RankExpression.from(input) || raise(TypeError, "#{context} must be a rank expression")
end
366
+
367
# Validates that +value+ is a finite Numeric and converts it to a Float.
#
# @param message [String] text of the TypeError raised on failure
# @return [Float]
# @raise [TypeError] for non-numeric, NaN, or infinite input
def self.require_number(value, message)
  return value.to_f if value.is_a?(Numeric) && value.finite?

  raise TypeError, message
end
373
+
374
# Validates the raw options given to Knn and converts them into the
# symbol-keyed config hash read by KnnRankExpression.
#
# Every option may be supplied under a Symbol or a String key:
#
# * limit   - positive Integer, defaults to 128
# * query   - a String (kept as is), a Chroma::Types::SparseVector, a Hash
#             with "indices" and "values" entries (Symbol or String keys),
#             or anything else, which goes to normalize_dense_vector
# * key     - a String or an object responding to #name; defaults to
#             "#embedding"
# * default - optional finite number, stored as a Float
# * return_rank / returnRank - optional flag, coerced to true/false
#
# @param options [Hash] raw options
# @return [Hash] with :query, :key, :limit, :return_rank and - only when a
#   default was supplied - :default_value
# @raise [TypeError] for an invalid limit, key, default, or query value
def self.normalize_knn_options(options)
  limit = options[:limit] || options["limit"] || 128
  unless limit.is_a?(Integer) && limit > 0
    raise TypeError, "Knn limit must be a positive integer"
  end

  query_input = options[:query] || options["query"]

  # A sparse query hash is accepted under Symbol or String keys, matching
  # how the surrounding options are read. Symbol keys win if both exist.
  sparse_hash = query_input.is_a?(Hash) &&
                %i[indices values].all? { |name| query_input.key?(name) || query_input.key?(name.to_s) }
  sparse_field = lambda do |name|
    query_input.key?(name) ? query_input[name] : query_input[name.to_s]
  end

  query = if query_input.is_a?(String)
            query_input
          elsif query_input.is_a?(Chroma::Types::SparseVector)
            { "indices" => query_input.indices.dup, "values" => query_input.values.dup }
          elsif sparse_hash
            { "indices" => sparse_field.call(:indices).dup, "values" => sparse_field.call(:values).dup }
          else
            normalize_dense_vector(query_input)
          end

  key_input = options[:key] || options["key"]
  key = key_input.respond_to?(:name) ? key_input.name : key_input
  key ||= "#embedding"
  raise TypeError, "Knn key must be a String or Key instance" unless key.is_a?(String)

  default_value = options.key?(:default) ? options[:default] : options["default"]
  # require_number already rejects NaN/Infinity and returns a Float, so no
  # second finiteness check is needed.
  default_value = require_number(default_value, "Knn default must be a number") unless default_value.nil?

  # First spelling present wins; "return_rank" comes last so inputs that
  # use only the other three spellings resolve the same way as before.
  return_rank_key = [:return_rank, :returnRank, "returnRank", "return_rank"].find { |name| options.key?(name) }
  return_rank = return_rank_key ? options[return_rank_key] : nil

  # The sparse hashes above are built fresh from dup'ed arrays, so the
  # query can be stored without a further copy.
  config = { query: query, key: key, limit: limit, return_rank: !!return_rank }
  config[:default_value] = default_value unless default_value.nil?
  config
end
414
+
415
# Converts a dense query vector into a new Array of Floats.
#
# The input is read through #to_a, so nil yields an empty array.
# NOTE(review): an empty query vector is probably not meaningful - confirm
# whether nil/empty input should be rejected here.
#
# @param vector an Array or any object responding to #to_a
# @return [Array<Float>]
# @raise [TypeError] when the input cannot be converted to an array, or
#   when any element is not a finite Numeric
def self.normalize_dense_vector(vector)
  # Report unusable input with the TypeError used for every other invalid
  # query, rather than letting NoMethodError escape from to_a.
  unless vector.respond_to?(:to_a)
    raise TypeError, "Dense query vector must be an array of finite numbers"
  end

  # map builds a new array, so the caller's vector is never mutated.
  vector.to_a.map do |value|
    unless value.is_a?(Numeric) && value.finite?
      raise TypeError, "Dense query vector values must be finite numbers"
    end

    value.to_f
  end
end
424
+ end
425
+ end