chromadb-experimental 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/chromadb/admin_client.rb +6 -0
- data/lib/chromadb/client.rb +317 -0
- data/lib/chromadb/collection.rb +573 -0
- data/lib/chromadb/embedding_functions/chroma_bm25.rb +459 -0
- data/lib/chromadb/embedding_functions/chroma_cloud_qwen.rb +139 -0
- data/lib/chromadb/embedding_functions/chroma_cloud_splade.rb +121 -0
- data/lib/chromadb/embedding_functions.rb +121 -0
- data/lib/chromadb/errors.rb +120 -0
- data/lib/chromadb/http_client.rb +142 -0
- data/lib/chromadb/openapi/lib/chromadb/api/default_api.rb +2349 -0
- data/lib/chromadb/openapi/lib/chromadb/api_client.rb +392 -0
- data/lib/chromadb/openapi/lib/chromadb/api_error.rb +58 -0
- data/lib/chromadb/openapi/lib/chromadb/configuration.rb +295 -0
- data/lib/chromadb/openapi/lib/chromadb/models/add_collection_records_payload.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/attach_function_request.rb +250 -0
- data/lib/chromadb/openapi/lib/chromadb/models/attach_function_response.rb +235 -0
- data/lib/chromadb/openapi/lib/chromadb/models/attached_function_api_response.rb +361 -0
- data/lib/chromadb/openapi/lib/chromadb/models/attached_function_info.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/bool_inverted_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/bool_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/checklist_response.rb +245 -0
- data/lib/chromadb/openapi/lib/chromadb/models/collection.rb +315 -0
- data/lib/chromadb/openapi/lib/chromadb/models/collection_configuration.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/create_collection_payload.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/create_database_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/create_tenant_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/database.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/detach_function_request.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/detach_function_response.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/embedding_function_new_configuration.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/error_response.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/float_inverted_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/float_list_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/float_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/fork_collection_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/fts_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/get_attached_function_response.rb +224 -0
- data/lib/chromadb/openapi/lib/chromadb/models/get_response.rb +270 -0
- data/lib/chromadb/openapi/lib/chromadb/models/get_tenant_response.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/get_user_identity_response.rb +246 -0
- data/lib/chromadb/openapi/lib/chromadb/models/heartbeat_response.rb +235 -0
- data/lib/chromadb/openapi/lib/chromadb/models/hnsw_configuration.rb +330 -0
- data/lib/chromadb/openapi/lib/chromadb/models/hnsw_index_config.rb +371 -0
- data/lib/chromadb/openapi/lib/chromadb/models/include.rb +210 -0
- data/lib/chromadb/openapi/lib/chromadb/models/int_inverted_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/int_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/query_response.rb +280 -0
- data/lib/chromadb/openapi/lib/chromadb/models/raw_where_fields.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/schema.rb +258 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload.rb +256 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload_filter.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload_group_by.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload_limit.rb +230 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_payload_select.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_request_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/search_response.rb +270 -0
- data/lib/chromadb/openapi/lib/chromadb/models/space.rb +210 -0
- data/lib/chromadb/openapi/lib/chromadb/models/spann_configuration.rb +420 -0
- data/lib/chromadb/openapi/lib/chromadb/models/spann_index_config.rb +536 -0
- data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector.rb +244 -0
- data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_config.rb +242 -0
- data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_index_type.rb +234 -0
- data/lib/chromadb/openapi/lib/chromadb/models/sparse_vector_value_type.rb +221 -0
- data/lib/chromadb/openapi/lib/chromadb/models/string_inverted_index_type.rb +229 -0
- data/lib/chromadb/openapi/lib/chromadb/models/string_value_type.rb +231 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_collection_configuration.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_collection_payload.rb +240 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_collection_records_payload.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_hnsw_configuration.rb +345 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_spann_configuration.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/update_tenant_payload.rb +220 -0
- data/lib/chromadb/openapi/lib/chromadb/models/upsert_collection_records_payload.rb +260 -0
- data/lib/chromadb/openapi/lib/chromadb/models/value_types.rb +271 -0
- data/lib/chromadb/openapi/lib/chromadb/models/vector_index_config.rb +261 -0
- data/lib/chromadb/openapi/lib/chromadb/models/vector_index_type.rb +234 -0
- data/lib/chromadb/openapi/lib/chromadb/version.rb +15 -0
- data/lib/chromadb/openapi/lib/chromadb.rb +102 -0
- data/lib/chromadb/openapi.rb +6 -0
- data/lib/chromadb/schema.rb +744 -0
- data/lib/chromadb/schemas/chroma-cloud-qwen.json +61 -0
- data/lib/chromadb/schemas/chroma-cloud-splade.json +31 -0
- data/lib/chromadb/schemas/chroma_bm25.json +37 -0
- data/lib/chromadb/search/key.rb +94 -0
- data/lib/chromadb/search/limit.rb +41 -0
- data/lib/chromadb/search/rank.rb +425 -0
- data/lib/chromadb/search/search.rb +73 -0
- data/lib/chromadb/search/select.rb +54 -0
- data/lib/chromadb/search/where.rb +157 -0
- data/lib/chromadb/search.rb +8 -0
- data/lib/chromadb/types/results.rb +96 -0
- data/lib/chromadb/types/sparse_vector.rb +86 -0
- data/lib/chromadb/types/validation.rb +519 -0
- data/lib/chromadb/types.rb +13 -0
- data/lib/chromadb/version.rb +5 -0
- data/lib/chromadb.rb +15 -0
- metadata +233 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"title": "Chroma Cloud Qwen Embedding Function Schema",
|
|
4
|
+
"description": "Schema for the Chroma Cloud Qwen embedding function configuration",
|
|
5
|
+
"version": "1.0.0",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"model": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"enum": [
|
|
11
|
+
"Qwen/Qwen3-Embedding-0.6B"
|
|
12
|
+
],
|
|
13
|
+
"description": "The specific Qwen model to use for embeddings"
|
|
14
|
+
},
|
|
15
|
+
"task": {
|
|
16
|
+
"type": [
|
|
17
|
+
"string",
|
|
18
|
+
"null"
|
|
19
|
+
],
|
|
20
|
+
"description": "The task for which embeddings are being generated. If null or empty, empty instructions will be used."
|
|
21
|
+
},
|
|
22
|
+
"instructions": {
|
|
23
|
+
"type": "object",
|
|
24
|
+
"description": "A mapping of tasks to instructions for targets (documents/queries)",
|
|
25
|
+
"properties": {
|
|
26
|
+
"nl_to_code": {
|
|
27
|
+
"type": "object",
|
|
28
|
+
"properties": {
|
|
29
|
+
"documents": {
|
|
30
|
+
"type": "string",
|
|
31
|
+
"description": "Instructions for embedding documents"
|
|
32
|
+
},
|
|
33
|
+
"query": {
|
|
34
|
+
"type": "string",
|
|
35
|
+
"description": "Instructions for embedding queries"
|
|
36
|
+
}
|
|
37
|
+
},
|
|
38
|
+
"required": [
|
|
39
|
+
"documents",
|
|
40
|
+
"query"
|
|
41
|
+
],
|
|
42
|
+
"additionalProperties": false
|
|
43
|
+
}
|
|
44
|
+
},
|
|
45
|
+
"required": [
|
|
46
|
+
"nl_to_code"
|
|
47
|
+
],
|
|
48
|
+
"additionalProperties": false
|
|
49
|
+
},
|
|
50
|
+
"api_key_env_var": {
|
|
51
|
+
"type": "string",
|
|
52
|
+
"description": "Environment variable name that contains your API key for the Chroma Embedding API",
|
|
53
|
+
"default": "CHROMA_API_KEY"
|
|
54
|
+
}
|
|
55
|
+
},
|
|
56
|
+
"required": [
|
|
57
|
+
"model",
|
|
58
|
+
"task"
|
|
59
|
+
],
|
|
60
|
+
"additionalProperties": false
|
|
61
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"title": "Chroma Cloud Splade Embedding Function Schema",
|
|
4
|
+
"description": "Schema for the Chroma Cloud Splade sparse embedding function configuration",
|
|
5
|
+
"version": "1.0.0",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"model": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"enum": [
|
|
11
|
+
"prithivida/Splade_PP_en_v1"
|
|
12
|
+
],
|
|
13
|
+
"description": "The specific Splade model to use for sparse embeddings"
|
|
14
|
+
},
|
|
15
|
+
"api_key_env_var": {
|
|
16
|
+
"type": "string",
|
|
17
|
+
"description": "Environment variable name that contains your API key for the Chroma Embedding API",
|
|
18
|
+
"default": "CHROMA_API_KEY"
|
|
19
|
+
},
|
|
20
|
+
"include_tokens": {
|
|
21
|
+
"type": "boolean",
|
|
22
|
+
"description": "Whether to store token labels in the sparse vector output",
|
|
23
|
+
"default": false
|
|
24
|
+
}
|
|
25
|
+
},
|
|
26
|
+
"required": [
|
|
27
|
+
"api_key_env_var",
|
|
28
|
+
"model"
|
|
29
|
+
],
|
|
30
|
+
"additionalProperties": false
|
|
31
|
+
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"title": "Chroma BM25 Embedding Function Schema",
|
|
4
|
+
"description": "Schema for the Chroma BM25 sparse embedding function configuration",
|
|
5
|
+
"version": "1.0.0",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"k": {
|
|
9
|
+
"type": "number",
|
|
10
|
+
"description": "BM25 saturation parameter controlling term frequency scaling"
|
|
11
|
+
},
|
|
12
|
+
"b": {
|
|
13
|
+
"type": "number",
|
|
14
|
+
"description": "BM25 length normalization parameter"
|
|
15
|
+
},
|
|
16
|
+
"avg_doc_length": {
|
|
17
|
+
"type": "number",
|
|
18
|
+
"description": "Average document length in tokens used for normalization"
|
|
19
|
+
},
|
|
20
|
+
"token_max_length": {
|
|
21
|
+
"type": "number",
|
|
22
|
+
"description": "Maximum token length allowed before filtering"
|
|
23
|
+
},
|
|
24
|
+
"stopwords": {
|
|
25
|
+
"type": "array",
|
|
26
|
+
"description": "Optional custom stopword list (in lowercase) to override the defaults",
|
|
27
|
+
"items": {
|
|
28
|
+
"type": "string"
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"include_tokens": {
|
|
32
|
+
"type": "boolean",
|
|
33
|
+
"description": "Whether to store token strings in the sparse vectors (default: true)"
|
|
34
|
+
}
|
|
35
|
+
},
|
|
36
|
+
"additionalProperties": false
|
|
37
|
+
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Chroma
  module Search
    # A named field reference used to build where-filter comparisons.
    #
    # Every comparison method delegates to WhereExpression.create_comparison
    # with this key's name, an operator string such as "$eq", and the operand.
    class Key
      # @return [Object] the field name; Symbols are stored as Strings
      attr_reader :name

      # @param name [String, Symbol] field name. Symbols are converted to
      #   Strings so that K[:field] and K["field"] behave the same wherever a
      #   String key name is required.
      def initialize(name)
        @name = name.is_a?(Symbol) ? name.to_s : name
      end

      def eq(value)
        comparison("$eq", value)
      end

      def ne(value)
        comparison("$ne", value)
      end

      def gt(value)
        comparison("$gt", value)
      end

      def gte(value)
        comparison("$gte", value)
      end

      def lt(value)
        comparison("$lt", value)
      end

      def lte(value)
        comparison("$lte", value)
      end

      # @param values [#to_a, Object] candidates; a scalar is wrapped in an Array
      # @raise [ArgumentError] when no values are given
      def is_in(values)
        list_comparison("$in", values)
      end

      # @param values [#to_a, Object] candidates; a scalar is wrapped in an Array
      # @raise [ArgumentError] when no values are given
      def not_in(values)
        list_comparison("$nin", values)
      end

      # @raise [TypeError] when value is not a String
      def contains(value)
        string_comparison("$contains", value, "value")
      end

      # @raise [TypeError] when value is not a String
      def not_contains(value)
        string_comparison("$not_contains", value, "value")
      end

      # @raise [TypeError] when pattern is not a String
      def regex(pattern)
        string_comparison("$regex", pattern, "pattern")
      end

      # @raise [TypeError] when pattern is not a String
      def not_regex(pattern)
        string_comparison("$not_regex", pattern, "pattern")
      end

      # Keys are value objects: two keys with the same name are equal.
      def ==(other)
        other.is_a?(Key) && name == other.name
      end
      alias eql? ==

      # Kept consistent with #== so keys can be used as Hash keys.
      def hash
        [Key, name].hash
      end

      private

      def comparison(operator, operand)
        WhereExpression.create_comparison(@name, operator, operand)
      end

      def list_comparison(operator, values)
        array = iterable_to_array(values)
        assert_non_empty(array, "#{operator} requires at least one value")
        comparison(operator, array)
      end

      def string_comparison(operator, operand, noun)
        raise TypeError, "#{operator} requires a string #{noun}" unless operand.is_a?(String)

        comparison(operator, operand)
      end

      # Uses #to_a when available; otherwise Kernel#Array wraps the value.
      def iterable_to_array(values)
        values.respond_to?(:to_a) ? values.to_a : Array(values)
      end

      def assert_non_empty(array, message)
        raise ArgumentError, message if array.empty?
      end
    end

    # Shorthand for building keys: predefined "#"-prefixed keys plus K[name].
    module K
      ID = Key.new("#id")
      DOCUMENT = Key.new("#document")
      EMBEDDING = Key.new("#embedding")
      METADATA = Key.new("#metadata")
      SCORE = Key.new("#score")

      module_function

      # @param name [String, Symbol] field name
      # @return [Key] a new key for that field
      def [](name)
        Key.new(name)
      end
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Chroma
  module Search
    # An offset plus an optional maximum result count.
    class Limit
      attr_reader :offset, :limit

      # @param offset [Integer] non-negative number of results to skip
      # @param limit [Integer, nil] positive maximum count, or nil for none
      # @raise [TypeError] when either value fails validation
      def initialize(offset: 0, limit: nil)
        unless offset.is_a?(Integer) && offset >= 0
          raise TypeError, "Limit offset must be a non-negative integer"
        end
        unless limit.nil? || (limit.is_a?(Integer) && limit > 0)
          raise TypeError, "Limit must be a positive integer when provided"
        end

        @offset = offset
        @limit = limit
      end

      # Builds a Limit from the accepted input shapes.
      #
      # @param input [Limit, Numeric, Hash, nil]
      #   - Limit: copied
      #   - Numeric: used as the limit (truncated with #to_i)
      #   - nil: default Limit (offset 0, no limit)
      #   - Hash: :offset/:limit read by symbol or string key
      # @param offset_override [Integer, nil] offset applied only when input
      #   is Numeric; defaults to 0
      # @return [Limit]
      # @raise [TypeError] for unsupported input or invalid values
      def self.from(input, offset_override = nil)
        case input
        when Limit
          new(offset: input.offset, limit: input.limit)
        when Numeric
          # Infinity/NaN would make #to_i raise FloatDomainError; report them
          # with the same TypeError as every other invalid limit.
          raise TypeError, "Limit must be a positive integer when provided" unless input.finite?

          new(limit: input.to_i, offset: offset_override || 0)
        when nil
          new
        when Hash
          new(offset: input[:offset] || input["offset"] || 0,
              limit: input[:limit] || input["limit"])
        else
          raise TypeError, "Invalid limit input"
        end
      end

      # @return [Hash] {"offset" => Integer} plus "limit" when one is set
      def to_h
        result = { "offset" => @offset }
        result["limit"] = @limit if @limit
        result
      end
    end
  end
end
|
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Chroma
  module Search
    # Base class for composable ranking expressions. Each expression
    # serializes to an operator-keyed Hash through #to_h.
    class RankExpression
      # Subclasses must override this to return their Hash form.
      def to_h
        raise NotImplementedError
      end

      # @param others [Array<RankExpression, Numeric, Hash>] additional terms
      # @return [RankExpression] self when no terms are given, else a sum
      def add(*others)
        combine_with(SumRankExpression, "add", others)
      end

      def subtract(other)
        SubRankExpression.new(self, ::Chroma::Search.require_rank(other, "subtract operand"))
      end

      # @param others [Array<RankExpression, Numeric, Hash>] additional factors
      # @return [RankExpression] self when no factors are given, else a product
      def multiply(*others)
        combine_with(MulRankExpression, "multiply", others)
      end

      def divide(other)
        DivRankExpression.new(self, ::Chroma::Search.require_rank(other, "divide operand"))
      end

      # Negation is expressed as multiplication by -1.
      def negate
        multiply(-1)
      end

      def abs
        AbsRankExpression.new(self)
      end

      def exp
        ExpRankExpression.new(self)
      end

      def log
        LogRankExpression.new(self)
      end

      def max(*others)
        combine_with(MaxRankExpression, "max", others)
      end

      def min(*others)
        combine_with(MinRankExpression, "min", others)
      end

      # Coerces supported inputs into a rank expression.
      #
      # @param input [RankExpression, Numeric, Hash, nil]
      # @return [RankExpression, nil] nil is passed through unchanged
      # @raise [TypeError] for any other input type
      def self.from(input)
        case input
        when RankExpression
          input
        when nil
          nil
        when Numeric
          ValueRankExpression.new(::Chroma::Search.require_number(input, "Val requires a numeric value"))
        when Hash
          RawRankExpression.new(input)
        else
          raise TypeError, "Rank input must be a RankExpression, number, or Hash"
        end
      end

      private

      # Shared body of add/multiply/max/min: coerce every operand and hand
      # [self, *operands] to the variadic class' flattening factory.
      def combine_with(expression_class, label, others)
        return self if others.empty?

        operands = others.each_with_index.map do |item, idx|
          ::Chroma::Search.require_rank(item, "#{label} operand #{idx}")
        end
        expression_class.create([self, *operands])
      end
    end

    # Wraps a caller-supplied Hash and emits a deep copy of it unchanged.
    class RawRankExpression < RankExpression
      def initialize(raw)
        @raw = raw
      end

      def to_h
        deep_clone(@raw)
      end

      private

      # Marshal round-trip so callers cannot mutate the stored Hash via #to_h.
      def deep_clone(value)
        Marshal.load(Marshal.dump(value))
      end
    end

    # A constant value, serialized as {"$val" => value}.
    class ValueRankExpression < RankExpression
      def initialize(value)
        @value = value
      end

      def to_h
        { "$val" => @value }
      end
    end

    # Shared implementation for operators taking a list of operands.
    # Subclasses only supply OPERATOR.
    class VariadicRankExpression < RankExpression
      # Builds an expression, splicing in the operands of any argument that
      # is already an instance of the receiving class.
      #
      # @param ranks [Array<RankExpression>]
      # @return [RankExpression] the lone operand when exactly one remains
      #   after flattening, otherwise a new instance of the receiving class
      def self.create(ranks)
        flattened = ranks.flat_map { |rank| rank.is_a?(self) ? rank.operands : [rank] }
        return flattened.first if flattened.length == 1

        new(flattened)
      end

      def initialize(ranks)
        @ranks = ranks
      end

      # @return [Array<RankExpression>] a copy of the operand list
      def operands
        @ranks.dup
      end

      def to_h
        { self.class::OPERATOR => @ranks.map(&:to_h) }
      end
    end

    class SumRankExpression < VariadicRankExpression
      OPERATOR = "$sum"
    end

    class MulRankExpression < VariadicRankExpression
      OPERATOR = "$mul"
    end

    class MaxRankExpression < VariadicRankExpression
      OPERATOR = "$max"
    end

    class MinRankExpression < VariadicRankExpression
      OPERATOR = "$min"
    end

    # Shared implementation for operators with "left" and "right" operands.
    class BinaryRankExpression < RankExpression
      def initialize(left, right)
        @left = left
        @right = right
      end

      def to_h
        { self.class::OPERATOR => { "left" => @left.to_h, "right" => @right.to_h } }
      end
    end

    class SubRankExpression < BinaryRankExpression
      OPERATOR = "$sub"
    end

    class DivRankExpression < BinaryRankExpression
      OPERATOR = "$div"
    end

    # Shared implementation for single-operand operators.
    class UnaryRankExpression < RankExpression
      def initialize(operand)
        @operand = operand
      end

      def to_h
        { self.class::OPERATOR => @operand.to_h }
      end
    end

    class AbsRankExpression < UnaryRankExpression
      OPERATOR = "$abs"
    end

    class ExpRankExpression < UnaryRankExpression
      OPERATOR = "$exp"
    end

    class LogRankExpression < UnaryRankExpression
      OPERATOR = "$log"
    end

    # A "$knn" expression built from a config Hash as produced by
    # Search.normalize_knn_options.
    class KnnRankExpression < RankExpression
      def initialize(config)
        @config = config
      end

      # "default" is emitted only when configured; "return_rank" only when true.
      def to_h
        base = {
          "query" => @config[:query],
          "key" => @config[:key],
          "limit" => @config[:limit]
        }
        base["default"] = @config[:default_value] if @config.key?(:default_value)
        base["return_rank"] = true if @config[:return_rank]
        { "$knn" => base }
      end
    end

    # @param value [Numeric] finite number
    # @return [ValueRankExpression]
    # @raise [TypeError] when value is not a finite number
    def self.Val(value)
      ValueRankExpression.new(require_number(value, "Val requires a numeric value"))
    end

    # @param options [Hash] see normalize_knn_options for the accepted keys
    # @return [KnnRankExpression]
    def self.Knn(options)
      KnnRankExpression.new(normalize_knn_options(options))
    end

    # Builds -(sum of weight_i / (rank_i + k)) from the given ranks.
    #
    # @param ranks [Array<RankExpression, Numeric, Hash>] non-empty
    # @param k [Integer] positive constant added to every rank
    # @param weights [Array<Numeric>, nil] one non-negative weight per rank;
    #   defaults to 1 for each
    # @param normalize [Boolean] divide the weights by their sum first
    # @raise [TypeError] for an invalid k, ranks, or weight value
    # @raise [ArgumentError] for a weight-count mismatch or a non-positive
    #   weight sum when normalizing
    def self.Rrf(ranks:, k: 60, weights: nil, normalize: false)
      unless k.is_a?(Integer) && k > 0
        raise TypeError, "Rrf k must be a positive integer"
      end
      unless ranks.is_a?(Array) && !ranks.empty?
        raise TypeError, "Rrf requires at least one rank expression"
      end

      expressions = ranks.each_with_index.map { |rank, index| require_rank(rank, "ranks[#{index}]") }

      weight_values = weights ? weights.dup : Array.new(expressions.length, 1)
      if weight_values.length != expressions.length
        raise ArgumentError, "Number of weights must match number of ranks"
      end
      if weight_values.any? { |value| !value.is_a?(Numeric) || value.negative? }
        raise TypeError, "Weights must be non-negative numbers"
      end

      if normalize
        total = weight_values.reduce(0.0) { |sum, value| sum + value }
        raise ArgumentError, "Weights must sum to a positive value when normalize=true" if total <= 0

        weight_values = weight_values.map { |value| value / total }
      end

      terms = expressions.each_with_index.map do |rank, index|
        Val(weight_values[index]).divide(rank.add(k))
      end

      # create flattens exactly like chaining #add pairwise, in one pass.
      SumRankExpression.create(terms).negate
    end

    def self.Sum(*inputs)
      build_variadic(SumRankExpression, "Sum", inputs)
    end

    def self.Sub(left, right)
      SubRankExpression.new(require_rank(left, "Sub left"), require_rank(right, "Sub right"))
    end

    def self.Mul(*inputs)
      build_variadic(MulRankExpression, "Mul", inputs)
    end

    def self.Div(left, right)
      DivRankExpression.new(require_rank(left, "Div left"), require_rank(right, "Div right"))
    end

    def self.Abs(input)
      require_rank(input, "Abs").abs
    end

    def self.Exp(input)
      require_rank(input, "Exp").exp
    end

    def self.Log(input)
      require_rank(input, "Log").log
    end

    def self.Max(*inputs)
      build_variadic(MaxRankExpression, "Max", inputs)
    end

    def self.Min(*inputs)
      build_variadic(MinRankExpression, "Min", inputs)
    end

    # Coerces input with RankExpression.from and rejects nil.
    #
    # @param context [String] prefix for the error message
    # @raise [TypeError] when input is nil or of an unsupported type
    def self.require_rank(input, context)
      result = RankExpression.from(input)
      raise TypeError, "#{context} must be a rank expression" unless result

      result
    end

    # @return [Float] value converted with #to_f
    # @raise [TypeError] with the given message unless value is a finite Numeric
    def self.require_number(value, message)
      unless value.is_a?(Numeric) && value.finite?
        raise TypeError, message
      end

      value.to_f
    end

    # Validates Knn options and returns the config Hash consumed by
    # KnnRankExpression. Options are read by symbol or string key.
    #
    # @param options [Hash]
    #   - limit: positive Integer (default 128)
    #   - query: String, Chroma::Types::SparseVector, Hash with
    #     indices/values, or an array-like of finite numbers (required)
    #   - key: String or object responding to #name (default "#embedding")
    #   - default: finite number, optional
    #   - return_rank / returnRank: truthy to request ranks
    # @return [Hash] with :query, :key, :limit, :return_rank and, when
    #   given, :default_value
    # @raise [TypeError] when any option fails validation
    def self.normalize_knn_options(options)
      limit = options[:limit] || options["limit"] || 128
      unless limit.is_a?(Integer) && limit > 0
        raise TypeError, "Knn limit must be a positive integer"
      end

      query = normalize_knn_query(options[:query] || options["query"])

      key_input = options[:key] || options["key"]
      key = key_input.respond_to?(:name) ? key_input.name : key_input
      key ||= "#embedding"
      raise TypeError, "Knn key must be a String or Key instance" unless key.is_a?(String)

      default_value = options.key?(:default) ? options[:default] : options["default"]
      # require_number already rejects non-finite values.
      default_value = require_number(default_value, "Knn default must be a number") unless default_value.nil?

      # First spelling present wins; snake_case takes precedence.
      rank_flag = [:return_rank, "return_rank", :returnRank, "returnRank"].find { |name| options.key?(name) }

      config = {
        query: query,
        key: key,
        limit: limit,
        return_rank: rank_flag ? !!options[rank_flag] : false
      }
      config[:default_value] = default_value unless default_value.nil?
      config
    end

    # @param vector [#to_a] numbers
    # @return [Array<Float>]
    # @raise [TypeError] when any element is not a finite Numeric
    def self.normalize_dense_vector(vector)
      vector.to_a.map do |value|
        unless value.is_a?(Numeric) && value.finite?
          raise TypeError, "Dense query vector values must be finite numbers"
        end

        value.to_f
      end
    end

    # Validates the operand list and builds a flattened variadic expression.
    def self.build_variadic(expression_class, label, inputs)
      raise ArgumentError, "#{label} requires at least one rank expression" if inputs.empty?

      expressions = inputs.each_with_index.map { |rank, index| require_rank(rank, "#{label} operand #{index}") }
      expression_class.create(expressions)
    end
    private_class_method :build_variadic

    # Converts a Knn query input into its serializable form: a String is kept
    # as is, sparse inputs become {"indices" => [...], "values" => [...]},
    # anything else is treated as a dense vector.
    def self.normalize_knn_query(query_input)
      case query_input
      when nil
        # nil.to_a would otherwise turn a forgotten query into an empty vector.
        raise TypeError, "Knn query must be a String, sparse vector, or array of numbers"
      when String
        query_input
      when Chroma::Types::SparseVector
        { "indices" => query_input.indices.dup, "values" => query_input.values.dup }
      when Hash
        sparse_query_from_hash(query_input) || normalize_dense_vector(query_input)
      else
        normalize_dense_vector(query_input)
      end
    end
    private_class_method :normalize_knn_query

    # Returns the sparse form of a Hash holding indices and values under
    # symbol or string keys, or nil when either entry is missing.
    def self.sparse_query_from_hash(hash)
      indices_key = [:indices, "indices"].find { |name| hash.key?(name) }
      values_key = [:values, "values"].find { |name| hash.key?(name) }
      return nil unless indices_key && values_key

      { "indices" => hash[indices_key].dup, "values" => hash[values_key].dup }
    end
    private_class_method :sparse_query_from_hash
  end
end
|