rag-ruby 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/MILESTONES.md +12 -0
- data/lib/rag_ruby/configuration.rb +53 -2
- data/lib/rag_ruby/embedders/hugging_face.rb +61 -0
- data/lib/rag_ruby/embedders/ollama.rb +39 -0
- data/lib/rag_ruby/embedders/voyage.rb +48 -0
- data/lib/rag_ruby/generators/anthropic.rb +83 -0
- data/lib/rag_ruby/generators/gemini.rb +80 -0
- data/lib/rag_ruby/generators/ollama.rb +57 -0
- data/lib/rag_ruby/pipeline.rb +30 -2
- data/lib/rag_ruby/stores/memory.rb +46 -4
- data/lib/rag_ruby/version.rb +1 -1
- data/lib/rag_ruby.rb +7 -0
- metadata +8 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 415d46447fc599932fe8f23891c018c4d31fe99de8624efb9173a33fb2fff8c6
|
|
4
|
+
data.tar.gz: 46b603041b9a0078a22de67ff1d4f6c283c4b5a3809864932ac2e13b700d05ac
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cda111927e7b402a0a3d66d1d7a5581333f21175421a3f738099f612ae289b6f64b41fc79b9c57c78eb0421a4f1c9b90b97798f07720bb3b4118ab71ea19d65b
|
|
7
|
+
data.tar.gz: c0d88360302da40b8dde125c3ae4df4894a402e6abd809f81ed0baabc98544f9c128b5c5e75e4deb1d2e77c56f123379b0c1b03755a932cfa5045373a7bd8a17
|
data/MILESTONES.md
ADDED
|
@@ -12,7 +12,10 @@ module RagRuby
|
|
|
12
12
|
EMBEDDER_REGISTRY = {
|
|
13
13
|
openai: ->(opts) { Embedders::OpenAI.new(**opts) },
|
|
14
14
|
onnx: ->(opts) { Embedders::Onnx.new(**opts) },
|
|
15
|
-
cohere: ->(opts) { Embedders::Cohere.new(**opts) }
|
|
15
|
+
cohere: ->(opts) { Embedders::Cohere.new(**opts) },
|
|
16
|
+
voyage: ->(opts) { Embedders::Voyage.new(**opts) },
|
|
17
|
+
ollama: ->(opts) { Embedders::Ollama.new(**opts) },
|
|
18
|
+
hugging_face: ->(opts) { Embedders::HuggingFace.new(**opts) }
|
|
16
19
|
}.freeze
|
|
17
20
|
|
|
18
21
|
STORE_REGISTRY = {
|
|
@@ -22,11 +25,30 @@ module RagRuby
|
|
|
22
25
|
|
|
23
26
|
GENERATOR_REGISTRY = {
|
|
24
27
|
openai: ->(opts) { Generators::OpenAI.new(**opts) },
|
|
25
|
-
ruby_llm: ->(opts) { Generators::RubyLLM.new(**opts) }
|
|
28
|
+
ruby_llm: ->(opts) { Generators::RubyLLM.new(**opts) },
|
|
29
|
+
anthropic: ->(opts) { Generators::Anthropic.new(**opts) },
|
|
30
|
+
gemini: ->(opts) { Generators::Gemini.new(**opts) },
|
|
31
|
+
ollama: ->(opts) { Generators::Ollama.new(**opts) }
|
|
32
|
+
}.freeze
|
|
33
|
+
|
|
34
|
+
# Maps API key env vars / prefixes to provider symbols for auto-detection
|
|
35
|
+
AUTO_DETECT_EMBEDDER = {
|
|
36
|
+
"VOYAGE_API_KEY" => :voyage,
|
|
37
|
+
"COHERE_API_KEY" => :cohere,
|
|
38
|
+
"HUGGINGFACE_API_KEY" => :hugging_face,
|
|
39
|
+
"OPENAI_API_KEY" => :openai
|
|
40
|
+
}.freeze
|
|
41
|
+
|
|
42
|
+
AUTO_DETECT_GENERATOR = {
|
|
43
|
+
"ANTHROPIC_API_KEY" => :anthropic,
|
|
44
|
+
"GEMINI_API_KEY" => :gemini,
|
|
45
|
+
"OPENAI_API_KEY" => :openai
|
|
26
46
|
}.freeze
|
|
27
47
|
|
|
28
48
|
attr_accessor :loader_instance, :embedder_instance, :store_instance, :generator_instance,
|
|
49
|
+
:reranker_instance,
|
|
29
50
|
:chunk_size, :chunk_overlap, :chunk_strategy,
|
|
51
|
+
:retrieval_strategy, :mmr_lambda, :mmr_fetch_k,
|
|
30
52
|
:http_timeout, :read_timeout
|
|
31
53
|
|
|
32
54
|
def initialize
|
|
@@ -34,6 +56,9 @@ module RagRuby
|
|
|
34
56
|
@chunk_size = 1000
|
|
35
57
|
@chunk_overlap = 200
|
|
36
58
|
@chunk_strategy = :recursive_character
|
|
59
|
+
@retrieval_strategy = :similarity
|
|
60
|
+
@mmr_lambda = 0.5
|
|
61
|
+
@mmr_fetch_k = 20
|
|
37
62
|
@http_timeout = 30
|
|
38
63
|
@read_timeout = 60
|
|
39
64
|
end
|
|
@@ -82,6 +107,16 @@ module RagRuby
|
|
|
82
107
|
end
|
|
83
108
|
end
|
|
84
109
|
|
|
110
|
+
def reranker(instance)
|
|
111
|
+
@reranker_instance = instance
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def retrieval(strategy, lambda: nil, fetch_k: nil)
|
|
115
|
+
@retrieval_strategy = strategy
|
|
116
|
+
@mmr_lambda = lambda if lambda
|
|
117
|
+
@mmr_fetch_k = fetch_k if fetch_k
|
|
118
|
+
end
|
|
119
|
+
|
|
85
120
|
def on(event, &block)
|
|
86
121
|
@callbacks[event] << block
|
|
87
122
|
end
|
|
@@ -89,5 +124,21 @@ module RagRuby
|
|
|
89
124
|
def callbacks_for(event)
|
|
90
125
|
@callbacks[event]
|
|
91
126
|
end
|
|
127
|
+
|
|
128
|
+
# Auto-detect embedder from available API keys
|
|
129
|
+
def self.detect_embedder
|
|
130
|
+
AUTO_DETECT_EMBEDDER.each do |env_var, provider|
|
|
131
|
+
return provider if ENV[env_var] && !ENV[env_var].empty?
|
|
132
|
+
end
|
|
133
|
+
nil
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Auto-detect generator from available API keys
|
|
137
|
+
def self.detect_generator
|
|
138
|
+
AUTO_DETECT_GENERATOR.each do |env_var, provider|
|
|
139
|
+
return provider if ENV[env_var] && !ENV[env_var].empty?
|
|
140
|
+
end
|
|
141
|
+
nil
|
|
142
|
+
end
|
|
92
143
|
end
|
|
93
144
|
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "json"
|
|
6
|
+
|
|
7
|
+
module RagRuby
|
|
8
|
+
module Embedders
|
|
9
|
+
class HuggingFace < Base
|
|
10
|
+
ENDPOINT = "https://api-inference.huggingface.co/pipeline/feature-extraction"
|
|
11
|
+
|
|
12
|
+
def initialize(model: "sentence-transformers/all-MiniLM-L6-v2", api_key: nil)
|
|
13
|
+
@model = model
|
|
14
|
+
@api_key = api_key || ENV["HUGGINGFACE_API_KEY"]
|
|
15
|
+
raise ArgumentError, "HuggingFace API key is required (set HUGGINGFACE_API_KEY or pass api_key:)" unless @api_key
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def embed(text)
|
|
19
|
+
embed_batch([text]).first
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def embed_batch(texts)
|
|
23
|
+
uri = URI.parse("#{ENDPOINT}/#{@model}")
|
|
24
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
|
25
|
+
http.use_ssl = true
|
|
26
|
+
http.open_timeout = 30
|
|
27
|
+
http.read_timeout = 120
|
|
28
|
+
|
|
29
|
+
req = Net::HTTP::Post.new(uri)
|
|
30
|
+
req["Authorization"] = "Bearer #{@api_key}"
|
|
31
|
+
req["Content-Type"] = "application/json"
|
|
32
|
+
req.body = JSON.generate(inputs: texts)
|
|
33
|
+
|
|
34
|
+
response = http.request(req)
|
|
35
|
+
unless response.is_a?(Net::HTTPSuccess)
|
|
36
|
+
raise RagRuby::Error, "HuggingFace API error (#{response.code}): #{response.body}"
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
embeddings = JSON.parse(response.body)
|
|
40
|
+
# HF returns [[token_embeddings]] for each text — mean pool if needed
|
|
41
|
+
embeddings.map { |e| e.first.is_a?(Array) ? mean_pool(e) : e }
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def dimension
|
|
45
|
+
384
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
def mean_pool(token_embeddings)
|
|
51
|
+
dim = token_embeddings.first.length
|
|
52
|
+
count = token_embeddings.length.to_f
|
|
53
|
+
sum = Array.new(dim, 0.0)
|
|
54
|
+
token_embeddings.each do |vec|
|
|
55
|
+
vec.each_with_index { |v, i| sum[i] += v }
|
|
56
|
+
end
|
|
57
|
+
sum.map { |v| v / count }
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "json"
|
|
6
|
+
|
|
7
|
+
module RagRuby
|
|
8
|
+
module Embedders
|
|
9
|
+
class Ollama < Base
|
|
10
|
+
def initialize(model: "nomic-embed-text", base_url: nil)
|
|
11
|
+
@model = model
|
|
12
|
+
@base_url = base_url || ENV["OLLAMA_URL"] || "http://localhost:11434"
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def embed(text)
|
|
16
|
+
uri = URI.parse("#{@base_url}/api/embeddings")
|
|
17
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
|
18
|
+
http.use_ssl = uri.scheme == "https"
|
|
19
|
+
http.open_timeout = 30
|
|
20
|
+
http.read_timeout = 120
|
|
21
|
+
|
|
22
|
+
req = Net::HTTP::Post.new(uri)
|
|
23
|
+
req["Content-Type"] = "application/json"
|
|
24
|
+
req.body = JSON.generate(model: @model, prompt: text)
|
|
25
|
+
|
|
26
|
+
response = http.request(req)
|
|
27
|
+
unless response.is_a?(Net::HTTPSuccess)
|
|
28
|
+
raise RagRuby::Error, "Ollama API error (#{response.code}): #{response.body}"
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
JSON.parse(response.body)["embedding"]
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def dimension
|
|
35
|
+
768
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "json"
|
|
6
|
+
|
|
7
|
+
module RagRuby
|
|
8
|
+
module Embedders
|
|
9
|
+
class Voyage < Base
|
|
10
|
+
ENDPOINT = "https://api.voyageai.com/v1/embeddings"
|
|
11
|
+
|
|
12
|
+
def initialize(model: "voyage-3", api_key: nil)
|
|
13
|
+
@model = model
|
|
14
|
+
@api_key = api_key || ENV["VOYAGE_API_KEY"]
|
|
15
|
+
raise ArgumentError, "Voyage API key is required (set VOYAGE_API_KEY or pass api_key:)" unless @api_key
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def embed(text)
|
|
19
|
+
embed_batch([text]).first
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def embed_batch(texts)
|
|
23
|
+
uri = URI.parse(ENDPOINT)
|
|
24
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
|
25
|
+
http.use_ssl = true
|
|
26
|
+
http.open_timeout = 30
|
|
27
|
+
http.read_timeout = 60
|
|
28
|
+
|
|
29
|
+
req = Net::HTTP::Post.new(uri)
|
|
30
|
+
req["Authorization"] = "Bearer #{@api_key}"
|
|
31
|
+
req["Content-Type"] = "application/json"
|
|
32
|
+
req.body = JSON.generate(model: @model, input: texts)
|
|
33
|
+
|
|
34
|
+
response = http.request(req)
|
|
35
|
+
unless response.is_a?(Net::HTTPSuccess)
|
|
36
|
+
raise RagRuby::Error, "Voyage API error (#{response.code}): #{response.body}"
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
parsed = JSON.parse(response.body)
|
|
40
|
+
parsed["data"].sort_by { |d| d["index"] }.map { |d| d["embedding"] }
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def dimension
|
|
44
|
+
1024
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "json"
|
|
6
|
+
|
|
7
|
+
module RagRuby
|
|
8
|
+
module Generators
|
|
9
|
+
class Anthropic < Base
|
|
10
|
+
ENDPOINT = "https://api.anthropic.com/v1/messages"
|
|
11
|
+
|
|
12
|
+
def initialize(model: "claude-sonnet-4-20250514", api_key: nil, max_tokens: 4096)
|
|
13
|
+
@model = model
|
|
14
|
+
@api_key = api_key || ENV["ANTHROPIC_API_KEY"]
|
|
15
|
+
@max_tokens = max_tokens
|
|
16
|
+
raise ArgumentError, "Anthropic API key is required (set ANTHROPIC_API_KEY or pass api_key:)" unless @api_key
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def generate(prompt:, system_prompt: nil, temperature: 0.7)
|
|
20
|
+
body = {
|
|
21
|
+
model: @model,
|
|
22
|
+
max_tokens: @max_tokens,
|
|
23
|
+
messages: [{ role: "user", content: prompt }],
|
|
24
|
+
temperature: temperature
|
|
25
|
+
}
|
|
26
|
+
body[:system] = system_prompt if system_prompt
|
|
27
|
+
|
|
28
|
+
request_with_retry do
|
|
29
|
+
uri = URI.parse(ENDPOINT)
|
|
30
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
|
31
|
+
http.use_ssl = true
|
|
32
|
+
http.open_timeout = 30
|
|
33
|
+
http.read_timeout = 120
|
|
34
|
+
|
|
35
|
+
req = Net::HTTP::Post.new(uri)
|
|
36
|
+
req["x-api-key"] = @api_key
|
|
37
|
+
req["anthropic-version"] = "2023-06-01"
|
|
38
|
+
req["Content-Type"] = "application/json"
|
|
39
|
+
req.body = JSON.generate(body)
|
|
40
|
+
|
|
41
|
+
response = http.request(req)
|
|
42
|
+
|
|
43
|
+
unless response.is_a?(Net::HTTPSuccess)
|
|
44
|
+
raise RagRuby::Error, "Anthropic API error (#{response.code}): #{response.body}"
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
data = JSON.parse(response.body)
|
|
48
|
+
text = data.dig("content", 0, "text")
|
|
49
|
+
raise RagRuby::Error, "Empty response from Anthropic" if text.nil?
|
|
50
|
+
usage = data["usage"] || {}
|
|
51
|
+
|
|
52
|
+
{
|
|
53
|
+
text: text,
|
|
54
|
+
tokens_used: {
|
|
55
|
+
prompt: usage["input_tokens"],
|
|
56
|
+
completion: usage["output_tokens"]
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
def request_with_retry(max_retries: 3)
|
|
65
|
+
retries = 0
|
|
66
|
+
begin
|
|
67
|
+
yield
|
|
68
|
+
rescue => e
|
|
69
|
+
retries += 1
|
|
70
|
+
if retries <= max_retries && retryable?(e)
|
|
71
|
+
sleep(2**(retries - 1))
|
|
72
|
+
retry
|
|
73
|
+
end
|
|
74
|
+
raise
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def retryable?(e)
|
|
79
|
+
e.message.match?(/429|500|502|503|529/)
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "json"
|
|
6
|
+
|
|
7
|
+
module RagRuby
|
|
8
|
+
module Generators
|
|
9
|
+
class Gemini < Base
|
|
10
|
+
ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models"
|
|
11
|
+
|
|
12
|
+
def initialize(model: "gemini-2.0-flash", api_key: nil)
|
|
13
|
+
@model = model
|
|
14
|
+
@api_key = api_key || ENV["GEMINI_API_KEY"]
|
|
15
|
+
raise ArgumentError, "Gemini API key is required (set GEMINI_API_KEY or pass api_key:)" unless @api_key
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def generate(prompt:, system_prompt: nil, temperature: 0.7)
|
|
19
|
+
body = {
|
|
20
|
+
contents: [{ parts: [{ text: prompt }] }],
|
|
21
|
+
generationConfig: { temperature: temperature }
|
|
22
|
+
}
|
|
23
|
+
if system_prompt
|
|
24
|
+
body[:systemInstruction] = { parts: [{ text: system_prompt }] }
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
request_with_retry do
|
|
28
|
+
uri = URI.parse("#{ENDPOINT}/#{@model}:generateContent?key=#{@api_key}")
|
|
29
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
|
30
|
+
http.use_ssl = true
|
|
31
|
+
http.open_timeout = 30
|
|
32
|
+
http.read_timeout = 120
|
|
33
|
+
|
|
34
|
+
req = Net::HTTP::Post.new(uri)
|
|
35
|
+
req["Content-Type"] = "application/json"
|
|
36
|
+
req.body = JSON.generate(body)
|
|
37
|
+
|
|
38
|
+
response = http.request(req)
|
|
39
|
+
|
|
40
|
+
unless response.is_a?(Net::HTTPSuccess)
|
|
41
|
+
raise RagRuby::Error, "Gemini API error (#{response.code}): #{response.body}"
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
data = JSON.parse(response.body)
|
|
45
|
+
text = data.dig("candidates", 0, "content", "parts", 0, "text")
|
|
46
|
+
raise RagRuby::Error, "Empty response from Gemini" if text.nil?
|
|
47
|
+
usage = data["usageMetadata"] || {}
|
|
48
|
+
|
|
49
|
+
{
|
|
50
|
+
text: text,
|
|
51
|
+
tokens_used: {
|
|
52
|
+
prompt: usage["promptTokenCount"],
|
|
53
|
+
completion: usage["candidatesTokenCount"]
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
def request_with_retry(max_retries: 3)
|
|
62
|
+
retries = 0
|
|
63
|
+
begin
|
|
64
|
+
yield
|
|
65
|
+
rescue => e
|
|
66
|
+
retries += 1
|
|
67
|
+
if retries <= max_retries && retryable?(e)
|
|
68
|
+
sleep(2**(retries - 1))
|
|
69
|
+
retry
|
|
70
|
+
end
|
|
71
|
+
raise
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def retryable?(e)
|
|
76
|
+
e.message.match?(/429|500|502|503/)
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "json"
|
|
6
|
+
|
|
7
|
+
module RagRuby
|
|
8
|
+
module Generators
|
|
9
|
+
class Ollama < Base
|
|
10
|
+
def initialize(model: "llama3.2", base_url: nil)
|
|
11
|
+
@model = model
|
|
12
|
+
@base_url = base_url || ENV["OLLAMA_URL"] || "http://localhost:11434"
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def generate(prompt:, system_prompt: nil, temperature: 0.7)
|
|
16
|
+
uri = URI.parse("#{@base_url}/api/chat")
|
|
17
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
|
18
|
+
http.use_ssl = uri.scheme == "https"
|
|
19
|
+
http.open_timeout = 30
|
|
20
|
+
http.read_timeout = 300
|
|
21
|
+
|
|
22
|
+
messages = []
|
|
23
|
+
messages << { role: "system", content: system_prompt } if system_prompt
|
|
24
|
+
messages << { role: "user", content: prompt }
|
|
25
|
+
|
|
26
|
+
body = {
|
|
27
|
+
model: @model,
|
|
28
|
+
messages: messages,
|
|
29
|
+
stream: false,
|
|
30
|
+
options: { temperature: temperature }
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
req = Net::HTTP::Post.new(uri)
|
|
34
|
+
req["Content-Type"] = "application/json"
|
|
35
|
+
req.body = JSON.generate(body)
|
|
36
|
+
|
|
37
|
+
response = http.request(req)
|
|
38
|
+
|
|
39
|
+
unless response.is_a?(Net::HTTPSuccess)
|
|
40
|
+
raise RagRuby::Error, "Ollama API error (#{response.code}): #{response.body}"
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
data = JSON.parse(response.body)
|
|
44
|
+
text = data.dig("message", "content")
|
|
45
|
+
raise RagRuby::Error, "Empty response from Ollama" if text.nil?
|
|
46
|
+
|
|
47
|
+
{
|
|
48
|
+
text: text,
|
|
49
|
+
tokens_used: {
|
|
50
|
+
prompt: data["prompt_eval_count"],
|
|
51
|
+
completion: data["eval_count"]
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
data/lib/rag_ruby/pipeline.rb
CHANGED
|
@@ -54,8 +54,20 @@ module RagRuby
|
|
|
54
54
|
# Embed the question
|
|
55
55
|
query_embedding = @config.embedder_instance.embed(question)
|
|
56
56
|
|
|
57
|
-
# Search the store
|
|
58
|
-
|
|
57
|
+
# Search the store (with retrieval strategy)
|
|
58
|
+
search_opts = { top_k: top_k, filter: filter }
|
|
59
|
+
if @config.retrieval_strategy == :mmr
|
|
60
|
+
search_opts[:strategy] = :mmr
|
|
61
|
+
search_opts[:lambda] = @config.mmr_lambda
|
|
62
|
+
search_opts[:fetch_k] = @config.mmr_fetch_k
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
results = @config.store_instance.search(query_embedding, **search_opts)
|
|
66
|
+
|
|
67
|
+
# Rerank if configured
|
|
68
|
+
if @config.reranker_instance
|
|
69
|
+
results = rerank_results(question, results)
|
|
70
|
+
end
|
|
59
71
|
|
|
60
72
|
# Build sources from results
|
|
61
73
|
sources = results.map do |result|
|
|
@@ -161,6 +173,22 @@ module RagRuby
|
|
|
161
173
|
context.strip
|
|
162
174
|
end
|
|
163
175
|
|
|
176
|
+
def rerank_results(query, results)
|
|
177
|
+
documents = results.map do |r|
|
|
178
|
+
chunk = r[:chunk]
|
|
179
|
+
chunk.respond_to?(:text) ? chunk.text : chunk.to_s
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
reranked = @config.reranker_instance.rerank(query, documents)
|
|
183
|
+
|
|
184
|
+
reranked.map do |rr|
|
|
185
|
+
idx = rr.respond_to?(:index) ? rr.index : rr[:index]
|
|
186
|
+
score = rr.respond_to?(:score) ? rr.score : rr[:score]
|
|
187
|
+
original = results[idx]
|
|
188
|
+
original.merge(score: score)
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
|
|
164
192
|
def fire(event, *args)
|
|
165
193
|
@config.callbacks_for(event).each { |cb| cb.call(*args) }
|
|
166
194
|
end
|
|
@@ -14,7 +14,7 @@ module RagRuby
|
|
|
14
14
|
@entries[id] = Entry.new(id: id, embedding: embedding, metadata: metadata, chunk: chunk)
|
|
15
15
|
end
|
|
16
16
|
|
|
17
|
-
def search(embedding, top_k: 5, filter: nil)
|
|
17
|
+
def search(embedding, top_k: 5, filter: nil, strategy: :similarity, lambda: 0.5, fetch_k: 20)
|
|
18
18
|
results = @entries.values
|
|
19
19
|
|
|
20
20
|
if filter
|
|
@@ -23,11 +23,16 @@ module RagRuby
|
|
|
23
23
|
end
|
|
24
24
|
end
|
|
25
25
|
|
|
26
|
-
results
|
|
26
|
+
scored = results
|
|
27
27
|
.map { |entry| [entry, cosine_similarity(embedding, entry.embedding)] }
|
|
28
28
|
.sort_by { |_, score| -score }
|
|
29
|
-
|
|
30
|
-
|
|
29
|
+
|
|
30
|
+
if strategy == :mmr
|
|
31
|
+
mmr_select(scored, embedding, top_k: top_k, lambda: lambda, fetch_k: fetch_k)
|
|
32
|
+
else
|
|
33
|
+
scored.first(top_k)
|
|
34
|
+
.map { |entry, score| { id: entry.id, score: score, metadata: entry.metadata, chunk: entry.chunk } }
|
|
35
|
+
end
|
|
31
36
|
end
|
|
32
37
|
|
|
33
38
|
def delete(id)
|
|
@@ -52,6 +57,43 @@ module RagRuby
|
|
|
52
57
|
|
|
53
58
|
dot / (mag_a * mag_b)
|
|
54
59
|
end
|
|
60
|
+
|
|
61
|
+
# Maximal Marginal Relevance: balances relevance and diversity
|
|
62
|
+
def mmr_select(scored, query_embedding, top_k:, lambda:, fetch_k:)
|
|
63
|
+
candidates = scored.first(fetch_k)
|
|
64
|
+
return [] if candidates.empty?
|
|
65
|
+
|
|
66
|
+
selected = []
|
|
67
|
+
remaining = candidates.dup
|
|
68
|
+
|
|
69
|
+
top_k.times do
|
|
70
|
+
break if remaining.empty?
|
|
71
|
+
|
|
72
|
+
best = nil
|
|
73
|
+
best_mmr = -Float::INFINITY
|
|
74
|
+
|
|
75
|
+
remaining.each do |entry, relevance|
|
|
76
|
+
if selected.empty?
|
|
77
|
+
diversity = 0.0
|
|
78
|
+
else
|
|
79
|
+
diversity = selected.map { |sel, _| cosine_similarity(entry.embedding, sel.embedding) }.max
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
mmr_score = lambda * relevance - (1.0 - lambda) * diversity
|
|
83
|
+
|
|
84
|
+
if mmr_score > best_mmr
|
|
85
|
+
best_mmr = mmr_score
|
|
86
|
+
best = [entry, relevance]
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
break unless best
|
|
91
|
+
selected << best
|
|
92
|
+
remaining.delete(best)
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
selected.map { |entry, score| { id: entry.id, score: score, metadata: entry.metadata, chunk: entry.chunk } }
|
|
96
|
+
end
|
|
55
97
|
end
|
|
56
98
|
end
|
|
57
99
|
end
|
data/lib/rag_ruby/version.rb
CHANGED
data/lib/rag_ruby.rb
CHANGED
|
@@ -15,12 +15,19 @@ require_relative "rag_ruby/loaders/active_record"
|
|
|
15
15
|
|
|
16
16
|
require_relative "rag_ruby/embedders/base"
|
|
17
17
|
require_relative "rag_ruby/embedders/openai"
|
|
18
|
+
require_relative "rag_ruby/embedders/cohere"
|
|
19
|
+
require_relative "rag_ruby/embedders/voyage"
|
|
20
|
+
require_relative "rag_ruby/embedders/ollama"
|
|
21
|
+
require_relative "rag_ruby/embedders/hugging_face"
|
|
18
22
|
|
|
19
23
|
require_relative "rag_ruby/stores/base"
|
|
20
24
|
require_relative "rag_ruby/stores/memory"
|
|
21
25
|
|
|
22
26
|
require_relative "rag_ruby/generators/base"
|
|
23
27
|
require_relative "rag_ruby/generators/openai"
|
|
28
|
+
require_relative "rag_ruby/generators/anthropic"
|
|
29
|
+
require_relative "rag_ruby/generators/gemini"
|
|
30
|
+
require_relative "rag_ruby/generators/ollama"
|
|
24
31
|
|
|
25
32
|
require_relative "rag_ruby/configuration"
|
|
26
33
|
require_relative "rag_ruby/pipeline"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rag-ruby
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.1
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Johannes Dwi Cahyo
|
|
@@ -76,6 +76,7 @@ extra_rdoc_files: []
|
|
|
76
76
|
files:
|
|
77
77
|
- Gemfile
|
|
78
78
|
- LICENSE
|
|
79
|
+
- MILESTONES.md
|
|
79
80
|
- README.md
|
|
80
81
|
- Rakefile
|
|
81
82
|
- lib/rag_ruby.rb
|
|
@@ -85,9 +86,15 @@ files:
|
|
|
85
86
|
- lib/rag_ruby/document.rb
|
|
86
87
|
- lib/rag_ruby/embedders/base.rb
|
|
87
88
|
- lib/rag_ruby/embedders/cohere.rb
|
|
89
|
+
- lib/rag_ruby/embedders/hugging_face.rb
|
|
90
|
+
- lib/rag_ruby/embedders/ollama.rb
|
|
88
91
|
- lib/rag_ruby/embedders/onnx.rb
|
|
89
92
|
- lib/rag_ruby/embedders/openai.rb
|
|
93
|
+
- lib/rag_ruby/embedders/voyage.rb
|
|
94
|
+
- lib/rag_ruby/generators/anthropic.rb
|
|
90
95
|
- lib/rag_ruby/generators/base.rb
|
|
96
|
+
- lib/rag_ruby/generators/gemini.rb
|
|
97
|
+
- lib/rag_ruby/generators/ollama.rb
|
|
91
98
|
- lib/rag_ruby/generators/openai.rb
|
|
92
99
|
- lib/rag_ruby/generators/ruby_llm.rb
|
|
93
100
|
- lib/rag_ruby/loaders/active_record.rb
|