rag-ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +9 -0
- data/LICENSE +21 -0
- data/README.md +288 -0
- data/Rakefile +11 -0
- data/lib/rag_ruby/answer.rb +29 -0
- data/lib/rag_ruby/chunk.rb +27 -0
- data/lib/rag_ruby/configuration.rb +90 -0
- data/lib/rag_ruby/document.rb +25 -0
- data/lib/rag_ruby/embedders/base.rb +19 -0
- data/lib/rag_ruby/embedders/cohere.rb +50 -0
- data/lib/rag_ruby/embedders/onnx.rb +42 -0
- data/lib/rag_ruby/embedders/openai.rb +64 -0
- data/lib/rag_ruby/generators/base.rb +11 -0
- data/lib/rag_ruby/generators/openai.rb +60 -0
- data/lib/rag_ruby/generators/ruby_llm.rb +34 -0
- data/lib/rag_ruby/loaders/active_record.rb +37 -0
- data/lib/rag_ruby/loaders/base.rb +11 -0
- data/lib/rag_ruby/loaders/directory.rb +29 -0
- data/lib/rag_ruby/loaders/file.rb +32 -0
- data/lib/rag_ruby/loaders/url.rb +55 -0
- data/lib/rag_ruby/pipeline.rb +164 -0
- data/lib/rag_ruby/prompt_template.rb +32 -0
- data/lib/rag_ruby/rails/generators/install_generator.rb +32 -0
- data/lib/rag_ruby/rails/generators/templates/initializer.rb +10 -0
- data/lib/rag_ruby/rails/generators/templates/rag.yml +30 -0
- data/lib/rag_ruby/rails/indexable.rb +64 -0
- data/lib/rag_ruby/rails/railtie.rb +20 -0
- data/lib/rag_ruby/source.rb +30 -0
- data/lib/rag_ruby/stores/base.rb +23 -0
- data/lib/rag_ruby/stores/memory.rb +57 -0
- data/lib/rag_ruby/stores/zvec.rb +44 -0
- data/lib/rag_ruby/version.rb +5 -0
- data/lib/rag_ruby.rb +92 -0
- data/rag-ruby.gemspec +36 -0
- metadata +135 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "net/http"
require "uri"
require "json"

module RagRuby
  module Generators
    # Chat-completion generator backed by the OpenAI HTTP API.
    class OpenAI < Base
      ENDPOINT = "https://api.openai.com/v1/chat/completions"

      # @param model [String] chat model identifier
      # @param api_key [String, nil] falls back to ENV["OPENAI_API_KEY"]
      # @raise [ArgumentError] when no API key can be resolved
      def initialize(model: "gpt-4o", api_key: nil)
        @model = model
        @api_key = api_key || ENV["OPENAI_API_KEY"]
        raise ArgumentError, "OpenAI API key is required (set OPENAI_API_KEY or pass api_key:)" unless @api_key
      end

      # Sends the prompt (plus optional system prompt) to the API and returns
      # { text:, tokens_used: { prompt:, completion: } }.
      def generate(prompt:, system_prompt: nil, temperature: 0.7)
        payload = {
          model: @model,
          messages: build_messages(prompt, system_prompt),
          temperature: temperature
        }

        data = post_json(payload)

        {
          text: data.dig("choices", 0, "message", "content"),
          tokens_used: token_usage(data["usage"] || {})
        }
      end

      private

      # Orders the optional system message before the user message.
      def build_messages(prompt, system_prompt)
        messages = []
        messages << { role: "system", content: system_prompt } if system_prompt
        messages << { role: "user", content: prompt }
        messages
      end

      # POSTs the JSON payload to ENDPOINT and returns the parsed response body.
      # Raises a RuntimeError with code and body on any non-2xx response.
      def post_json(payload)
        uri = URI.parse(ENDPOINT)
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = true
        http.open_timeout = 30
        http.read_timeout = 120

        request = Net::HTTP::Post.new(uri)
        request["Authorization"] = "Bearer #{@api_key}"
        request["Content-Type"] = "application/json"
        request.body = JSON.generate(payload)

        response = http.request(request)

        unless response.is_a?(Net::HTTPSuccess)
          raise "OpenAI API error (#{response.code}): #{response.body}"
        end

        JSON.parse(response.body)
      end

      # Normalizes the API usage hash into the shared tokens_used shape.
      def token_usage(usage)
        { prompt: usage["prompt_tokens"], completion: usage["completion_tokens"] }
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RagRuby
  module Generators
    # Generator that delegates text generation to the ruby_llm gem.
    class RubyLLM < Base
      # Lazily requires ruby_llm so the gem remains an optional dependency.
      #
      # @param model [String] model identifier passed to RubyLLM.chat
      # @param provider [Object, nil] stored for future use
      # @raise [LoadError] when the ruby_llm gem is not installed
      def initialize(model: "gpt-4o", provider: nil)
        begin
          require "ruby_llm"
        rescue LoadError
          raise LoadError, "ruby_llm gem is required. Add `gem 'ruby_llm'` to your Gemfile."
        end

        @model = model
        @provider = provider
      end

      # Asks the model and normalizes the reply into the shared
      # { text:, tokens_used: { prompt:, completion: } } result shape.
      def generate(prompt:, system_prompt: nil, temperature: 0.7)
        chat = ::RubyLLM.chat(model: @model)
        chat.with_temperature(temperature)
        chat.with_instructions(system_prompt) if system_prompt

        reply = chat.ask(prompt)

        {
          text: reply.content,
          tokens_used: { prompt: reply.input_tokens, completion: reply.output_tokens }
        }
      end
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RagRuby
  module Loaders
    # Builds Documents from ActiveRecord records (a relation or a single record).
    class ActiveRecord < Base
      # @param column [Symbol] attribute holding the document body
      # @param metadata_columns [Array<Symbol>] extra attributes copied into metadata
      def initialize(column: :content, metadata_columns: [])
        @column = column
        @metadata_columns = metadata_columns
      end

      # Accepts either a relation (anything responding to #find_each) or a
      # single record, and maps each record to a Document.
      # NOTE(review): relations are materialized with #to_a, so the whole
      # result set is loaded at once — batching via find_each is not used.
      def load(scope)
        records = scope.respond_to?(:find_each) ? scope.to_a : [scope]

        records.map do |record|
          Document.new(
            content: record.public_send(@column).to_s,
            metadata: build_metadata(record),
            source: "#{record.class.name}##{record.id}"
          )
        end
      end

      private

      # Always includes model/id; copies requested columns when present.
      def build_metadata(record)
        base = { model: record.class.name, id: record.id }
        @metadata_columns.each_with_object(base) do |col, meta|
          meta[col] = record.public_send(col) if record.respond_to?(col)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RagRuby
  module Loaders
    # Loads every matching text file under a directory via Loaders::File.
    class Directory < Base
      DEFAULT_GLOB = "**/*.{txt,md,markdown}"

      def initialize(glob: DEFAULT_GLOB)
        @glob = glob
        @file_loader = File.new
      end

      # Returns a flat, sorted-by-path array of Documents for all files
      # matching the glob.
      #
      # @raise [ArgumentError] when the directory does not exist
      def load(dir_path)
        root = ::File.expand_path(dir_path)
        raise ArgumentError, "Directory not found: #{root}" unless ::Dir.exist?(root)

        ::Dir.glob(::File.join(root, @glob)).sort.flat_map do |path|
          begin
            @file_loader.load(path)
          rescue ArgumentError
            # Skip unsupported file types
            []
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RagRuby
  module Loaders
    # Reads a single UTF-8 text file into a one-element Document array.
    class File < Base
      SUPPORTED_EXTENSIONS = %w[.txt .md .markdown].freeze

      # @raise [ArgumentError] when the file is missing or its extension
      #   is not in SUPPORTED_EXTENSIONS
      def load(path)
        full_path = ::File.expand_path(path)
        raise ArgumentError, "File not found: #{full_path}" unless ::File.exist?(full_path)

        ext = ::File.extname(full_path).downcase
        unless SUPPORTED_EXTENSIONS.include?(ext)
          raise ArgumentError, "Unsupported file type: #{ext}. Supported: #{SUPPORTED_EXTENSIONS.join(', ')}"
        end

        document = Document.new(
          content: ::File.read(full_path, encoding: "UTF-8"),
          metadata: {
            source: full_path,
            filename: ::File.basename(full_path),
            extension: ext,
            size: ::File.size(full_path)
          },
          source: full_path
        )
        [document]
      end
    end
  end
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "net/http"
require "uri"

module RagRuby
  module Loaders
    # Fetches an HTTP(S) URL and wraps the response body in a Document.
    class URL < Base
      # @param timeout [Integer] open/read timeout in seconds
      def initialize(timeout: 30)
        @timeout = timeout
      end

      # Returns a one-element array of Documents.
      #
      # @raise [ArgumentError] for non-HTTP(S) URLs
      # @raise [RuntimeError] on HTTP failures or too many redirects
      def load(url)
        uri = URI.parse(url)
        # URI::HTTPS is a subclass of URI::HTTP, so one check covers both.
        raise ArgumentError, "Invalid URL: #{url}" unless uri.is_a?(URI::HTTP)

        response = fetch(uri)
        # force_encoding only relabels bytes; bodies that are not actually
        # UTF-8 would carry invalid sequences, so scrub them to keep
        # downstream string operations from raising.
        content = response.body.force_encoding("UTF-8").scrub

        [Document.new(
          content: content,
          metadata: {
            source: url,
            content_type: response["content-type"],
            status: response.code.to_i
          },
          source: url
        )]
      end

      private

      # Performs the GET, following up to +redirect_limit+ redirects.
      def fetch(uri, redirect_limit: 5)
        raise "Too many redirects" if redirect_limit <= 0

        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = uri.scheme == "https"
        http.open_timeout = @timeout
        http.read_timeout = @timeout

        response = http.request(Net::HTTP::Get.new(uri))

        case response
        when Net::HTTPRedirection
          # BUG FIX: the Location header may be a relative reference;
          # URI.parse alone produced a host-less URI and broke the next
          # request. URI#merge resolves it against the current URI.
          fetch(uri.merge(response["location"]), redirect_limit: redirect_limit - 1)
        when Net::HTTPSuccess
          response
        else
          raise "HTTP #{response.code}: #{response.message}"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "securerandom"

module RagRuby
  # Orchestrates the RAG flow: load -> chunk -> embed -> store on ingest,
  # and embed -> search -> prompt -> generate on query. Lifecycle callbacks
  # registered on the Configuration fire around each stage.
  class Pipeline
    attr_reader :config

    def initialize(&block)
      @config = Configuration.new
      yield @config if block_given?
      @prompt_template = PromptTemplate.new
      @chunk_store = {} # id -> chunk mapping
    end

    # Loads +source+ with the given (or configured) loader, then chunks,
    # embeds and stores every non-empty document.
    def ingest(source, loader: nil)
      loader ||= @config.loader_instance || Loaders::File.new

      fire(:before_load, source)
      documents = loader.load(source)
      fire(:after_load, documents)

      documents.each do |doc|
        next if doc.empty?

        fire(:before_chunk, doc)
        chunks = chunk_document(doc)
        fire(:after_chunk, chunks)

        fire(:before_embed, chunks)
        embed_chunks(chunks)
        fire(:after_embed, chunks)

        fire(:before_store, chunks)
        store_chunks(chunks)
        fire(:after_store, chunks)
      end
    end

    # Convenience wrapper around #ingest with a Directory loader.
    def ingest_directory(dir_path, glob: "**/*.{md,txt}")
      ingest(dir_path, loader: Loaders::Directory.new(glob: glob))
    end

    # Answers +question+ from the indexed chunks and returns an Answer
    # carrying the generated text, the retrieved sources, token usage
    # and the wall-clock duration.
    def query(question, top_k: 5, filter: nil, temperature: 0.7, system_prompt: nil)
      start_time = Time.now

      fire(:before_query, question)

      query_embedding = @config.embedder_instance.embed(question)
      results = @config.store_instance.search(query_embedding, top_k: top_k, filter: filter)

      # Stores may return the chunk inline; otherwise fall back to our id map.
      sources = results.filter_map do |result|
        chunk = result[:chunk] || @chunk_store[result[:id]]
        Source.new(chunk: chunk, score: result[:score]) if chunk
      end

      context = build_context(sources)

      prompt_text = @prompt_template.render(context: context, question: question)
      sys_prompt = system_prompt || @prompt_template.system_prompt

      gen_result = @config.generator_instance.generate(
        prompt: prompt_text,
        system_prompt: sys_prompt,
        temperature: temperature
      )

      answer = Answer.new(
        text: gen_result[:text],
        sources: sources,
        tokens_used: gen_result[:tokens_used],
        duration: Time.now - start_time,
        query: question
      )

      fire(:after_query, question, answer)

      answer
    end

    private

    # Splits a document into Chunks, preferring chunker_ruby when available.
    def chunk_document(doc)
      begin
        require "chunker_ruby"
        chunker = ChunkerRuby::RecursiveCharacter.new(
          chunk_size: @config.chunk_size,
          chunk_overlap: @config.chunk_overlap
        )
        texts = chunker.split(doc.content)
      rescue LoadError
        # Fallback: simple chunking without chunker-ruby
        texts = simple_chunk(doc.content, @config.chunk_size, @config.chunk_overlap)
      end

      texts.each_with_index.map do |text, i|
        Chunk.new(
          text: text.respond_to?(:text) ? text.text : text.to_s,
          metadata: doc.metadata.dup,
          document_source: doc.source,
          index: i
        )
      end
    end

    # Fixed-window chunker used when chunker_ruby is unavailable.
    # BUG FIX: the previous version advanced by (size - overlap) and looped
    # forever whenever chunk_overlap >= chunk_size (stride of zero or less,
    # and the trailing break never fired). The stride is now clamped to at
    # least one character; behavior is unchanged for overlap < size.
    def simple_chunk(text, size, overlap)
      stride = [size - overlap, 1].max
      chunks = []
      start = 0
      while start < text.length
        chunks << text[start, size]
        start += stride
      end
      chunks
    end

    # Embeds all chunk texts in one batch call and attaches the vectors.
    def embed_chunks(chunks)
      embeddings = @config.embedder_instance.embed_batch(chunks.map(&:text))
      chunks.each_with_index do |chunk, i|
        chunk.embedding = embeddings[i]
      end
    end

    # Assigns each chunk a UUID, remembers it locally (for stores that only
    # return ids from search), and adds it to the configured store.
    def store_chunks(chunks)
      chunks.each do |chunk|
        id = SecureRandom.uuid
        @chunk_store[id] = chunk
        @config.store_instance.add(id, embedding: chunk.embedding, metadata: chunk.metadata, chunk: chunk)
      end
    end

    # Concatenates source texts (separated by ---) until adding the next
    # source would exceed max_chars.
    def build_context(sources, max_chars: 12_000)
      context = ""
      sources.each do |source|
        candidate = context + "\n---\n" + source.text
        break if candidate.length > max_chars
        context = candidate
      end
      context.strip
    end

    # Invokes every callback registered for +event+ with +args+.
    def fire(event, *args)
      @config.callbacks_for(event).each { |cb| cb.call(*args) }
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RagRuby
  # Holds the system prompt and the {{context}}/{{question}} user template
  # used to build the final LLM prompt.
  class PromptTemplate
    DEFAULT_SYSTEM = "You are a helpful assistant that answers questions based on the provided context."

    DEFAULT_USER = <<~PROMPT
      Answer the question based on the following context. If the context doesn't
      contain enough information to answer, say so.

      Context:
      {{context}}

      Question: {{question}}

      Answer:
    PROMPT

    attr_reader :system_prompt, :user_template

    def initialize(system_prompt: DEFAULT_SYSTEM, user_template: DEFAULT_USER)
      @system_prompt = system_prompt
      @user_template = user_template
    end

    # Substitutes the context and question into the user template's
    # {{context}} and {{question}} placeholders.
    def render(context:, question:)
      with_context = user_template.gsub("{{context}}", context)
      with_context.gsub("{{question}}", question)
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "rails/generators"

module Rag
  module Generators
    # `rails generate rag:install` — copies the default configuration
    # files into the host application and prints follow-up instructions.
    class InstallGenerator < Rails::Generators::Base
      source_root ::File.expand_path("templates", __dir__)

      desc "Install RagRuby configuration"

      # Copies the YAML configuration template.
      def create_config_file
        template "rag.yml", "config/rag.yml"
      end

      # Copies the Ruby initializer template.
      def create_initializer
        template "initializer.rb", "config/initializers/rag_ruby.rb"
      end

      # Prints next steps after the files are generated.
      def show_post_install
        say ""
        say "RagRuby installed successfully!", :green
        say ""
        say "Next steps:"
        say " 1. Edit config/rag.yml with your settings"
        say " 2. Set OPENAI_API_KEY in your environment"
        say " 3. Add `include RagRuby::Indexable` to your models"
        say ""
      end
    end
  end
end
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# RagRuby configuration is loaded from config/rag.yml
|
|
4
|
+
# You can also configure programmatically here:
|
|
5
|
+
#
|
|
6
|
+
# RagRuby.configure do |config|
|
|
7
|
+
# config.embedder :openai, model: "text-embedding-3-small"
|
|
8
|
+
# config.store :memory, dimension: 1536
|
|
9
|
+
# config.generator :openai, model: "gpt-4o"
|
|
10
|
+
# end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Shared RagRuby settings; each environment inherits via the merge key.
default: &default
  chunker:
    strategy: recursive_character
    chunk_size: 1000
    chunk_overlap: 200
  embedder:
    provider: openai
    model: text-embedding-3-small
  store:
    provider: memory
    dimension: 1536
  generator:
    provider: openai
    model: gpt-4o

development:
  <<: *default

test:
  <<: *default
  # Explicit in-memory store (overrides the merged `store` key wholesale).
  store:
    provider: memory
    dimension: 1536

production:
  <<: *default
  # Production swaps the in-memory store for the persistent zvec store.
  store:
    provider: zvec
    path: db/vectors
    dimension: 1536
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RagRuby
  # Mixin for ActiveRecord models whose content should be (re)indexed
  # into the RAG pipeline as records are created, updated or destroyed.
  module Indexable
    def self.included(base)
      base.extend ClassMethods
    end

    module ClassMethods
      # Declares which column to index and wires lifecycle callbacks.
      #
      # @param column [Symbol] attribute containing the indexable text
      # @param metadata [Proc, nil] called with the record to build metadata
      # @param chunk_strategy [Symbol] stored for chunker selection
      # @param on [Array<Symbol>] which lifecycle events trigger indexing
      def rag_index(column, metadata: nil, chunk_strategy: :recursive_character, on: [:create, :update])
        class_attribute :rag_column, default: column
        class_attribute :rag_metadata_proc, default: metadata
        class_attribute :rag_chunk_strategy, default: chunk_strategy

        after_create :rag_index_record if on.include?(:create)

        if on.include?(:update)
          # Reindex only when the indexed column actually changed.
          after_update :rag_index_record, if: -> { saved_change_to_attribute?(column) }
        end

        after_destroy :rag_remove_record if on.include?(:destroy) || on.include?(:delete)
      end
    end

    private

    # Builds a Document from this record and pushes it through the pipeline.
    # Blank content is skipped entirely.
    def rag_index_record
      content = public_send(self.class.rag_column).to_s
      return if content.strip.empty?

      metadata = self.class.rag_metadata_proc ? self.class.rag_metadata_proc.call(self) : {}

      doc = Document.new(
        content: content,
        metadata: metadata.merge(model: self.class.name, record_id: id),
        source: "#{self.class.name}##{id}"
      )

      RagRuby.pipeline.ingest(doc.source, loader: InlineLoader.new(doc))
    end

    def rag_remove_record
      # Remove from store by metadata filter
      # Implementation depends on store capabilities
    end

    # Loader that yields a single, already-constructed Document, letting a
    # record reuse the standard ingest path without touching the filesystem.
    class InlineLoader < Loaders::Base
      def initialize(document)
        @document = document
      end

      def load(_source)
        [@document]
      end
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RagRuby
  # Loads config/rag.yml (if present) at boot and applies the section
  # matching the current Rails environment.
  class Railtie < ::Rails::Railtie
    initializer "rag_ruby.configure" do |app|
      config_path = app.root.join("config", "rag.yml")

      if config_path.exist?
        require "yaml"
        require "erb"

        # The YAML may contain ERB (e.g. for secrets) and uses
        # anchors/aliases, hence aliases: true for safe_load.
        rendered = ERB.new(config_path.read).result
        all_config = YAML.safe_load(rendered, aliases: true) || {}
        env_config = all_config[Rails.env] || all_config["default"] || {}

        RagRuby.configure_from_hash(env_config)
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RagRuby
  # A retrieved chunk together with its similarity score, returned to
  # callers as part of an Answer.
  class Source
    attr_reader :chunk, :score, :document_source

    def initialize(chunk:, score:)
      @chunk = chunk
      @score = score
      @document_source = chunk.document_source
    end

    # Text of the underlying chunk.
    def text
      chunk.text
    end

    # Metadata of the underlying chunk.
    def metadata
      chunk.metadata
    end

    # Hash form for serialization / inspection.
    def to_h
      {
        text: text,
        score: score,
        document_source: document_source,
        metadata: metadata
      }
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RagRuby
  module Stores
    # Abstract interface every vector store must implement.
    class Base
      # Adds a vector under +id+.
      # FIX: Pipeline#store_chunks calls add with a chunk: keyword, but the
      # abstract signature rejected it — so calling the interface as the
      # pipeline does raised ArgumentError instead of NotImplementedError.
      # chunk: is now accepted (and may be ignored by stores).
      def add(id, embedding:, metadata: {}, chunk: nil)
        raise NotImplementedError, "#{self.class}#add must be implemented"
      end

      # Returns the top_k entries nearest to +embedding+, optionally filtered.
      def search(embedding, top_k:, filter: nil)
        raise NotImplementedError, "#{self.class}#search must be implemented"
      end

      # Removes the entry stored under +id+.
      def delete(id)
        raise NotImplementedError, "#{self.class}#delete must be implemented"
      end

      # Number of stored entries.
      def count
        raise NotImplementedError, "#{self.class}#count must be implemented"
      end
    end
  end
end
|