rag-ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +9 -0
- data/LICENSE +21 -0
- data/README.md +288 -0
- data/Rakefile +11 -0
- data/lib/rag_ruby/answer.rb +29 -0
- data/lib/rag_ruby/chunk.rb +27 -0
- data/lib/rag_ruby/configuration.rb +90 -0
- data/lib/rag_ruby/document.rb +25 -0
- data/lib/rag_ruby/embedders/base.rb +19 -0
- data/lib/rag_ruby/embedders/cohere.rb +50 -0
- data/lib/rag_ruby/embedders/onnx.rb +42 -0
- data/lib/rag_ruby/embedders/openai.rb +64 -0
- data/lib/rag_ruby/generators/base.rb +11 -0
- data/lib/rag_ruby/generators/openai.rb +60 -0
- data/lib/rag_ruby/generators/ruby_llm.rb +34 -0
- data/lib/rag_ruby/loaders/active_record.rb +37 -0
- data/lib/rag_ruby/loaders/base.rb +11 -0
- data/lib/rag_ruby/loaders/directory.rb +29 -0
- data/lib/rag_ruby/loaders/file.rb +32 -0
- data/lib/rag_ruby/loaders/url.rb +55 -0
- data/lib/rag_ruby/pipeline.rb +164 -0
- data/lib/rag_ruby/prompt_template.rb +32 -0
- data/lib/rag_ruby/rails/generators/install_generator.rb +32 -0
- data/lib/rag_ruby/rails/generators/templates/initializer.rb +10 -0
- data/lib/rag_ruby/rails/generators/templates/rag.yml +30 -0
- data/lib/rag_ruby/rails/indexable.rb +64 -0
- data/lib/rag_ruby/rails/railtie.rb +20 -0
- data/lib/rag_ruby/source.rb +30 -0
- data/lib/rag_ruby/stores/base.rb +23 -0
- data/lib/rag_ruby/stores/memory.rb +57 -0
- data/lib/rag_ruby/stores/zvec.rb +44 -0
- data/lib/rag_ruby/version.rb +5 -0
- data/lib/rag_ruby.rb +92 -0
- data/rag-ruby.gemspec +36 -0
- metadata +135 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RagRuby
  module Stores
    # Vector store that keeps every entry in a Ruby Hash (keyed by id) and
    # ranks search results by cosine similarity computed in plain Ruby.
    class Memory < Base
      # One stored vector plus the data handed back with search results.
      Entry = Struct.new(:id, :embedding, :metadata, :chunk, keyword_init: true)

      # @param dimension [Integer, nil] kept in @dimension; no method in this
      #   class reads it back.
      def initialize(dimension: nil)
        @dimension = dimension
        @entries = {}
      end

      # Stores an entry under +id+, replacing any entry already stored there.
      #
      # @param id [Object] Hash key for the entry
      # @param embedding [Array<Numeric>] vector compared during #search
      # @param metadata [Hash] values matched against the #search filter
      # @param chunk [Object, nil] returned untouched in search results
      # @return [Entry] the stored entry
      def add(id, embedding:, metadata: {}, chunk: nil)
        @entries[id] = Entry.new(id: id, embedding: embedding, metadata: metadata, chunk: chunk)
      end

      # Scores the stored entries against +embedding+ and returns the best
      # matches, highest score first.
      #
      # @param embedding [Array<Numeric>] query vector
      # @param top_k [Integer] maximum number of results
      # @param filter [Hash, nil] when given, only entries whose metadata
      #   equals (==) every key/value pair are scored
      # @return [Array<Hash>] hashes with :id, :score, :metadata and :chunk
      # @raise [ArgumentError] if a scored entry's vector length differs from
      #   the query's
      def search(embedding, top_k: 5, filter: nil)
        results = @entries.values

        if filter
          results = results.select do |entry|
            filter.all? { |k, v| entry.metadata[k] == v }
          end
        end

        results
          .map { |entry| [entry, cosine_similarity(embedding, entry.embedding)] }
          .sort_by { |_, score| -score }
          .first(top_k)
          .map { |entry, score| { id: entry.id, score: score, metadata: entry.metadata, chunk: entry.chunk } }
      end

      # Removes the entry stored under +id+.
      #
      # @return [Entry, nil] the removed entry, or nil if there was none
      def delete(id)
        @entries.delete(id)
      end

      # @return [Integer] number of stored entries
      def count
        @entries.size
      end

      # Removes every stored entry.
      def clear
        @entries.clear
      end

      private

      # Cosine similarity of two equal-length numeric vectors.
      #
      # @return [Numeric] 0.0 when either vector has zero magnitude
      # @raise [ArgumentError] when the vectors differ in length
      def cosine_similarity(a, b)
        # Array#zip pads a shorter argument with nil and drops the tail of a
        # longer one, so mismatched lengths would either fail with an opaque
        # TypeError or yield a wrong score. Reject them explicitly instead.
        unless a.size == b.size
          raise ArgumentError, "embedding dimension mismatch: #{a.size} vs #{b.size}"
        end

        dot = a.zip(b).sum { |x, y| x * y }
        mag_a = Math.sqrt(a.sum { |x| x * x })
        mag_b = Math.sqrt(b.sum { |x| x * x })
        return 0.0 if mag_a == 0 || mag_b == 0

        dot / (mag_a * mag_b)
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RagRuby
  module Stores
    # Vector store that forwards its operations to a ::Zvec::Index, loaded
    # lazily from the "zvec" library when the store is constructed.
    class Zvec < Base
      # @param path [Object] passed to ::Zvec::Index.new as path:
      # @param dimension [Object] passed to ::Zvec::Index.new as dimension:
      # @raise [LoadError] when "zvec" cannot be required
      def initialize(path:, dimension:)
        @path = path
        @dimension = dimension

        # The library is required here rather than at file load, so it is
        # only needed once a Zvec store is actually built.
        begin
          require "zvec"
        rescue LoadError
          raise LoadError, "zvec-ruby gem is required for Zvec store. Add `gem 'zvec-ruby'` to your Gemfile."
        end

        @index = ::Zvec::Index.new(path: path, dimension: dimension)
      end

      # Adds a vector to the index. +chunk+ is accepted but is not passed on
      # to the index.
      def add(id, embedding:, metadata: {}, chunk: nil)
        @index.add(id, embedding: embedding, metadata: metadata)
      end

      # Asks the index for +top_k+ matches, then optionally narrows them.
      #
      # NOTE: the filter runs on what the index already returned, so a
      # filtered search can come back with fewer than +top_k+ results.
      #
      # @param filter [Hash, nil] each pair must equal (==) the value under the
      #   same key in a result's :metadata
      def search(embedding, top_k: 5, filter: nil)
        hits = @index.search(embedding, top_k: top_k)
        return hits unless filter

        hits.select do |hit|
          filter.all? { |key, expected| hit[:metadata][key] == expected }
        end
      end

      # Deletes +id+ from the index.
      def delete(id)
        @index.delete(id)
      end

      # @return whatever the index reports as its count
      def count
        @index.count
      end
    end
  end
end
|
data/lib/rag_ruby.rb
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "rag_ruby/version"
|
|
4
|
+
require_relative "rag_ruby/document"
|
|
5
|
+
require_relative "rag_ruby/chunk"
|
|
6
|
+
require_relative "rag_ruby/source"
|
|
7
|
+
require_relative "rag_ruby/answer"
|
|
8
|
+
require_relative "rag_ruby/prompt_template"
|
|
9
|
+
|
|
10
|
+
require_relative "rag_ruby/loaders/base"
|
|
11
|
+
require_relative "rag_ruby/loaders/file"
|
|
12
|
+
require_relative "rag_ruby/loaders/directory"
|
|
13
|
+
require_relative "rag_ruby/loaders/url"
|
|
14
|
+
require_relative "rag_ruby/loaders/active_record"
|
|
15
|
+
|
|
16
|
+
require_relative "rag_ruby/embedders/base"
|
|
17
|
+
require_relative "rag_ruby/embedders/openai"
|
|
18
|
+
|
|
19
|
+
require_relative "rag_ruby/stores/base"
|
|
20
|
+
require_relative "rag_ruby/stores/memory"
|
|
21
|
+
|
|
22
|
+
require_relative "rag_ruby/generators/base"
|
|
23
|
+
require_relative "rag_ruby/generators/openai"
|
|
24
|
+
|
|
25
|
+
require_relative "rag_ruby/configuration"
|
|
26
|
+
require_relative "rag_ruby/pipeline"
|
|
27
|
+
|
|
28
|
+
module RagRuby
  # Library-specific error class.
  class Error < StandardError; end

  class << self
    # Returns the module-level pipeline, building one with Pipeline.new (no
    # block) on first use.
    def pipeline
      @pipeline ||= Pipeline.new
    end

    # Replaces the module-level pipeline with a new one built from +block+.
    #
    # @return [Pipeline] the new pipeline
    def configure(&block)
      @pipeline = Pipeline.new(&block)
    end

    # Replaces the module-level pipeline with one configured from a Hash.
    #
    # Recognized top-level sections are "chunker", "embedder", "store" and
    # "generator". Keys may be Strings or Symbols at both the top level and
    # inside each section; nil is treated as an empty Hash. A missing (or
    # falsy) section is skipped, leaving that part of the pipeline untouched.
    #
    # @param hash [Hash, nil] configuration
    # @return [Pipeline] the new pipeline
    def configure_from_hash(hash)
      # Everything is resolved before Pipeline.new so the block below only
      # ever calls methods on the config object it is handed.
      settings = (hash || {}).transform_keys(&:to_s)

      chunker_config = settings["chunker"]
      chunker_config &&= chunker_config.transform_keys(&:to_s)

      # Section name => provider used when the section omits "provider".
      default_providers = { embedder: "openai", store: "memory", generator: "openai" }
      provider_calls = default_providers.filter_map do |section, default_provider|
        section_config = settings[section.to_s]
        next unless section_config

        section_config = section_config.transform_keys(&:to_s)
        opts = section_config.reject { |k, _| k == "provider" }.transform_keys(&:to_sym)
        [section, (section_config["provider"] || default_provider).to_sym, opts]
      end

      @pipeline = Pipeline.new do |config|
        if chunker_config
          config.chunker(
            (chunker_config["strategy"] || "recursive_character").to_sym,
            chunk_size: chunker_config["chunk_size"] || 1000,
            chunk_overlap: chunker_config["chunk_overlap"] || 200
          )
        end

        provider_calls.each do |section, provider, opts|
          config.public_send(section, provider, **opts)
        end
      end
    end

    # Embeds +query+ with the pipeline's embedder and searches the pipeline's
    # store with the resulting vector.
    def search(query, top_k: 5, filter: nil)
      embedding = pipeline.config.embedder_instance.embed(query)
      pipeline.config.store_instance.search(embedding, top_k: top_k, filter: filter)
    end

    # Forwards +question+ and any options to Pipeline#query.
    def ask(question, **opts)
      pipeline.query(question, **opts)
    end

    # Drops the module-level pipeline; the next #pipeline call builds a new one.
    def reset!
      @pipeline = nil
    end
  end
end
|
|
87
|
+
|
|
88
|
+
# Load the Rails integration files only when the Rails constant is already defined.
require_relative "rag_ruby/rails/railtie" if defined?(Rails)
require_relative "rag_ruby/rails/indexable" if defined?(Rails)
|
data/rag-ruby.gemspec
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "lib/rag_ruby/version"
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |spec|
  # Identity and ownership.
  spec.name = "rag-ruby"
  spec.version = RagRuby::VERSION
  spec.authors = ["Johannes Dwi Cahyo"]
  spec.email = ["johannes@example.com"]
  spec.license = "MIT"
  spec.homepage = "https://github.com/johannesdwicahyo/rag-ruby"
  spec.required_ruby_version = ">= 3.1.0"

  # Human-readable blurbs.
  spec.summary = "RAG (Retrieval-Augmented Generation) pipeline framework for Ruby and Rails"
  spec.description = "A batteries-included RAG framework that orchestrates document loading, " \
                     "chunking, embedding, vector storage, retrieval, and generation. " \
                     "Think LangChain for Ruby — simpler, more opinionated, and Rails-native."

  # Metadata links, all derived from the homepage set above.
  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/main/CHANGELOG.md"

  # Package the files git tracks, minus this gemspec itself and anything whose
  # path starts with one of the listed prefixes.
  spec.files = Dir.chdir(__dir__) do
    tracked_paths = `git ls-files -z`.split("\x0")
    tracked_paths.reject do |path|
      File.expand_path(path) == __FILE__ ||
        path.start_with?("test/", "spec/", "examples/", ".git")
    end
  end
  spec.require_paths = ["lib"]

  # Runtime dependency.
  spec.add_dependency "chunker-ruby", "~> 0.1"

  # Development-only dependencies, declared in the original order.
  { "minitest" => "~> 5.0", "rake" => "~> 13.0", "webmock" => "~> 3.0" }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: rag-ruby
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Johannes Dwi Cahyo
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: chunker-ruby
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0.1'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '0.1'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: minitest
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - "~>"
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '5.0'
|
|
33
|
+
type: :development
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '5.0'
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: rake
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - "~>"
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '13.0'
|
|
47
|
+
type: :development
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - "~>"
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '13.0'
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: webmock
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - "~>"
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '3.0'
|
|
61
|
+
type: :development
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - "~>"
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '3.0'
|
|
68
|
+
description: A batteries-included RAG framework that orchestrates document loading,
|
|
69
|
+
chunking, embedding, vector storage, retrieval, and generation. Think LangChain
|
|
70
|
+
for Ruby — simpler, more opinionated, and Rails-native.
|
|
71
|
+
email:
|
|
72
|
+
- johannes@example.com
|
|
73
|
+
executables: []
|
|
74
|
+
extensions: []
|
|
75
|
+
extra_rdoc_files: []
|
|
76
|
+
files:
|
|
77
|
+
- Gemfile
|
|
78
|
+
- LICENSE
|
|
79
|
+
- README.md
|
|
80
|
+
- Rakefile
|
|
81
|
+
- lib/rag_ruby.rb
|
|
82
|
+
- lib/rag_ruby/answer.rb
|
|
83
|
+
- lib/rag_ruby/chunk.rb
|
|
84
|
+
- lib/rag_ruby/configuration.rb
|
|
85
|
+
- lib/rag_ruby/document.rb
|
|
86
|
+
- lib/rag_ruby/embedders/base.rb
|
|
87
|
+
- lib/rag_ruby/embedders/cohere.rb
|
|
88
|
+
- lib/rag_ruby/embedders/onnx.rb
|
|
89
|
+
- lib/rag_ruby/embedders/openai.rb
|
|
90
|
+
- lib/rag_ruby/generators/base.rb
|
|
91
|
+
- lib/rag_ruby/generators/openai.rb
|
|
92
|
+
- lib/rag_ruby/generators/ruby_llm.rb
|
|
93
|
+
- lib/rag_ruby/loaders/active_record.rb
|
|
94
|
+
- lib/rag_ruby/loaders/base.rb
|
|
95
|
+
- lib/rag_ruby/loaders/directory.rb
|
|
96
|
+
- lib/rag_ruby/loaders/file.rb
|
|
97
|
+
- lib/rag_ruby/loaders/url.rb
|
|
98
|
+
- lib/rag_ruby/pipeline.rb
|
|
99
|
+
- lib/rag_ruby/prompt_template.rb
|
|
100
|
+
- lib/rag_ruby/rails/generators/install_generator.rb
|
|
101
|
+
- lib/rag_ruby/rails/generators/templates/initializer.rb
|
|
102
|
+
- lib/rag_ruby/rails/generators/templates/rag.yml
|
|
103
|
+
- lib/rag_ruby/rails/indexable.rb
|
|
104
|
+
- lib/rag_ruby/rails/railtie.rb
|
|
105
|
+
- lib/rag_ruby/source.rb
|
|
106
|
+
- lib/rag_ruby/stores/base.rb
|
|
107
|
+
- lib/rag_ruby/stores/memory.rb
|
|
108
|
+
- lib/rag_ruby/stores/zvec.rb
|
|
109
|
+
- lib/rag_ruby/version.rb
|
|
110
|
+
- rag-ruby.gemspec
|
|
111
|
+
homepage: https://github.com/johannesdwicahyo/rag-ruby
|
|
112
|
+
licenses:
|
|
113
|
+
- MIT
|
|
114
|
+
metadata:
|
|
115
|
+
homepage_uri: https://github.com/johannesdwicahyo/rag-ruby
|
|
116
|
+
source_code_uri: https://github.com/johannesdwicahyo/rag-ruby
|
|
117
|
+
changelog_uri: https://github.com/johannesdwicahyo/rag-ruby/blob/main/CHANGELOG.md
|
|
118
|
+
rdoc_options: []
|
|
119
|
+
require_paths:
|
|
120
|
+
- lib
|
|
121
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
122
|
+
requirements:
|
|
123
|
+
- - ">="
|
|
124
|
+
- !ruby/object:Gem::Version
|
|
125
|
+
version: 3.1.0
|
|
126
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
127
|
+
requirements:
|
|
128
|
+
- - ">="
|
|
129
|
+
- !ruby/object:Gem::Version
|
|
130
|
+
version: '0'
|
|
131
|
+
requirements: []
|
|
132
|
+
rubygems_version: 3.6.9
|
|
133
|
+
specification_version: 4
|
|
134
|
+
summary: RAG (Retrieval-Augmented Generation) pipeline framework for Ruby and Rails
|
|
135
|
+
test_files: []
|