rag-ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
module RagRuby
  module Stores
    # In-process vector store: entries are kept in a Hash keyed by id and
    # search candidates are ranked by cosine similarity against the query.
    class Memory < Base
      # One stored vector together with its metadata and originating chunk.
      Entry = Struct.new(:id, :embedding, :metadata, :chunk, keyword_init: true)

      # @param dimension [Integer, nil] expected embedding length; when nil,
      #   #add and #search skip the length check
      def initialize(dimension: nil)
        @dimension = dimension
        @entries = {}
      end

      # Stores the entry under +id+, replacing any entry already using that id.
      #
      # @param id [Object] key of the entry
      # @param embedding [Array<Numeric>] vector to index
      # @param metadata [Hash] attributes compared by the +filter+ of #search
      # @param chunk [Object, nil] payload handed back with search hits
      # @return [Entry] the stored entry
      # @raise [ArgumentError] if a dimension was configured and the embedding
      #   has a different length
      def add(id, embedding:, metadata: {}, chunk: nil)
        validate_dimension!(embedding)
        @entries[id] = Entry.new(id: id, embedding: embedding, metadata: metadata, chunk: chunk)
      end

      # Returns the +top_k+ entries most similar to +embedding+, best first.
      #
      # @param embedding [Array<Numeric>] query vector
      # @param top_k [Integer] maximum number of hits to return
      # @param filter [Hash, nil] when given, only entries whose metadata equals
      #   every key/value pair of the filter are considered
      # @return [Array<Hash>] hashes with :id, :score, :metadata and :chunk
      # @raise [ArgumentError] if the query length differs from the configured
      #   dimension or from the length of a candidate embedding
      def search(embedding, top_k: 5, filter: nil)
        validate_dimension!(embedding)

        candidates = @entries.values
        if filter
          candidates = candidates.select do |entry|
            filter.all? { |key, value| entry.metadata[key] == value }
          end
        end

        # The query magnitude is the same for every candidate, so compute it once.
        query_magnitude = magnitude(embedding)

        candidates
          .map { |entry| [entry, cosine_similarity(embedding, entry.embedding, query_magnitude)] }
          .sort_by { |_, score| -score }
          .first(top_k)
          .map { |entry, score| { id: entry.id, score: score, metadata: entry.metadata, chunk: entry.chunk } }
      end

      # Removes the entry stored under +id+.
      #
      # @return [Entry, nil] the removed entry, or nil when the id is unknown
      def delete(id)
        @entries.delete(id)
      end

      # @return [Integer] number of stored entries
      def count
        @entries.size
      end

      # Removes every stored entry.
      def clear
        @entries.clear
      end

      private

      # Raises unless +embedding+ has the configured dimension (no-op when no
      # dimension was configured).
      def validate_dimension!(embedding)
        return if @dimension.nil? || embedding.size == @dimension

        raise ArgumentError, "expected an embedding of dimension #{@dimension}, got #{embedding.size}"
      end

      # Euclidean norm of +vector+.
      def magnitude(vector)
        Math.sqrt(vector.sum { |x| x * x })
      end

      # Cosine similarity of +a+ and +b+; 0.0 when either vector has zero
      # magnitude. +mag_a+ lets callers reuse a precomputed magnitude of +a+.
      #
      # Vectors of different lengths are rejected explicitly: zipping them would
      # either multiply by nil or silently drop trailing components of +b+ from
      # the dot product while still counting them in its magnitude.
      def cosine_similarity(a, b, mag_a = magnitude(a))
        unless a.size == b.size
          raise ArgumentError, "embedding dimension mismatch (#{a.size} vs #{b.size})"
        end

        mag_b = magnitude(b)
        return 0.0 if mag_a.zero? || mag_b.zero?

        dot = a.zip(b).sum { |x, y| x * y }
        dot / (mag_a * mag_b)
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module RagRuby
  module Stores
    # Vector store that delegates storage and similarity search to a
    # ::Zvec::Index provided by the optional zvec-ruby gem.
    class Zvec < Base
      # @param path [String] forwarded to ::Zvec::Index.new
      # @param dimension [Integer] forwarded to ::Zvec::Index.new
      # @raise [LoadError] if the zvec library cannot be required
      def initialize(path:, dimension:)
        @path = path
        @dimension = dimension

        begin
          # Required here rather than at file load so the gem stays optional.
          require "zvec"
        rescue LoadError
          raise LoadError, "zvec-ruby gem is required for Zvec store. Add `gem 'zvec-ruby'` to your Gemfile."
        end

        @index = ::Zvec::Index.new(path: path, dimension: dimension)
      end

      # Adds an embedding to the index.
      #
      # NOTE: +chunk+ is accepted for signature compatibility with the other
      # stores but is not forwarded to the index, so it is not stored by this
      # method.
      def add(id, embedding:, metadata: {}, chunk: nil)
        @index.add(id, embedding: embedding, metadata: metadata)
      end

      # Returns up to +top_k+ hits for +embedding+.
      #
      # @param embedding [Array<Numeric>] query vector
      # @param top_k [Integer] maximum number of hits to return
      # @param filter [Hash, nil] when given, only hits whose :metadata equals
      #   every key/value pair of the filter are kept
      # @return [Array] hits as returned by the index
      def search(embedding, top_k: 5, filter: nil)
        return @index.search(embedding, top_k: top_k) unless filter

        # The filter is applied after the index lookup. Asking the index for
        # only top_k hits and filtering afterwards would return fewer than
        # top_k results whenever some of the nearest hits are filtered out, so
        # request every indexed entry and truncate after filtering.
        # NOTE(review): assumes the index returns hits best-first - confirm
        # against the zvec-ruby documentation.
        limit = [top_k, @index.count].max

        @index.search(embedding, top_k: limit)
              .select { |hit| matches_filter?(hit, filter) }
              .first(top_k)
      end

      # Removes the entry stored under +id+ from the index.
      def delete(id)
        @index.delete(id)
      end

      # @return the entry count reported by the index
      def count
        @index.count
      end

      private

      # True when every filter pair equals the hit's metadata; a hit without
      # metadata is treated as having none instead of raising.
      def matches_filter?(hit, filter)
        metadata = hit[:metadata] || {}
        filter.all? { |key, value| metadata[key] == value }
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module RagRuby
  # Version string of the rag-ruby gem; the gemspec reads it as spec.version.
  VERSION = "0.1.0"
end
data/lib/rag_ruby.rb ADDED
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "rag_ruby/version"
4
+ require_relative "rag_ruby/document"
5
+ require_relative "rag_ruby/chunk"
6
+ require_relative "rag_ruby/source"
7
+ require_relative "rag_ruby/answer"
8
+ require_relative "rag_ruby/prompt_template"
9
+
10
+ require_relative "rag_ruby/loaders/base"
11
+ require_relative "rag_ruby/loaders/file"
12
+ require_relative "rag_ruby/loaders/directory"
13
+ require_relative "rag_ruby/loaders/url"
14
+ require_relative "rag_ruby/loaders/active_record"
15
+
16
+ require_relative "rag_ruby/embedders/base"
17
+ require_relative "rag_ruby/embedders/openai"
18
+
19
+ require_relative "rag_ruby/stores/base"
20
+ require_relative "rag_ruby/stores/memory"
21
+
22
+ require_relative "rag_ruby/generators/base"
23
+ require_relative "rag_ruby/generators/openai"
24
+
25
+ require_relative "rag_ruby/configuration"
26
+ require_relative "rag_ruby/pipeline"
27
+
28
module RagRuby
  # Base error class of the library.
  class Error < StandardError; end

  class << self
    # The memoized module-wide pipeline; built with defaults on first use.
    #
    # @return [Pipeline]
    def pipeline
      @pipeline ||= Pipeline.new
    end

    # Replaces the module-wide pipeline with one configured by +block+.
    #
    # @return [Pipeline]
    def configure(&block)
      @pipeline = Pipeline.new(&block)
    end

    # Replaces the module-wide pipeline with one configured from a plain Hash
    # (for example the result of parsing a YAML file).
    #
    # Recognised sections are "chunker", "embedder", "store" and "generator".
    # Section names and the keys inside a section may be Strings or Symbols.
    # A nil or false +hash+ is treated as empty, so every section is skipped.
    #
    # @param hash [Hash, nil] configuration sections
    # @return [Pipeline]
    def configure_from_hash(hash)
      settings = (hash || {}).transform_keys(&:to_s)

      # Everything is resolved before the block runs, so the block only calls
      # methods on the yielded config object.
      chunker = chunker_settings(settings["chunker"])
      providers = {
        embedder: provider_settings(settings["embedder"], "openai"),
        store: provider_settings(settings["store"], "memory"),
        generator: provider_settings(settings["generator"], "openai")
      }.compact

      @pipeline = Pipeline.new do |config|
        if chunker
          strategy, chunker_opts = chunker
          config.chunker(strategy, **chunker_opts)
        end

        providers.each do |component, (provider, opts)|
          config.public_send(component, provider, **opts)
        end
      end
    end

    # Embeds +query+ with the configured embedder and searches the configured
    # store with the resulting vector.
    #
    # @param query [String] text to embed
    # @param top_k [Integer] maximum number of hits
    # @param filter [Hash, nil] metadata filter forwarded to the store
    def search(query, top_k: 5, filter: nil)
      embedding = pipeline.config.embedder_instance.embed(query)
      pipeline.config.store_instance.search(embedding, top_k: top_k, filter: filter)
    end

    # Forwards +question+ and any options to Pipeline#query.
    def ask(question, **opts)
      pipeline.query(question, **opts)
    end

    # Drops the memoized pipeline; the next call to .pipeline builds a new one.
    def reset!
      @pipeline = nil
    end

    private

    # Turns the chunker section into [strategy, options], or nil when the
    # section is absent.
    def chunker_settings(section)
      return unless section

      section = section.transform_keys(&:to_s)
      strategy = (section["strategy"] || "recursive_character").to_sym
      options = {
        chunk_size: section["chunk_size"] || 1000,
        chunk_overlap: section["chunk_overlap"] || 200
      }
      [strategy, options]
    end

    # Turns a provider section into [provider, options], or nil when the
    # section is absent. Every key other than "provider" becomes a
    # symbol-keyed option.
    def provider_settings(section, default_provider)
      return unless section

      section = section.transform_keys(&:to_s)
      provider = (section["provider"] || default_provider).to_sym
      options = section.reject { |key, _| key == "provider" }.transform_keys(&:to_sym)
      [provider, options]
    end
  end
end
87
+
88
# Load the Rails integration files only when the Rails constant is already defined.
if defined?(Rails)
  %w[railtie indexable].each do |integration|
    require_relative "rag_ruby/rails/#{integration}"
  end
end
data/rag-ruby.gemspec ADDED
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/rag_ruby/version"
4
+
5
# Gem specification for rag-ruby.
Gem::Specification.new do |spec|
  spec.name = "rag-ruby"
  spec.version = RagRuby::VERSION
  spec.authors = ["Johannes Dwi Cahyo"]
  spec.email = ["johannes@example.com"]

  spec.summary = "RAG (Retrieval-Augmented Generation) pipeline framework for Ruby and Rails"
  spec.description = "A batteries-included RAG framework that orchestrates document loading, " \
                     "chunking, embedding, vector storage, retrieval, and generation. " \
                     "Think LangChain for Ruby — simpler, more opinionated, and Rails-native."
  spec.homepage = "https://github.com/johannesdwicahyo/rag-ruby"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.1.0"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/main/CHANGELOG.md"

  # Package every git-tracked file except this gemspec, the test/spec/examples
  # trees and git housekeeping files.
  #
  # The gemspec is matched by basename: comparing File.expand_path(f) with
  # __FILE__ only works when __FILE__ is an absolute path, so that check could
  # let the gemspec itself into the package. The file list is read straight
  # from git without going through a shell.
  gemspec = File.basename(__FILE__)
  spec.files = IO.popen(%w[git ls-files -z], chdir: __dir__, err: IO::NULL) do |ls|
    ls.readlines("\x0", chomp: true).reject do |f|
      f == gemspec || f.start_with?("test/", "spec/", "examples/", ".git")
    end
  end
  spec.require_paths = ["lib"]

  spec.add_dependency "chunker-ruby", "~> 0.1"

  spec.add_development_dependency "minitest", "~> 5.0"
  spec.add_development_dependency "rake", "~> 13.0"
  spec.add_development_dependency "webmock", "~> 3.0"
end
metadata ADDED
@@ -0,0 +1,135 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rag-ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Johannes Dwi Cahyo
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: chunker-ruby
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '0.1'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '0.1'
26
+ - !ruby/object:Gem::Dependency
27
+ name: minitest
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '5.0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '5.0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: rake
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '13.0'
47
+ type: :development
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '13.0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: webmock
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '3.0'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '3.0'
68
+ description: A batteries-included RAG framework that orchestrates document loading,
69
+ chunking, embedding, vector storage, retrieval, and generation. Think LangChain
70
+ for Ruby — simpler, more opinionated, and Rails-native.
71
+ email:
72
+ - johannes@example.com
73
+ executables: []
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - Gemfile
78
+ - LICENSE
79
+ - README.md
80
+ - Rakefile
81
+ - lib/rag_ruby.rb
82
+ - lib/rag_ruby/answer.rb
83
+ - lib/rag_ruby/chunk.rb
84
+ - lib/rag_ruby/configuration.rb
85
+ - lib/rag_ruby/document.rb
86
+ - lib/rag_ruby/embedders/base.rb
87
+ - lib/rag_ruby/embedders/cohere.rb
88
+ - lib/rag_ruby/embedders/onnx.rb
89
+ - lib/rag_ruby/embedders/openai.rb
90
+ - lib/rag_ruby/generators/base.rb
91
+ - lib/rag_ruby/generators/openai.rb
92
+ - lib/rag_ruby/generators/ruby_llm.rb
93
+ - lib/rag_ruby/loaders/active_record.rb
94
+ - lib/rag_ruby/loaders/base.rb
95
+ - lib/rag_ruby/loaders/directory.rb
96
+ - lib/rag_ruby/loaders/file.rb
97
+ - lib/rag_ruby/loaders/url.rb
98
+ - lib/rag_ruby/pipeline.rb
99
+ - lib/rag_ruby/prompt_template.rb
100
+ - lib/rag_ruby/rails/generators/install_generator.rb
101
+ - lib/rag_ruby/rails/generators/templates/initializer.rb
102
+ - lib/rag_ruby/rails/generators/templates/rag.yml
103
+ - lib/rag_ruby/rails/indexable.rb
104
+ - lib/rag_ruby/rails/railtie.rb
105
+ - lib/rag_ruby/source.rb
106
+ - lib/rag_ruby/stores/base.rb
107
+ - lib/rag_ruby/stores/memory.rb
108
+ - lib/rag_ruby/stores/zvec.rb
109
+ - lib/rag_ruby/version.rb
110
+ - rag-ruby.gemspec
111
+ homepage: https://github.com/johannesdwicahyo/rag-ruby
112
+ licenses:
113
+ - MIT
114
+ metadata:
115
+ homepage_uri: https://github.com/johannesdwicahyo/rag-ruby
116
+ source_code_uri: https://github.com/johannesdwicahyo/rag-ruby
117
+ changelog_uri: https://github.com/johannesdwicahyo/rag-ruby/blob/main/CHANGELOG.md
118
+ rdoc_options: []
119
+ require_paths:
120
+ - lib
121
+ required_ruby_version: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - ">="
124
+ - !ruby/object:Gem::Version
125
+ version: 3.1.0
126
+ required_rubygems_version: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ requirements: []
132
+ rubygems_version: 3.6.9
133
+ specification_version: 4
134
+ summary: RAG (Retrieval-Augmented Generation) pipeline framework for Ruby and Rails
135
+ test_files: []