simple_rag 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3152b26c0832a438b14e23a4c871e93d946b40bc6ea8bf974a415d1ec47baa6a
4
+ data.tar.gz: 71caac3f3e0c7549fc4d99e4eb2ad13791004537ee66abb1efee72c82dc04d25
5
+ SHA512:
6
+ metadata.gz: 807cad84683e4ac9079b07e2d610c788b3badcb7acbc9b7eabc96a293f0f44f76422f4f1ec3549ff568b02586c3f62942a421122add4034bed8ff355ec81d1bd
7
+ data.tar.gz: 9186c52f2f19ae30f4a9d1ec1be15ed40d558d584662b62199731cec18be257969fabb37a911e362cd1204f3d446cfb7eeb238b509b047a90d67da2e472778de
data/exe/simple_rag ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "simple_rag"
4
+
5
+ SimpleRag::CLI.start(ARGV) # Executable entry point: hands the raw command-line arguments to SimpleRag::CLI.start.
data/lib/simple_rag/cli.rb ADDED
@@ -0,0 +1,8 @@
1
module SimpleRag
  # Command-line front end of the gem.
  class CLI
    class << self
      # Prints a greeting, then builds a SimpleRag::Engine and runs it.
      #
      # @param args [Array<String>] command-line arguments; accepted but not read here
      # @return [Object] whatever SimpleRag::Engine#run returns
      def start(args)
        puts "Hello from SimpleRag!"
        engine = SimpleRag::Engine.new
        engine.run
      end
    end
  end
end
data/lib/simple_rag/embed.rb ADDED
@@ -0,0 +1,13 @@
1
module SimpleRag
  # Helpers that turn text into embedding vectors through an API client.
  class Embed
    # Embedding model requested when the caller does not pick one.
    DEFAULT_MODEL = "mistral-embed"

    # Requests the embedding for a single piece of text.
    #
    # @param client [#embeddings] API client; called with a +{model:, input:}+ hash
    # @param input [String] text to embed
    # @param model [String] embedding model name
    # @return [Array] the value found at +data[0].embedding+ in the response
    # @raise [RuntimeError] when the response carries no embedding
    def self.embed_text(client, input, model: DEFAULT_MODEL)
      response = client.embeddings({model: model, input: input})
      embedding = response&.dig("data", 0, "embedding")
      # Fail here with a clear message instead of handing nil to Numo later on.
      raise "Embeddings response did not contain an embedding" if embedding.nil?

      embedding
    end

    # Embeds every chunk (one request per chunk) and stacks the vectors.
    #
    # @param client [#embeddings] API client
    # @param chunks [Array<String>] texts to embed
    # @param model [String] embedding model name
    # @return [Numo::DFloat] array built from the per-chunk embeddings, in chunk order
    # @raise [RuntimeError] when any response carries no embedding
    def self.embed_chunks(client, chunks, model: DEFAULT_MODEL)
      vectors = chunks.map { |chunk| embed_text(client, chunk, model: model) }
      Numo::DFloat[*vectors]
    end
  end
end
data/lib/simple_rag/generate.rb ADDED
@@ -0,0 +1,15 @@
1
module SimpleRag
  # Builds the text prompt that is sent to the chat model.
  class Generate
    # Rule printed around and between the context chunks.
    SEPARATOR = "---------------------"

    # Assembles a prompt that places the retrieved chunks before the query.
    #
    # @param query [String] the user's question
    # @param retrieved_chunks [Array<String>, String, nil] context passages;
    #   a single string or nil is accepted and treated as one / zero chunks
    # @return [String] the prompt text, ending in "Answer:\n"
    def prompt(query, retrieved_chunks)
      context = Array(retrieved_chunks).join("\n#{SEPARATOR}\n")

      # The heredoc is the method's return value; no local is needed.
      <<~PROMPT
        Context information is below.
        #{SEPARATOR}
        #{context}
        #{SEPARATOR}
        Given the context information and not prior knowledge, answer the query.
        Query: #{query}
        Answer:
      PROMPT
    end
  end
end
data/lib/simple_rag/index.rb ADDED
@@ -0,0 +1,32 @@
1
module SimpleRag
  # Downloads a document, splits it into chunks, embeds the chunks and
  # builds a Faiss index over the embeddings.
  class Index
    # Where #load keeps a copy of the downloaded text (relative to the
    # current working directory).
    CACHE_PATH = "data/essay.txt"

    # Number of characters per chunk unless the caller overrides it.
    DEFAULT_CHUNK_SIZE = 2048

    # @param client [Object] API client handed to SimpleRag::Embed
    def initialize(client)
      @text = nil
      @client = client
    end

    # Downloads +url+, stores a copy at CACHE_PATH and remembers the text.
    #
    # @param url [String] document location
    # @return [String] the response body
    # @raise [RuntimeError] when the server does not answer with a 2xx status
    def load(url)
      response = HTTParty.get(url)
      # Do not index an error page as if it were the document.
      raise "Failed to download #{url} (HTTP #{response.code})" unless response.success?

      text = response.body.to_s
      # File.write does not create missing directories.
      cache_dir = File.dirname(CACHE_PATH)
      Dir.mkdir(cache_dir) unless Dir.exist?(cache_dir)
      File.write(CACHE_PATH, text)
      @text = text
    end

    # Splits +text+ into consecutive pieces of at most +chunk_size+ characters.
    #
    # @param text [String] text to split
    # @param chunk_size [Integer] maximum characters per chunk (must be positive)
    # @return [Array<String>] chunks in document order; empty for empty text
    def chunk(text, chunk_size: DEFAULT_CHUNK_SIZE)
      # each_char avoids materialising one array holding every character.
      text.each_char.each_slice(chunk_size).map(&:join)
    end

    # Embeds the chunks through SimpleRag::Embed using the stored client.
    #
    # @param chunks [Array<String>]
    # @return [Object] whatever SimpleRag::Embed.embed_chunks returns
    def embed_chunks(chunks)
      SimpleRag::Embed.embed_chunks(@client, chunks)
    end

    # Builds an in-memory Faiss L2 index holding the given embeddings.
    # Nothing is written to disk.
    #
    # @param text_embeddings [#shape] 2-D array; shape[1] is used as the vector dimension
    # @return [Faiss::IndexFlatL2] index with the embeddings added
    # @raise [ArgumentError] when the array has no second dimension
    def save(text_embeddings)
      dimension = text_embeddings.shape[1]
      raise ArgumentError, "text_embeddings must be a non-empty 2-D array" if dimension.nil?

      Faiss::IndexFlatL2.new(dimension).tap { |index| index.add(text_embeddings) }
    end
  end
end
data/lib/simple_rag/retrieve.rb ADDED
@@ -0,0 +1,29 @@
1
module SimpleRag
  # Looks up the stored chunks that are closest to a query embedding.
  class Retrieve
    attr_accessor :chunks

    # @param client [Object] API client handed to SimpleRag::Embed
    def initialize(client)
      @client = client
      @chunks = nil
      @index = nil
    end

    # Stores the chunk texts that search results are mapped back to.
    #
    # @param chunks [Array<String>]
    # @return [Array<String>] the stored chunks
    def save_chunks(chunks)
      @chunks = chunks
    end

    # Stores the index that #similarity_search queries.
    #
    # @param index [#search]
    # @return [#search] the stored index
    def save_index(index)
      @index = index
    end

    # Embeds the query and wraps the vector in a one-row Numo array.
    #
    # @param query [String]
    # @return [Numo::DFloat]
    def embed_query(query)
      Numo::DFloat[SimpleRag::Embed.embed_text(@client, query)]
    end

    # Returns the chunks belonging to the nearest neighbours of the query.
    #
    # @param question_embeddings [Object] query passed straight to the index's #search
    # @param k_neighbors_count [Integer] number of neighbours to request
    # @return [Array<String>] matching chunks, nearest first; may hold fewer
    #   than +k_neighbors_count+ entries
    # @raise [RuntimeError] when no index or chunks have been saved yet
    def similarity_search(question_embeddings, k_neighbors_count)
      raise "Index and chunks must be saved before searching" if @index.nil? || @chunks.nil?

      _distances, indices = @index.search(question_embeddings, k_neighbors_count)
      # A negative label marks a missing neighbour; used as an Array index it
      # would silently select a chunk counted from the end.
      indices.to_a.first.reject { |i| i < 0 }.map { |i| @chunks[i] }
    end
  end
end
data/lib/simple_rag/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace of the simple_rag gem.
module SimpleRag
  # Version string of this release of the gem.
  VERSION = "0.1.0"
end
data/lib/simple_rag.rb ADDED
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "httparty"
4
+ require "numo/narray"
5
+ require "faiss"
6
+ require "matrix"
7
+ require "io/console"
8
+ require "mistral-ai"
9
+ require "zeitwerk"
10
+ require "dotenv/load"
11
+ require "byebug"
12
+ require_relative "simple_rag/version"
13
+ require_relative "simple_rag/cli"
14
+
15
+ loader = Zeitwerk::Loader.for_gem # NOTE(review): presumably sets up autoloading for the lib/simple_rag/*.rb files that are not required explicitly above — confirm against the Zeitwerk docs
16
+ loader.setup
17
+
18
module SimpleRag
  # Error class namespaced to this gem.
  class Error < StandardError; end

  # Drives an interactive question/answer session: asks for a document URL,
  # indexes the document, then answers queries until the user exits.
  class Engine
    # Document used when the user submits an empty line at the URL prompt.
    DEFAULT_URL = "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt"

    # Number of chunks retrieved as context for each query.
    NEIGHBOR_COUNT = 2

    # Sends one user message to the chat endpoint and returns the reply text.
    #
    # @param client [#chat_completions] API client
    # @param user_message [String] full prompt text
    # @param model [String] chat model name
    # @return [String, nil] content of the first choice; nil when the response has none
    def run_mistral(client, user_message, model: "mistral-medium-latest")
      messages = [{role: "user", content: user_message}]
      chat_response = client.chat_completions({model: model, messages: messages})
      chat_response.dig("choices", 0, "message", "content")
    end

    # Asks for a document URL on standard input.
    #
    # @return [String] the trimmed input, or DEFAULT_URL when the input is
    #   empty or standard input has reached end of file
    def prompt_user_for_url
      print "Specify a URL to an HTML document you would like to ask questions of (Default: What I Worked On by Paul Graham): "
      input_url = read_line.to_s
      input_url.empty? ? DEFAULT_URL : input_url
    end

    # Tells whether +url+ is an http(s) URL that names a host.
    #
    # @param url [String]
    # @return [Boolean]
    def valid_url?(url)
      uri = URI.parse(url)
      # URI::HTTPS is a subclass of URI::HTTP, so one check covers both.
      # The host test rejects inputs such as "http:" that parse but cannot be fetched.
      uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?
    rescue URI::InvalidURIError
      false
    end

    # Keeps prompting until the user supplies a valid URL.
    #
    # @return [String] a URL accepted by #valid_url?
    def get_url
      url = prompt_user_for_url
      until valid_url?(url)
        puts "The URL provided is invalid. Please try again."
        url = prompt_user_for_url
      end
      url
    end

    # Runs the whole session: URL prompt, API key lookup, indexing, then the
    # query loop. The loop ends on "exit" or when standard input is closed.
    #
    # @return [void]
    # @raise [RuntimeError] when no API key is available
    def run
      url = get_url

      # Setup LLM of choice
      client = Mistral.new(
        credentials: {api_key: fetch_api_key},
        options: {server_sent_events: true}
      )

      retriever = build_retriever(client, url)

      loop do
        print "Enter your query (or type 'exit' to quit): "
        query = read_line
        break if query.nil? || query.downcase == "exit"
        next if query.empty? # nothing to ask; prompt again
        puts

        # Retrieval/Search
        question_embedding = retriever.embed_query(query)
        retrieved_chunks = retriever.similarity_search(question_embedding, NEIGHBOR_COUNT)

        # Generation
        prompt = SimpleRag::Generate.new.prompt(query, retrieved_chunks)

        puts run_mistral(client, prompt)
        puts
      end
    end

    private

    # Reads one trimmed line from standard input; nil at end of file.
    # $stdin is read directly because Kernel#gets reads from the files named
    # in ARGV when the program is started with arguments.
    def read_line
      $stdin.gets&.strip
    end

    # Returns the API key from MISTRAL_AI_KEY, or asks for it without echo.
    # A blank value counts as missing.
    def fetch_api_key
      api_key = ENV["MISTRAL_AI_KEY"].to_s.strip
      api_key = $stdin.getpass("Type your API Key: ").to_s.strip if api_key.empty?
      raise "Missing API Key" if api_key.empty?

      api_key
    end

    # Downloads, chunks, embeds and indexes the document, then returns a
    # SimpleRag::Retrieve that holds the index and the chunks.
    def build_retriever(client, url)
      puts "Initialize indexing"
      index_instance = SimpleRag::Index.new(client)
      puts "Loading url"
      text = index_instance.load(url)
      # Reported only once the download has actually happened.
      puts "Document Downloaded"
      puts "Chunk text"
      chunks = index_instance.chunk(text)
      puts "Embed chunks"
      text_embeddings = index_instance.embed_chunks(chunks)
      index = index_instance.save(text_embeddings)

      SimpleRag::Retrieve.new(client).tap do |retriever|
        retriever.save_index(index)
        retriever.save_chunks(chunks)
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,52 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simple_rag
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Landon Gray
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-08-12 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - landon.gray@hey.com
16
+ executables:
17
+ - simple_rag
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - exe/simple_rag
22
+ - lib/simple_rag.rb
23
+ - lib/simple_rag/cli.rb
24
+ - lib/simple_rag/embed.rb
25
+ - lib/simple_rag/generate.rb
26
+ - lib/simple_rag/index.rb
27
+ - lib/simple_rag/retrieve.rb
28
+ - lib/simple_rag/version.rb
29
+ homepage:
30
+ licenses: []
31
+ metadata: {}
32
+ post_install_message:
33
+ rdoc_options: []
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ required_rubygems_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ requirements: []
47
+ rubygems_version: 3.3.7
48
+ signing_key:
49
+ specification_version: 4
50
+ summary: Simple Rag is a lightweight library that transforms any Ruby project into
51
+ a simple RAG application.
52
+ test_files: []