simple_rag 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/exe/simple_rag +5 -0
- data/lib/simple_rag/cli.rb +8 -0
- data/lib/simple_rag/embed.rb +13 -0
- data/lib/simple_rag/generate.rb +15 -0
- data/lib/simple_rag/index.rb +32 -0
- data/lib/simple_rag/retrieve.rb +29 -0
- data/lib/simple_rag/version.rb +5 -0
- data/lib/simple_rag.rb +98 -0
- metadata +52 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3152b26c0832a438b14e23a4c871e93d946b40bc6ea8bf974a415d1ec47baa6a
|
4
|
+
data.tar.gz: 71caac3f3e0c7549fc4d99e4eb2ad13791004537ee66abb1efee72c82dc04d25
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 807cad84683e4ac9079b07e2d610c788b3badcb7acbc9b7eabc96a293f0f44f76422f4f1ec3549ff568b02586c3f62942a421122add4034bed8ff355ec81d1bd
|
7
|
+
data.tar.gz: 9186c52f2f19ae30f4a9d1ec1be15ed40d558d584662b62199731cec18be257969fabb37a911e362cd1204f3d446cfb7eeb238b509b047a90d67da2e472778de
|
data/exe/simple_rag
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
module SimpleRag
  # Helpers that turn text into embedding vectors through the supplied client.
  class Embed
    class << self
      # Requests an embedding for a single input from the "mistral-embed" model.
      #
      # @param client [#embeddings] object whose +embeddings+ method accepts a
      #   params hash and returns a structure that supports +dig+
      # @param input [Object] value forwarded as the +input+ of the request
      # @return [Object, nil] whatever the response holds at
      #   +"data" -> 0 -> "embedding"+, or nil when that path is absent
      def embed_text(client, input)
        request = {model: "mistral-embed", input: input}
        client.embeddings(request).dig("data", 0, "embedding")
      end

      # Embeds every chunk (one client call per chunk) and packs the results
      # into a single Numo::DFloat.
      #
      # @param client [#embeddings] see {embed_text}
      # @param chunks [Enumerable] pieces of text to embed
      # @return [Numo::DFloat] built from the per-chunk embeddings, in order
      def embed_chunks(client, chunks)
        vectors = chunks.map { |piece| embed_text(client, piece) }
        Numo::DFloat[*vectors]
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module SimpleRag
  # Builds the text prompt that is sent to the chat model.
  class Generate
    # Renders the retrieved chunks and the user's query into a single prompt.
    #
    # @param query [#to_s] the question, interpolated after "Query: "
    # @param retrieved_chunks [Array<#to_s>] context passages; they are joined
    #   with a dashed separator line between consecutive chunks
    # @return [String] the prompt text, ending with "Answer:" and a newline
    def prompt(query, retrieved_chunks)
      context = retrieved_chunks.join("\n---------------------\n")

      <<~PROMPT
        Context information is below.
        ---------------------
        #{context}
        ---------------------
        Given the context information and not prior knowledge, answer the query.
        Query: #{query}
        Answer:
      PROMPT
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require "fileutils"

module SimpleRag
  # Downloads a document, splits it into chunks, embeds the chunks and builds
  # an in-memory Faiss index over the embeddings.
  class Index
    # Number of characters per chunk when the caller does not specify one.
    DEFAULT_CHUNK_SIZE = 2048

    # Relative path (resolved against the current working directory) where the
    # downloaded document body is written.
    CACHE_PATH = "data/essay.txt"

    # @param client [Object] client handed to SimpleRag::Embed for embedding calls
    def initialize(client)
      @text = nil
      @client = client
    end

    # Fetches +url+, stores the response body on disk and remembers it.
    #
    # The parent directory of the cache file is created first; previously the
    # write raised Errno::ENOENT whenever "data/" did not already exist.
    #
    # NOTE(review): the HTTP status is not inspected, so an error page body
    # would be stored and indexed like any other document.
    #
    # @param url [String] location passed to HTTParty.get
    # @return [String] the response body
    def load(url)
      response = HTTParty.get(url)
      text = response.body
      FileUtils.mkdir_p(File.dirname(CACHE_PATH))
      File.write(CACHE_PATH, text)
      @text = text
      text
    end

    # Splits +text+ into consecutive pieces of at most +chunk_size+ characters.
    #
    # @param text [String] text to split
    # @param chunk_size [Integer] maximum characters per chunk
    # @return [Array<String>] chunks in document order; empty for empty text
    # @raise [ArgumentError] if +chunk_size+ is not positive (from each_slice)
    def chunk(text, chunk_size: DEFAULT_CHUNK_SIZE)
      text.chars.each_slice(chunk_size).map(&:join)
    end

    # Embeds the chunks with the client given at construction time.
    #
    # @param chunks [Array<String>] chunks to embed
    # @return [Numo::DFloat] result of SimpleRag::Embed.embed_chunks
    def embed_chunks(chunks)
      SimpleRag::Embed.embed_chunks(@client, chunks)
    end

    # Builds a flat L2 Faiss index from the embeddings. Despite the name,
    # nothing is written to disk; the index is only returned.
    #
    # @param text_embeddings [Numo::DFloat] its second shape entry is used as
    #   the index dimensionality
    # @return [Faiss::IndexFlatL2] index containing the embeddings
    def save(text_embeddings)
      dimension = text_embeddings.shape[1]
      index = Faiss::IndexFlatL2.new(dimension)
      index.add(text_embeddings)
      index
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module SimpleRag
  # Looks up the chunks most similar to a query using a previously built index.
  class Retrieve
    attr_accessor :chunks

    # @param client [Object] client handed to SimpleRag::Embed for embedding calls
    def initialize(client)
      @client = client
      @chunks = nil
      @index = nil
    end

    # Stores the chunks that index positions refer to.
    #
    # @param chunks [Array<String>] chunks in the same order they were indexed
    # @return [Array<String>] the stored chunks
    def save_chunks(chunks)
      @chunks = chunks
    end

    # Stores the index that {similarity_search} queries.
    #
    # @param index [#search] object whose +search(vectors, k)+ returns a
    #   distances/indices pair
    # @return [Object] the stored index
    def save_index(index)
      @index = index
    end

    # Embeds the query and wraps the vector in a one-row Numo::DFloat.
    #
    # @param query [String] the user's question
    # @return [Numo::DFloat] the query embedding as a single row
    def embed_query(query)
      query_embedding = SimpleRag::Embed.embed_text(@client, query)
      Numo::DFloat[query_embedding]
    end

    # Returns the stored chunks for the nearest neighbours of the first query row.
    #
    # Faiss reports a label of -1 when fewer than +k_neighbors_count+ vectors
    # are available. A negative subscript on a Ruby Array counts from the end,
    # so those placeholders used to yield the last chunk as a bogus result;
    # they are filtered out here, as are labels with no matching chunk.
    #
    # @param question_embeddings [Object] query vectors passed to the index
    # @param k_neighbors_count [Integer] number of neighbours to request
    # @return [Array<String>] matching chunks, nearest first; may be shorter
    #   than +k_neighbors_count+
    def similarity_search(question_embeddings, k_neighbors_count)
      _distances, indices = @index.search(question_embeddings, k_neighbors_count)
      first_row = Array(indices.to_a.first)

      first_row
        .select { |position| position >= 0 }
        .map { |position| @chunks[position] }
        .compact
    end
  end
end
|
data/lib/simple_rag.rb
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "httparty"
|
4
|
+
require "numo/narray"
|
5
|
+
require "faiss"
|
6
|
+
require "matrix"
|
7
|
+
require "io/console"
|
8
|
+
require "mistral-ai"
|
9
|
+
require "zeitwerk"
|
10
|
+
require "dotenv/load"
|
11
|
+
require "byebug"
|
12
|
+
require_relative "simple_rag/version"
|
13
|
+
require_relative "simple_rag/cli"
|
14
|
+
|
15
|
+
# Set up Zeitwerk autoloading for this gem's lib directory.
Zeitwerk::Loader.for_gem.setup
|
17
|
+
|
18
|
+
module SimpleRag
  class Error < StandardError; end

  # Interactive command-line flow: ask for a document URL, index it, then
  # answer questions about it until the user exits.
  class Engine
    DEFAULT_URL = "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt"

    # Number of chunks retrieved as context for each query.
    NEIGHBOR_COUNT = 2

    # Sends a single user message to the chat endpoint.
    #
    # @param client [#chat_completions] chat client
    # @param user_message [String] content of the user message
    # @param model [String] model name forwarded in the request
    # @return [Object, nil] value at "choices" -> 0 -> "message" -> "content"
    #   in the response, or nil when that path is absent
    def run_mistral(client, user_message, model: "mistral-medium-latest")
      messages = [{role: "user", content: user_message}]
      chat_response = client.chat_completions({model: model, messages: messages})
      chat_response.dig("choices", 0, "message", "content")
    end

    # Asks for a document URL on standard input.
    #
    # @return [String] the entered URL with surrounding whitespace removed, or
    #   DEFAULT_URL when the input is blank or standard input is at EOF
    def prompt_user_for_url
      print "Specify a URL to an HTML document you would like to ask questions of (Default: What I Worked On by Paul Graham): "
      input_url = read_line.to_s.strip
      input_url.empty? ? DEFAULT_URL : input_url
    end

    # Reports whether +url+ parses as an http(s) URL that names a host.
    # URI::HTTPS is a subclass of URI::HTTP, so one type check covers both.
    #
    # @param url [String] candidate URL
    # @return [Boolean]
    def valid_url?(url)
      uri = URI.parse(url)
      uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?
    rescue URI::InvalidURIError
      false
    end

    # Prompts repeatedly until a valid URL is supplied.
    #
    # @return [String] a URL accepted by {valid_url?}
    def get_url
      url = prompt_user_for_url
      until valid_url?(url)
        puts "The URL provided is invalid. Please try again."
        url = prompt_user_for_url
      end
      url
    end

    # Runs the whole interactive session.
    #
    # @return [void]
    # @raise [RuntimeError] when no API key is available
    def run
      url = get_url
      client = build_client
      retrieve_instance = build_retriever(client, url)

      loop do
        print "Enter your query (or type 'exit' to quit): "
        query = read_line
        # nil means standard input was closed; treat it like "exit".
        break if query.nil? || query.downcase == "exit"
        next if query.strip.empty?

        puts
        puts answer(client, retrieve_instance, query)
        puts
      end
    end

    private

    # Reads one line from standard input without its line terminator.
    # Uses $stdin directly because Kernel#gets would try to read from files
    # named in ARGV when the executable is started with arguments.
    #
    # @return [String, nil] the line, or nil at end of input
    def read_line
      $stdin.gets&.chomp
    end

    # Creates the Mistral client from MISTRAL_AI_KEY or an interactive prompt.
    # A blank key is treated as missing; an empty string is truthy in Ruby, so
    # the original presence check could never fail on empty input.
    def build_client
      api_key = ENV["MISTRAL_AI_KEY"]
      api_key = STDIN.getpass("Type your API Key: ") if api_key.to_s.strip.empty?
      raise "Missing API Key" if api_key.to_s.strip.empty?

      Mistral.new(
        credentials: {api_key: api_key},
        options: {server_sent_events: true}
      )
    end

    # Downloads, chunks, embeds and indexes the document at +url+ and returns
    # a retriever wired to the resulting index and chunks.
    def build_retriever(client, url)
      puts "Initialize indexing"
      index_instance = SimpleRag::Index.new(client)
      puts "Loading url"
      text = index_instance.load(url)
      # Printed only after the download has actually completed.
      puts "Document Downloaded"
      puts "Chunk text"
      chunks = index_instance.chunk(text)
      puts "Embed chunks"
      text_embeddings = index_instance.embed_chunks(chunks)
      index = index_instance.save(text_embeddings)

      retrieve_instance = SimpleRag::Retrieve.new(client)
      retrieve_instance.save_index(index)
      retrieve_instance.save_chunks(chunks)
      retrieve_instance
    end

    # Retrieves context for +query+ and asks the model to answer from it.
    def answer(client, retrieve_instance, query)
      question_embedding = retrieve_instance.embed_query(query)
      retrieved_chunks = retrieve_instance.similarity_search(question_embedding, NEIGHBOR_COUNT)
      prompt = SimpleRag::Generate.new.prompt(query, retrieved_chunks)
      run_mistral(client, prompt)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: simple_rag
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Landon Gray
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-08-12 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description:
|
14
|
+
email:
|
15
|
+
- landon.gray@hey.com
|
16
|
+
executables:
|
17
|
+
- simple_rag
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- exe/simple_rag
|
22
|
+
- lib/simple_rag.rb
|
23
|
+
- lib/simple_rag/cli.rb
|
24
|
+
- lib/simple_rag/embed.rb
|
25
|
+
- lib/simple_rag/generate.rb
|
26
|
+
- lib/simple_rag/index.rb
|
27
|
+
- lib/simple_rag/retrieve.rb
|
28
|
+
- lib/simple_rag/version.rb
|
29
|
+
homepage:
|
30
|
+
licenses: []
|
31
|
+
metadata: {}
|
32
|
+
post_install_message:
|
33
|
+
rdoc_options: []
|
34
|
+
require_paths:
|
35
|
+
- lib
|
36
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
requirements: []
|
47
|
+
rubygems_version: 3.3.7
|
48
|
+
signing_key:
|
49
|
+
specification_version: 4
|
50
|
+
summary: Simple Rag is a lightweight library that transforms any Ruby project into
|
51
|
+
a simple RAG application.
|
52
|
+
test_files: []
|