simple-rag-zc 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/example_config.json +0 -0
- data/exe/public/q.html +0 -12
- data/exe/run-index +0 -0
- data/exe/run-server +104 -100
- data/lib/simple_rag/version.rb +1 -1
- data/lib/simple_rag.rb +0 -0
- data/llm/embedding.rb +0 -0
- data/llm/http.rb +0 -0
- data/llm/ollama.rb +0 -0
- data/llm/openai.rb +1 -1
- data/readers/check-reader.rb +0 -0
- data/readers/note.rb +0 -0
- data/readers/reader.rb +0 -0
- data/readers/text.rb +0 -0
- data/server/cache.rb +0 -0
- data/server/discuss.rb +0 -0
- data/server/retriever.rb +13 -6
- data/server/synthesizer.rb +0 -0
- data/storage/mem.rb +0 -0
- metadata +19 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1066044473e95484b129ea587a827a02f3f28adac32251df79ed9f8ee6ebaa08
|
4
|
+
data.tar.gz: 947e9528045e8ac0d0e0845e4649c05020ad17198771453382ad6088f6e81e16
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f301b244bab50ae13b7163dfef9fd884a9b2f25b4a5b3b1b83216221b7955dbfbd82b196b55ffbf224cf8041177276b1426ef11ec6765e43281506ceec902030
|
7
|
+
data.tar.gz: 586ff8501e40e2cbdd7f3f17ae5cbe055946534fc8b115a6a72644d257807e6a8ba6cacc8191b6e53dc7a319ea912137dd0bc0590fc94305eca83052181558e6
|
data/README.md
CHANGED
@@ -18,11 +18,11 @@ To release a new version to [RubyGems](https://rubygems.org), run:
|
|
18
18
|
|
19
19
|
```bash
|
20
20
|
gem build simple-rag.gemspec
|
21
|
-
gem push simple-rag-$(ruby -Ilib -e 'require "simple_rag/version"; puts SimpleRag::VERSION').gem
|
21
|
+
gem push simple-rag-zc-$(ruby -Ilib -e 'require "simple_rag/version"; puts SimpleRag::VERSION').gem
|
22
22
|
```
|
23
23
|
|
24
24
|
Install the gem directly:
|
25
25
|
|
26
26
|
```bash
|
27
|
-
gem install simple-rag
|
27
|
+
gem install simple-rag-zc
|
28
28
|
```
|
data/example_config.json
CHANGED
File without changes
|
data/exe/public/q.html
CHANGED
@@ -230,7 +230,6 @@
|
|
230
230
|
|
231
231
|
function performAgentSearch() {
|
232
232
|
const query = searchInput.value;
|
233
|
-
const configExperiment = configExperimentCheckbox.checked
|
234
233
|
const checkedPaths = Array.from(pathsList.querySelectorAll('input[type="checkbox"]:checked'))
|
235
234
|
.map(checkbox => checkbox.name);
|
236
235
|
|
@@ -242,7 +241,6 @@
|
|
242
241
|
body: JSON.stringify({
|
243
242
|
q: query,
|
244
243
|
paths: checkedPaths,
|
245
|
-
experiment: configExperiment,
|
246
244
|
})
|
247
245
|
})
|
248
246
|
.then(response => response.json())
|
@@ -267,16 +265,6 @@
|
|
267
265
|
responseContainer.appendChild(div);
|
268
266
|
}
|
269
267
|
|
270
|
-
if (!!resp.eval) {
|
271
|
-
const div = document.createElement('div');
|
272
|
-
div.className = 'response-item';
|
273
|
-
div.style.backgroundColor = textToLightColor("experiment");
|
274
|
-
div.innerHTML = `
|
275
|
-
<div class="markdown-content">${marked.parse(resp.eval)}</div>
|
276
|
-
`;
|
277
|
-
responseContainer.appendChild(div);
|
278
|
-
}
|
279
|
-
|
280
268
|
resp.data.forEach(item => {
|
281
269
|
const div = document.createElement('div');
|
282
270
|
div.className = 'response-item';
|
data/exe/run-index
CHANGED
File without changes
|
data/exe/run-server
CHANGED
@@ -9,7 +9,7 @@
|
|
9
9
|
|
10
10
|
require "json"
|
11
11
|
require "ostruct"
|
12
|
-
require
|
12
|
+
require 'sinatra/base'
|
13
13
|
|
14
14
|
require_relative "../server/retriever"
|
15
15
|
require_relative "../server/synthesizer"
|
@@ -32,33 +32,86 @@ if OPENAI_KEY.empty?
|
|
32
32
|
exit 9
|
33
33
|
end
|
34
34
|
|
35
|
-
|
36
|
-
|
37
|
-
|
35
|
+
class SimpleRagServer < Sinatra::Application
|
36
|
+
# list all the paths that can be searched
|
37
|
+
get '/paths' do
|
38
|
+
content_type :json
|
38
39
|
|
39
|
-
|
40
|
-
|
41
|
-
|
40
|
+
resp = []
|
41
|
+
CONFIG.paths.each do |p|
|
42
|
+
resp << { "name": p.name }
|
43
|
+
end
|
44
|
+
resp.to_json
|
42
45
|
end
|
43
|
-
resp.to_json
|
44
|
-
end
|
45
46
|
|
46
|
-
# query within the paths
|
47
|
-
post '/q' do
|
48
|
-
|
47
|
+
# query within the paths
|
48
|
+
post '/q' do
|
49
|
+
content_type :json
|
49
50
|
|
50
|
-
|
51
|
+
data = JSON.parse(request.body.read)
|
51
52
|
|
52
|
-
|
53
|
-
|
53
|
+
lookup_paths = (data["paths"] || CONFIG.paths_map.keys).map do |name|
|
54
|
+
CONFIG.path_map[name]
|
55
|
+
end
|
56
|
+
|
57
|
+
topN = (data["topN"] || 20).to_i
|
58
|
+
|
59
|
+
q = data["q"]
|
60
|
+
entries = retrieve_by_embedding(lookup_paths, q)
|
61
|
+
if q.to_s.strip.length < 5 && q.to_s.split(/\s+/).length < 5
|
62
|
+
entries.concat(retrieve_by_text(lookup_paths, q))
|
63
|
+
|
64
|
+
unique = {}
|
65
|
+
entries.each do |e|
|
66
|
+
key = [e["path"], e["chunk"]]
|
67
|
+
if unique[key]
|
68
|
+
unique[key]["score"] = (unique[key]["score"] || 0) + (e["score"] || 0)
|
69
|
+
else
|
70
|
+
unique[key] = e
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
entries = unique.values
|
75
|
+
end
|
76
|
+
entries = entries.sort_by { |item| -item["score"] }.take(topN)
|
77
|
+
|
78
|
+
resp = {
|
79
|
+
data: [],
|
80
|
+
}
|
81
|
+
|
82
|
+
entries.each do |item|
|
83
|
+
resp[:data] << {
|
84
|
+
path: item["path"],
|
85
|
+
lookup: item["lookup"],
|
86
|
+
id: item["id"],
|
87
|
+
url: item["url"],
|
88
|
+
text: item["reader"].load.get_chunk(item["chunk"]),
|
89
|
+
score: item["score"],
|
90
|
+
}
|
91
|
+
end
|
92
|
+
|
93
|
+
resp.to_json
|
54
94
|
end
|
55
95
|
|
56
|
-
|
96
|
+
# agentic query - expand the query using LLM before searching
|
97
|
+
post '/q_plus' do
|
98
|
+
content_type :json
|
99
|
+
|
100
|
+
data = JSON.parse(request.body.read)
|
101
|
+
|
102
|
+
lookup_paths = (data["paths"] || CONFIG.paths_map.keys).map do |name|
|
103
|
+
CONFIG.path_map[name]
|
104
|
+
end
|
105
|
+
|
106
|
+
topN = (data["topN"] || 20).to_i
|
107
|
+
|
108
|
+
expanded_q = expand_query(data["q"])
|
109
|
+
variants = expand_variants(data["q"])
|
57
110
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
entries.concat(retrieve_by_text(lookup_paths,
|
111
|
+
entries = []
|
112
|
+
entries.concat(retrieve_by_embedding(lookup_paths, data["q"]))
|
113
|
+
entries.concat(retrieve_by_embedding(lookup_paths, expanded_q))
|
114
|
+
variants.each { |v| entries.concat(retrieve_by_text(lookup_paths, v)) }
|
62
115
|
|
63
116
|
unique = {}
|
64
117
|
entries.each do |e|
|
@@ -70,98 +123,49 @@ post '/q' do
|
|
70
123
|
end
|
71
124
|
end
|
72
125
|
|
73
|
-
|
74
|
-
end
|
75
|
-
entries = entries.sort_by { |item| -item["score"] }.take(topN)
|
76
|
-
|
77
|
-
resp = {
|
78
|
-
data: [],
|
79
|
-
}
|
80
|
-
|
81
|
-
entries.each do |item|
|
82
|
-
resp[:data] << {
|
83
|
-
path: item["path"],
|
84
|
-
lookup: item["lookup"],
|
85
|
-
id: item["id"],
|
86
|
-
url: item["url"],
|
87
|
-
text: item["reader"].load.get_chunk(item["chunk"]),
|
88
|
-
score: item["score"],
|
89
|
-
}
|
90
|
-
end
|
91
|
-
|
92
|
-
resp.to_json
|
93
|
-
end
|
126
|
+
ordered = unique.values.sort_by { |item| -item["score"] }.take(topN)
|
94
127
|
|
95
|
-
|
96
|
-
|
97
|
-
|
128
|
+
resp = {
|
129
|
+
data: [],
|
130
|
+
expanded: expanded_q,
|
131
|
+
variants: variants,
|
132
|
+
}
|
98
133
|
|
99
|
-
|
134
|
+
ordered.each do |item|
|
135
|
+
resp[:data] << {
|
136
|
+
path: item["path"],
|
137
|
+
lookup: item["lookup"],
|
138
|
+
id: item["id"],
|
139
|
+
url: item["url"],
|
140
|
+
text: item["reader"].load.get_chunk(item["chunk"]),
|
141
|
+
score: item["score"],
|
142
|
+
}
|
143
|
+
end
|
100
144
|
|
101
|
-
|
102
|
-
CONFIG.path_map[name]
|
145
|
+
resp.to_json
|
103
146
|
end
|
104
147
|
|
105
|
-
|
148
|
+
# synthesize notes into a summary
|
149
|
+
post '/synthesize' do
|
150
|
+
content_type :json
|
106
151
|
|
107
|
-
|
108
|
-
variants = expand_variants(data["q"])
|
152
|
+
data = JSON.parse(request.body.read)
|
109
153
|
|
110
|
-
|
111
|
-
entries.concat(retrieve_by_embedding(lookup_paths, data["q"]))
|
112
|
-
entries.concat(retrieve_by_embedding(lookup_paths, expanded_q))
|
113
|
-
variants.each { |v| entries.concat(retrieve_by_text(lookup_paths, v)) }
|
154
|
+
summary = synthesize_notes(data["notes"])
|
114
155
|
|
115
|
-
|
116
|
-
entries.each do |e|
|
117
|
-
key = [e["path"], e["chunk"]]
|
118
|
-
if unique[key]
|
119
|
-
unique[key]["score"] = (unique[key]["score"] || 0) + (e["score"] || 0)
|
120
|
-
else
|
121
|
-
unique[key] = e
|
122
|
-
end
|
156
|
+
{ note: summary }.to_json
|
123
157
|
end
|
124
158
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
data: [],
|
129
|
-
expanded: expanded_q,
|
130
|
-
variants: variants,
|
131
|
-
}
|
132
|
-
|
133
|
-
ordered.each do |item|
|
134
|
-
resp[:data] << {
|
135
|
-
path: item["path"],
|
136
|
-
lookup: item["lookup"],
|
137
|
-
id: item["id"],
|
138
|
-
url: item["url"],
|
139
|
-
text: item["reader"].load.get_chunk(item["chunk"]),
|
140
|
-
score: item["score"],
|
141
|
-
}
|
142
|
-
end
|
143
|
-
|
144
|
-
resp.to_json
|
145
|
-
end
|
146
|
-
|
147
|
-
# synthesize notes into a summary
|
148
|
-
post '/synthesize' do
|
149
|
-
content_type :json
|
159
|
+
# generate discussion for a single note
|
160
|
+
post '/discuss' do
|
161
|
+
content_type :json
|
150
162
|
|
151
|
-
|
163
|
+
data = JSON.parse(request.body.read)
|
152
164
|
|
153
|
-
|
165
|
+
discussion = discuss_note(data["note"])
|
154
166
|
|
155
|
-
|
167
|
+
{ discussion: discussion }.to_json
|
168
|
+
end
|
156
169
|
end
|
157
170
|
|
158
|
-
|
159
|
-
post '/discuss' do
|
160
|
-
content_type :json
|
161
|
-
|
162
|
-
data = JSON.parse(request.body.read)
|
163
|
-
|
164
|
-
discussion = discuss_note(data["note"])
|
165
|
-
|
166
|
-
{ discussion: discussion }.to_json
|
167
|
-
end
|
171
|
+
SimpleRagServer.run!
|
data/lib/simple_rag/version.rb
CHANGED
data/lib/simple_rag.rb
CHANGED
File without changes
|
data/llm/embedding.rb
CHANGED
File without changes
|
data/llm/http.rb
CHANGED
File without changes
|
data/llm/ollama.rb
CHANGED
File without changes
|
data/llm/openai.rb
CHANGED
data/readers/check-reader.rb
CHANGED
File without changes
|
data/readers/note.rb
CHANGED
File without changes
|
data/readers/reader.rb
CHANGED
File without changes
|
data/readers/text.rb
CHANGED
File without changes
|
data/server/cache.rb
CHANGED
File without changes
|
data/server/discuss.rb
CHANGED
File without changes
|
data/server/retriever.rb
CHANGED
@@ -8,8 +8,8 @@ require_relative "../llm/embedding"
|
|
8
8
|
require_relative "../readers/reader"
|
9
9
|
|
10
10
|
AGENT_PROMPT = <<~PROMPT
|
11
|
-
|
12
|
-
documents. Return only the expanded query in a single line.
|
11
|
+
Expand the user input to a better search query so it is easier to retrieve related markdown
|
12
|
+
documents using embedding. Return only the expanded query in a single line.
|
13
13
|
PROMPT
|
14
14
|
|
15
15
|
def expand_query(q)
|
@@ -17,7 +17,11 @@ def expand_query(q)
|
|
17
17
|
{ role: ROLE_SYSTEM, content: AGENT_PROMPT },
|
18
18
|
{ role: ROLE_USER, content: q },
|
19
19
|
]
|
20
|
-
|
20
|
+
|
21
|
+
query = chat(msgs).strip
|
22
|
+
STDOUT << "Expand query: #{query}\n"
|
23
|
+
|
24
|
+
query
|
21
25
|
end
|
22
26
|
|
23
27
|
def retrieve_by_embedding(lookup_paths, q)
|
@@ -78,8 +82,8 @@ def extract_url(file_path, url)
|
|
78
82
|
end
|
79
83
|
|
80
84
|
VARIANT_PROMPT = <<~PROMPT
|
81
|
-
|
82
|
-
Return
|
85
|
+
Generate three alternative search keywords based on the user input to retrieve related markdown using exact keyword matches.
|
86
|
+
Return the search keywords in one CSV line.
|
83
87
|
PROMPT
|
84
88
|
|
85
89
|
def expand_variants(q)
|
@@ -87,7 +91,10 @@ def expand_variants(q)
|
|
87
91
|
{ role: ROLE_SYSTEM, content: VARIANT_PROMPT },
|
88
92
|
{ role: ROLE_USER, content: q },
|
89
93
|
]
|
90
|
-
|
94
|
+
|
95
|
+
variants = chat(msgs).split(',')
|
96
|
+
STDOUT << "Expand variants: #{variants}\n"
|
97
|
+
variants
|
91
98
|
end
|
92
99
|
|
93
100
|
def retrieve_by_text(lookup_paths, q)
|
data/server/synthesizer.rb
CHANGED
File without changes
|
data/storage/mem.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple-rag-zc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zhuochun
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
11
|
date: 2025-06-07 00:00:00.000000000 Z
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '4.1'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rackup
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.2'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.2'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: puma
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -71,7 +85,7 @@ homepage: https://github.com/zhuochun/simple-rag
|
|
71
85
|
licenses:
|
72
86
|
- MIT
|
73
87
|
metadata: {}
|
74
|
-
post_install_message:
|
88
|
+
post_install_message:
|
75
89
|
rdoc_options: []
|
76
90
|
require_paths:
|
77
91
|
- lib
|
@@ -86,8 +100,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
86
100
|
- !ruby/object:Gem::Version
|
87
101
|
version: '0'
|
88
102
|
requirements: []
|
89
|
-
rubygems_version: 3.
|
90
|
-
signing_key:
|
103
|
+
rubygems_version: 3.4.10
|
104
|
+
signing_key:
|
91
105
|
specification_version: 4
|
92
106
|
summary: RAG on Markdown Files
|
93
107
|
test_files: []
|