simple-rag-zc 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/example_config.json +5 -2
- data/exe/public/q.html +1 -58
- data/exe/public/setup.html +136 -0
- data/exe/run-index +1 -2
- data/exe/run-server +12 -5
- data/exe/run-setup +55 -0
- data/lib/simple_rag/version.rb +1 -1
- data/lib/simple_rag.rb +1 -2
- data/llm/llm.rb +57 -0
- data/llm/ollama.rb +24 -4
- data/llm/openai.rb +7 -14
- data/readers/journal.rb +69 -0
- data/readers/reader.rb +14 -5
- data/readers/text.rb +16 -3
- data/server/discuss.rb +1 -1
- data/server/retriever.rb +1 -3
- data/server/synthesizer.rb +1 -1
- metadata +6 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 906d584b90596bde4fef5efef3f82cceb300284705d6c33023f84dff903f4d2e
|
4
|
+
data.tar.gz: 8d1c292cefc14246e918e06d44cdef48bf31548fd5f1aeaa375b527ee4603458
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 94d4c13cd41807bf416882f8241f2ea103d00bacf6662b9b901c92a2f4e65463bdae64d204d955850fcfa9703f36ade459a5d23df837be430e4b7373fa70aa1a
|
7
|
+
data.tar.gz: 79f7b78fa5f363b0c5c430f6a4d2af78485cf68b533e26006ef1548a535397a5ffed7e8f19f4d7a6602de1f9ccfdfe73c0f50687fe279796b4ab53d271ad7123
|
data/example_config.json
CHANGED
@@ -15,7 +15,10 @@
|
|
15
15
|
"reader": "text",
|
16
16
|
"threshold": 0.3,
|
17
17
|
"dir": "D:\\Studies\\tmp\\learning",
|
18
|
-
"out": "D:\\Studies\\tmp\\learning
|
18
|
+
"out": "D:\\Studies\\tmp\\learning\\learning.dt",
|
19
|
+
"nameMatch": "talks-*.md",
|
20
|
+
"url": "",
|
21
|
+
"searchDefault": false
|
19
22
|
}
|
20
23
|
]
|
21
|
-
}
|
24
|
+
}
|
data/exe/public/q.html
CHANGED
@@ -114,7 +114,7 @@
|
|
114
114
|
checkbox.type = 'checkbox';
|
115
115
|
checkbox.id = item.name;
|
116
116
|
checkbox.name = item.name;
|
117
|
-
checkbox.checked =
|
117
|
+
checkbox.checked = !!item.searchDefault;
|
118
118
|
|
119
119
|
const label = document.createElement('label');
|
120
120
|
label.htmlFor = item.name;
|
@@ -228,63 +228,6 @@
|
|
228
228
|
.catch(error => console.error('Error performing agent search:', error));
|
229
229
|
}
|
230
230
|
|
231
|
-
function performAgentSearch() {
|
232
|
-
const query = searchInput.value;
|
233
|
-
const checkedPaths = Array.from(pathsList.querySelectorAll('input[type="checkbox"]:checked'))
|
234
|
-
.map(checkbox => checkbox.name);
|
235
|
-
|
236
|
-
fetch('http://localhost:4567/q_plus', {
|
237
|
-
method: 'POST',
|
238
|
-
headers: {
|
239
|
-
'Content-Type': 'application/json',
|
240
|
-
},
|
241
|
-
body: JSON.stringify({
|
242
|
-
q: query,
|
243
|
-
paths: checkedPaths,
|
244
|
-
})
|
245
|
-
})
|
246
|
-
.then(response => response.json())
|
247
|
-
.then(resp => {
|
248
|
-
responseContainer.innerHTML = '';
|
249
|
-
|
250
|
-
if (!!resp.expanded) {
|
251
|
-
const div = document.createElement('div');
|
252
|
-
div.className = 'response-item';
|
253
|
-
div.style.backgroundColor = textToLightColor("expanded");
|
254
|
-
div.innerHTML = `<div><strong>Expanded Query:</strong> ${resp.expanded}</div>`;
|
255
|
-
responseContainer.appendChild(div);
|
256
|
-
}
|
257
|
-
|
258
|
-
if (resp.variants && resp.variants.length > 0) {
|
259
|
-
const div = document.createElement('div');
|
260
|
-
div.className = 'response-item';
|
261
|
-
div.style.backgroundColor = textToLightColor("variants");
|
262
|
-
div.innerHTML = `
|
263
|
-
<div><strong>Variants:</strong> ${resp.variants.join(', ')}</div>
|
264
|
-
`;
|
265
|
-
responseContainer.appendChild(div);
|
266
|
-
}
|
267
|
-
|
268
|
-
resp.data.forEach(item => {
|
269
|
-
const div = document.createElement('div');
|
270
|
-
div.className = 'response-item';
|
271
|
-
div.style.backgroundColor = textToLightColor(item.lookup);
|
272
|
-
div.dataset.note = item.text;
|
273
|
-
div.innerHTML = `
|
274
|
-
<div><strong>Path:</strong> <a href="${item.url}">${item.id}</a></div>
|
275
|
-
<div><strong>Score:</strong> ${item.score}</div>
|
276
|
-
<div class="markdown-content">${marked.parse(item.text)}</div>
|
277
|
-
`;
|
278
|
-
const btn = document.createElement('button');
|
279
|
-
btn.className = 'discuss-button';
|
280
|
-
btn.textContent = 'Discuss';
|
281
|
-
btn.addEventListener('click', () => discussCard(div));
|
282
|
-
div.appendChild(btn);
|
283
|
-
responseContainer.appendChild(div);
|
284
|
-
});
|
285
|
-
})
|
286
|
-
.catch(error => console.error('Error performing agent search:', error));
|
287
|
-
}
|
288
231
|
|
289
232
|
function textToLightColor(text) {
|
290
233
|
// Generate a hash from the text
|
@@ -0,0 +1,136 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang="en">
|
3
|
+
<head>
|
4
|
+
<meta charset="UTF-8">
|
5
|
+
<title>Setup SimpleRag</title>
|
6
|
+
<style>
|
7
|
+
body { font-family: Arial, sans-serif; margin: 20px; }
|
8
|
+
.path-item { margin-bottom: 20px; padding: 15px; border: 1px solid #ccc; }
|
9
|
+
.path-item input[type="text"], .path-item select { width: 300px; margin-bottom: 10px; }
|
10
|
+
.path-item label { display: block; margin-bottom: 5px; }
|
11
|
+
</style>
|
12
|
+
</head>
|
13
|
+
<body>
|
14
|
+
<h1>Setup SimpleRag Config</h1>
|
15
|
+
<form id="config-form">
|
16
|
+
<h2>Paths</h2>
|
17
|
+
<div id="paths"></div>
|
18
|
+
<button type="button" onclick="addPath()">Add Path</button>
|
19
|
+
<h2>Chat</h2>
|
20
|
+
<label>Provider: <input id="chat_provider" value="openai"></label><br>
|
21
|
+
<label>URL: <input id="chat_url" value=""></label><br>
|
22
|
+
<label>Model: <input id="chat_model" value="gpt-3.5-turbo-16k"></label>
|
23
|
+
<h2>Embedding</h2>
|
24
|
+
<label>Provider: <input id="emb_provider" value="openai"></label><br>
|
25
|
+
<label>URL: <input id="emb_url" value=""></label><br>
|
26
|
+
<label>Model: <input id="emb_model" value="text-embedding-3-small"></label>
|
27
|
+
<br><br>
|
28
|
+
<button type="submit">Save</button>
|
29
|
+
</form>
|
30
|
+
<script>
|
31
|
+
let READERS = [];
|
32
|
+
|
33
|
+
function fillReaderSelect(select, value){
|
34
|
+
select.innerHTML = READERS.map(r=>`<option value="${r}">${r}</option>`).join('');
|
35
|
+
if(value){ select.value = value; }
|
36
|
+
}
|
37
|
+
|
38
|
+
function createPathDiv(p){
|
39
|
+
const idx = document.querySelectorAll('.path-item').length;
|
40
|
+
const div = document.createElement('div');
|
41
|
+
div.className = 'path-item';
|
42
|
+
div.innerHTML = `
|
43
|
+
<label>Dir: <input type="text" class="pdir" id="dir_${idx}" value="${p?.dir||''}">
|
44
|
+
<input type="file" webkitdirectory directory style="display:none" id="dirsel_${idx}">
|
45
|
+
<button type="button" onclick="document.getElementById('dirsel_${idx}').click()">Select Folder</button></label>
|
46
|
+
<label>Name: <input type="text" class="pname" value="${p?.name||''}"></label>
|
47
|
+
<label>Reader: <select class="preader"></select></label>
|
48
|
+
<label>Threshold: <input type="text" class="pthreshold" value="${p?.threshold||0.3}"></label>
|
49
|
+
<label>Out: <input type="text" class="pout" value="${p?.out||''}"></label>
|
50
|
+
<label>NameMatch: <input type="text" class="pnamematch" value="${p?.nameMatch||''}"></label>
|
51
|
+
<label>URL: <input type="text" class="purl" value="${p?.url||''}"></label>
|
52
|
+
<label>Search Default: <input type="checkbox" class="psearchdefault" ${p?.searchDefault?'checked':''}></label>
|
53
|
+
<button type="button" onclick="this.parentNode.remove()">Remove</button>
|
54
|
+
`;
|
55
|
+
const dirInput = div.querySelector('#dir_'+idx);
|
56
|
+
const nameInput = div.querySelector('.pname');
|
57
|
+
const outInput = div.querySelector('.pout');
|
58
|
+
|
59
|
+
function updateNameOut(){
|
60
|
+
if(!dirInput.value) return;
|
61
|
+
const parts = dirInput.value.replace(/\\/g,'/').split('/').filter(Boolean);
|
62
|
+
const name = parts[parts.length-1] || '';
|
63
|
+
nameInput.value = name;
|
64
|
+
outInput.value = dirInput.value.replace(/[/\\]$/, '') + '/' + name + '.dt';
|
65
|
+
}
|
66
|
+
|
67
|
+
div.querySelector('#dirsel_'+idx).addEventListener('change', function(){
|
68
|
+
if(this.files.length>0){
|
69
|
+
const rel = this.files[0].webkitRelativePath;
|
70
|
+
const dir = rel.split('/')[0];
|
71
|
+
dirInput.value = dir;
|
72
|
+
updateNameOut();
|
73
|
+
}
|
74
|
+
});
|
75
|
+
dirInput.addEventListener('change', updateNameOut);
|
76
|
+
fillReaderSelect(div.querySelector('.preader'), p?.reader||'text');
|
77
|
+
return div;
|
78
|
+
}
|
79
|
+
|
80
|
+
function addPath(p){
|
81
|
+
document.getElementById('paths').appendChild(createPathDiv(p));
|
82
|
+
}
|
83
|
+
|
84
|
+
function loadConfig(readers){
|
85
|
+
READERS = readers;
|
86
|
+
fetch('/config').then(r=>r.json()).then(cfg=>{
|
87
|
+
if(cfg.chat){
|
88
|
+
document.getElementById('chat_provider').value = cfg.chat.provider||'openai';
|
89
|
+
document.getElementById('chat_url').value = cfg.chat.url||'';
|
90
|
+
document.getElementById('chat_model').value = cfg.chat.model||'gpt-3.5-turbo-16k';
|
91
|
+
}
|
92
|
+
if(cfg.embedding){
|
93
|
+
document.getElementById('emb_provider').value = cfg.embedding.provider||'openai';
|
94
|
+
document.getElementById('emb_url').value = cfg.embedding.url||'';
|
95
|
+
document.getElementById('emb_model').value = cfg.embedding.model||'text-embedding-3-small';
|
96
|
+
}
|
97
|
+
if(cfg.paths && cfg.paths.length>0){
|
98
|
+
cfg.paths.forEach(p=>addPath(p));
|
99
|
+
}else{
|
100
|
+
addPath();
|
101
|
+
}
|
102
|
+
});
|
103
|
+
}
|
104
|
+
|
105
|
+
fetch('/readers').then(r=>r.json()).then(loadConfig);
|
106
|
+
|
107
|
+
document.getElementById('config-form').addEventListener('submit', function(e){
|
108
|
+
e.preventDefault();
|
109
|
+
const paths=[];
|
110
|
+
document.querySelectorAll('.path-item').forEach(div=>{
|
111
|
+
paths.push({
|
112
|
+
dir: div.querySelector('.pdir').value,
|
113
|
+
name: div.querySelector('.pname').value,
|
114
|
+
reader: div.querySelector('.preader').value,
|
115
|
+
threshold: parseFloat(div.querySelector('.pthreshold').value)||0,
|
116
|
+
out: div.querySelector('.pout').value,
|
117
|
+
nameMatch: div.querySelector('.pnamematch').value,
|
118
|
+
url: div.querySelector('.purl').value,
|
119
|
+
searchDefault: div.querySelector('.psearchdefault').checked
|
120
|
+
});
|
121
|
+
});
|
122
|
+
const config={
|
123
|
+
chat:{provider:document.getElementById('chat_provider').value,
|
124
|
+
url:document.getElementById('chat_url').value,
|
125
|
+
model:document.getElementById('chat_model').value},
|
126
|
+
embedding:{provider:document.getElementById('emb_provider').value,
|
127
|
+
url:document.getElementById('emb_url').value,
|
128
|
+
model:document.getElementById('emb_model').value},
|
129
|
+
paths:paths
|
130
|
+
};
|
131
|
+
fetch('/save',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(config)})
|
132
|
+
.then(()=>alert('Saved'));
|
133
|
+
});
|
134
|
+
</script>
|
135
|
+
</body>
|
136
|
+
</html>
|
data/exe/run-index
CHANGED
data/exe/run-server
CHANGED
@@ -23,6 +23,7 @@ end
|
|
23
23
|
config = JSON.parse(File.read(ARGV[0]))
|
24
24
|
CONFIG = OpenStruct.new(config)
|
25
25
|
CONFIG.paths = CONFIG.paths.map { |p| OpenStruct.new(p) }
|
26
|
+
CONFIG.paths.each { |p| p.searchDefault = !!p.searchDefault }
|
26
27
|
CONFIG.path_map = {}
|
27
28
|
CONFIG.paths.each { |p| CONFIG.path_map[p.name] = p }
|
28
29
|
|
@@ -39,7 +40,7 @@ class SimpleRagServer < Sinatra::Application
|
|
39
40
|
|
40
41
|
resp = []
|
41
42
|
CONFIG.paths.each do |p|
|
42
|
-
resp << {
|
43
|
+
resp << { name: p.name, searchDefault: p.searchDefault }
|
43
44
|
end
|
44
45
|
resp.to_json
|
45
46
|
end
|
@@ -50,9 +51,12 @@ class SimpleRagServer < Sinatra::Application
|
|
50
51
|
|
51
52
|
data = JSON.parse(request.body.read)
|
52
53
|
|
53
|
-
|
54
|
-
|
54
|
+
selected = data["paths"]
|
55
|
+
if !selected || selected.empty?
|
56
|
+
selected = CONFIG.paths.select { |p| p.searchDefault }.map(&:name)
|
57
|
+
selected = CONFIG.path_map.keys if selected.empty?
|
55
58
|
end
|
59
|
+
lookup_paths = selected.map { |name| CONFIG.path_map[name] }
|
56
60
|
|
57
61
|
topN = (data["topN"] || 20).to_i
|
58
62
|
|
@@ -99,9 +103,12 @@ class SimpleRagServer < Sinatra::Application
|
|
99
103
|
|
100
104
|
data = JSON.parse(request.body.read)
|
101
105
|
|
102
|
-
|
103
|
-
|
106
|
+
selected = data["paths"]
|
107
|
+
if !selected || selected.empty?
|
108
|
+
selected = CONFIG.paths.select { |p| p.searchDefault }.map(&:name)
|
109
|
+
selected = CONFIG.path_map.keys if selected.empty?
|
104
110
|
end
|
111
|
+
lookup_paths = selected.map { |name| CONFIG.path_map[name] }
|
105
112
|
|
106
113
|
topN = (data["topN"] || 20).to_i
|
107
114
|
|
data/exe/run-setup
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
# Setup a config JSON interactively via a local web page
|
5
|
+
#
|
6
|
+
# Usage: run-setup config.json
|
7
|
+
|
8
|
+
require "json"
|
9
|
+
require 'sinatra/base'
|
10
|
+
require_relative '../readers/reader'
|
11
|
+
|
12
|
+
if ARGV.length != 1
|
13
|
+
STDOUT << "Invalid arguments received, need a config file\n"
|
14
|
+
exit 1
|
15
|
+
end
|
16
|
+
|
17
|
+
config_path = File.expand_path(ARGV[0])
|
18
|
+
|
19
|
+
class SetupServer < Sinatra::Base
|
20
|
+
set :bind, '0.0.0.0'
|
21
|
+
set :port, 4568
|
22
|
+
set :public_folder, File.expand_path('public', __dir__)
|
23
|
+
set :config_path, nil
|
24
|
+
|
25
|
+
get '/' do
|
26
|
+
send_file File.join(settings.public_folder, 'setup.html')
|
27
|
+
end
|
28
|
+
|
29
|
+
get '/readers' do
|
30
|
+
content_type :json
|
31
|
+
READERS.to_json
|
32
|
+
end
|
33
|
+
|
34
|
+
get '/config' do
|
35
|
+
content_type :json
|
36
|
+
|
37
|
+
if File.exist?(settings.config_path)
|
38
|
+
File.read(settings.config_path)
|
39
|
+
else
|
40
|
+
{}.to_json
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
post '/save' do
|
45
|
+
content_type :json
|
46
|
+
|
47
|
+
data = JSON.parse(request.body.read)
|
48
|
+
File.write(settings.config_path, JSON.pretty_generate(data))
|
49
|
+
|
50
|
+
{ status: 'ok' }.to_json
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
SetupServer.set :config_path, config_path
|
55
|
+
SetupServer.run!
|
data/lib/simple_rag/version.rb
CHANGED
data/lib/simple_rag.rb
CHANGED
data/llm/llm.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require_relative "openai"
|
2
|
+
require_relative "ollama"
|
3
|
+
|
4
|
+
ROLE_SYSTEM = "system"
|
5
|
+
ROLE_USER = "user"
|
6
|
+
ROLE_ASSISTANT = "assistant"
|
7
|
+
NEXT_ROLE = ->(role) { role != ROLE_USER ? ROLE_USER : ROLE_ASSISTANT }
|
8
|
+
|
9
|
+
# Fetch configuration value with defaults
|
10
|
+
# Supports Hash or OpenStruct configuration objects
|
11
|
+
|
12
|
+
def cfg(section, key, default)
|
13
|
+
return default unless defined?(CONFIG)
|
14
|
+
sec = CONFIG.send(section) if CONFIG.respond_to?(section)
|
15
|
+
return default unless sec
|
16
|
+
|
17
|
+
if sec.is_a?(Hash)
|
18
|
+
sec.fetch(key, default)
|
19
|
+
elsif sec.respond_to?(key)
|
20
|
+
val = sec.send(key)
|
21
|
+
val.nil? ? default : val
|
22
|
+
else
|
23
|
+
default
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
# Route chat requests based on provider configuration
|
28
|
+
|
29
|
+
def chat(messages, opts = {})
|
30
|
+
provider = cfg(:chat, 'provider', 'openai').downcase
|
31
|
+
case provider
|
32
|
+
when 'ollama'
|
33
|
+
model = cfg(:chat, 'model', 'llama2')
|
34
|
+
url = cfg(:chat, 'url', 'http://localhost:11434/api/chat')
|
35
|
+
ollama_chat(messages, model, url, opts)
|
36
|
+
else
|
37
|
+
model = cfg(:chat, 'model', 'gpt-4.1-mini')
|
38
|
+
url = cfg(:chat, 'url', 'https://api.openai.com/v1/chat/completions')
|
39
|
+
openai_chat(messages, model, url, opts)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# Route embedding requests based on provider configuration
|
44
|
+
|
45
|
+
def embedding(txts, opts = {})
|
46
|
+
provider = cfg(:embedding, 'provider', 'openai').downcase
|
47
|
+
case provider
|
48
|
+
when 'ollama'
|
49
|
+
model = cfg(:embedding, 'model', 'nomic-embed-text')
|
50
|
+
url = cfg(:embedding, 'url', 'http://localhost:11434/api/embeddings')
|
51
|
+
ollama_embedding(txts, model, url, opts)
|
52
|
+
else
|
53
|
+
model = cfg(:embedding, 'model', 'text-embedding-3-small')
|
54
|
+
url = cfg(:embedding, 'url', 'https://api.openai.com/v1/embeddings')
|
55
|
+
openai_embedding(txts, model, url, opts)
|
56
|
+
end
|
57
|
+
end
|
data/llm/ollama.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
require_relative "http"
|
2
2
|
|
3
|
-
def embedding_ollama(txts, opts = {})
|
3
|
+
def ollama_embedding(txts, model, url, opts = {})
|
4
4
|
data = {
|
5
|
-
"model" =>
|
5
|
+
"model" => model,
|
6
6
|
"prompt" => txts
|
7
7
|
}.merge(opts)
|
8
8
|
|
9
|
-
|
10
|
-
response = http_post(uri, nil, data)
|
9
|
+
response = http_post(url, nil, data)
|
11
10
|
|
12
11
|
if response.code != "200"
|
13
12
|
STDOUT << "Embedding error: #{response}\n"
|
@@ -16,4 +15,25 @@ def embedding_ollama(txts, opts = {})
|
|
16
15
|
|
17
16
|
result = JSON.parse(response.body)
|
18
17
|
result["embedding"]
|
18
|
+
end
|
19
|
+
|
20
|
+
def ollama_chat(messages, model, url, opts = {})
|
21
|
+
data = {
|
22
|
+
"model" => model,
|
23
|
+
"messages" => messages
|
24
|
+
}.merge(opts)
|
25
|
+
|
26
|
+
response = http_post(url, nil, data)
|
27
|
+
|
28
|
+
if response.code != "200"
|
29
|
+
STDOUT << "Chat error: #{response}\n"
|
30
|
+
exit 1
|
31
|
+
end
|
32
|
+
|
33
|
+
result = JSON.parse(response.body)
|
34
|
+
if result.is_a?(Hash) && result["message"]
|
35
|
+
result["message"]["content"]
|
36
|
+
else
|
37
|
+
result["choices"][0]["message"]["content"]
|
38
|
+
end
|
19
39
|
end
|
data/llm/openai.rb
CHANGED
@@ -1,18 +1,12 @@
|
|
1
1
|
require_relative "http"
|
2
2
|
|
3
|
-
|
4
|
-
ROLE_USER = "user"
|
5
|
-
ROLE_ASSISTANT = "assistant"
|
6
|
-
NEXT_ROLE = ->(role) { role != ROLE_USER ? ROLE_USER : ROLE_ASSISTANT }
|
7
|
-
|
8
|
-
def chat(messages, opts = {})
|
3
|
+
def openai_chat(messages, model, url, opts = {})
|
9
4
|
data = {
|
10
|
-
"model" =>
|
5
|
+
"model" => model,
|
11
6
|
"messages" => messages
|
12
7
|
}.merge(opts)
|
13
8
|
|
14
|
-
|
15
|
-
response = http_post(uri, OPENAI_KEY, data)
|
9
|
+
response = http_post(url, OPENAI_KEY, data)
|
16
10
|
|
17
11
|
if response.code != "200"
|
18
12
|
STDOUT << "Chat error: #{response}\n"
|
@@ -25,14 +19,13 @@ def chat(messages, opts = {})
|
|
25
19
|
result["choices"][0]["message"]["content"]
|
26
20
|
end
|
27
21
|
|
28
|
-
def embedding(txts, opts = {})
|
22
|
+
def openai_embedding(txts, model, url, opts = {})
|
29
23
|
data = {
|
30
|
-
"model" =>
|
24
|
+
"model" => model,
|
31
25
|
"input" => txts
|
32
26
|
}.merge(opts)
|
33
27
|
|
34
|
-
|
35
|
-
response = http_post(uri, OPENAI_KEY, data)
|
28
|
+
response = http_post(url, OPENAI_KEY, data)
|
36
29
|
|
37
30
|
if response.code != "200"
|
38
31
|
STDOUT << "Embedding error: #{response.body}\n"
|
@@ -41,4 +34,4 @@ def embedding(txts, opts = {})
|
|
41
34
|
|
42
35
|
result = JSON.parse(response.body)
|
43
36
|
result["data"][0]["embedding"]
|
44
|
-
end
|
37
|
+
end
|
data/readers/journal.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
class JournalReader
|
2
|
+
SKIP_HEADINGS = ["\u7CBE\u529B", "\u611F\u6069"]
|
3
|
+
|
4
|
+
attr_accessor :file, :chunks
|
5
|
+
|
6
|
+
def initialize(file)
|
7
|
+
@file = file
|
8
|
+
@loaded = false
|
9
|
+
@chunks = []
|
10
|
+
end
|
11
|
+
|
12
|
+
def load
|
13
|
+
return self if @loaded
|
14
|
+
|
15
|
+
parse_journal
|
16
|
+
|
17
|
+
@loaded = true
|
18
|
+
self
|
19
|
+
end
|
20
|
+
|
21
|
+
def get_chunk(idx)
|
22
|
+
@chunks[idx || 0]
|
23
|
+
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
def parse_journal
|
28
|
+
started = false
|
29
|
+
heading = nil
|
30
|
+
lines = []
|
31
|
+
|
32
|
+
File.foreach(@file) do |line|
|
33
|
+
line = line.chomp
|
34
|
+
next if line.strip.empty?
|
35
|
+
|
36
|
+
if !started
|
37
|
+
next unless line.start_with?("## ")
|
38
|
+
started = true
|
39
|
+
heading = line[3..].strip
|
40
|
+
lines = [clean_line(line)]
|
41
|
+
next
|
42
|
+
end
|
43
|
+
|
44
|
+
if line.start_with?("## ")
|
45
|
+
push_chunk(heading, lines)
|
46
|
+
heading = line[3..].strip
|
47
|
+
lines = [clean_line(line)]
|
48
|
+
next
|
49
|
+
end
|
50
|
+
|
51
|
+
next if line.lstrip.start_with?("<")
|
52
|
+
|
53
|
+
lines << clean_line(line)
|
54
|
+
end
|
55
|
+
|
56
|
+
push_chunk(heading, lines) if started
|
57
|
+
end
|
58
|
+
|
59
|
+
def push_chunk(heading, lines)
|
60
|
+
return if SKIP_HEADINGS.any? { |k| heading.include?(k) }
|
61
|
+
return if lines.length < 3
|
62
|
+
|
63
|
+
@chunks << lines.join("\n")
|
64
|
+
end
|
65
|
+
|
66
|
+
def clean_line(line)
|
67
|
+
line.gsub(/\[([^\]]+)\]\(([^\)]+)\)/, '\\1')
|
68
|
+
end
|
69
|
+
end
|
data/readers/reader.rb
CHANGED
@@ -1,12 +1,21 @@
|
|
1
|
+
READERS = %w[text note journal]
|
2
|
+
|
1
3
|
def get_reader(name)
|
2
|
-
case name.downcase
|
4
|
+
case name.to_s.downcase
|
3
5
|
when "text"
|
4
6
|
require_relative "text"
|
5
|
-
|
7
|
+
TextReader
|
6
8
|
when "note"
|
7
9
|
require_relative "note"
|
8
|
-
|
10
|
+
NoteReader
|
11
|
+
when "journal"
|
12
|
+
require_relative "journal"
|
13
|
+
JournalReader
|
9
14
|
else
|
10
|
-
|
15
|
+
nil
|
11
16
|
end
|
12
|
-
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def available_readers
|
20
|
+
READERS
|
21
|
+
end
|
data/readers/text.rb
CHANGED
@@ -12,13 +12,26 @@ class TextReader
|
|
12
12
|
return self if @loaded
|
13
13
|
|
14
14
|
chunk = ""
|
15
|
+
in_frontmatter = false
|
15
16
|
File.foreach(@file) do |line|
|
16
|
-
|
17
|
+
stripped = line.strip
|
18
|
+
|
19
|
+
if in_frontmatter
|
20
|
+
if stripped == '---' || stripped == '...'
|
21
|
+
in_frontmatter = false
|
22
|
+
end
|
23
|
+
next
|
24
|
+
elsif stripped == '---'
|
25
|
+
in_frontmatter = true
|
26
|
+
next
|
27
|
+
end
|
28
|
+
|
29
|
+
if line.start_with?('- ') && line.include?(':') || line.start_with?(' - [[')
|
17
30
|
next
|
18
|
-
elsif line.start_with?('<')
|
31
|
+
elsif line.start_with?('<')
|
19
32
|
next
|
20
33
|
else
|
21
|
-
chunk << line unless
|
34
|
+
chunk << line unless stripped.empty?
|
22
35
|
end
|
23
36
|
end
|
24
37
|
|
data/server/discuss.rb
CHANGED
data/server/retriever.rb
CHANGED
data/server/synthesizer.rb
CHANGED
@@ -2,7 +2,7 @@ SUM_PROMPT = """You are an expert at combining notes.
|
|
2
2
|
Given a collection of notes, synthesize them into a concise new note capturing the key points.
|
3
3
|
"""
|
4
4
|
|
5
|
-
require_relative "../llm/
|
5
|
+
require_relative "../llm/llm"
|
6
6
|
|
7
7
|
# notes: array of strings
|
8
8
|
# Returns summary text
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple-rag-zc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zhuochun
|
@@ -58,21 +58,26 @@ email:
|
|
58
58
|
executables:
|
59
59
|
- run-index
|
60
60
|
- run-server
|
61
|
+
- run-setup
|
61
62
|
extensions: []
|
62
63
|
extra_rdoc_files: []
|
63
64
|
files:
|
64
65
|
- README.md
|
65
66
|
- example_config.json
|
66
67
|
- exe/public/q.html
|
68
|
+
- exe/public/setup.html
|
67
69
|
- exe/run-index
|
68
70
|
- exe/run-server
|
71
|
+
- exe/run-setup
|
69
72
|
- lib/simple_rag.rb
|
70
73
|
- lib/simple_rag/version.rb
|
71
74
|
- llm/embedding.rb
|
72
75
|
- llm/http.rb
|
76
|
+
- llm/llm.rb
|
73
77
|
- llm/ollama.rb
|
74
78
|
- llm/openai.rb
|
75
79
|
- readers/check-reader.rb
|
80
|
+
- readers/journal.rb
|
76
81
|
- readers/note.rb
|
77
82
|
- readers/reader.rb
|
78
83
|
- readers/text.rb
|