simple-rag-zc 0.1.1 → 0.1.2

This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 1066044473e95484b129ea587a827a02f3f28adac32251df79ed9f8ee6ebaa08
- data.tar.gz: 947e9528045e8ac0d0e0845e4649c05020ad17198771453382ad6088f6e81e16
+ metadata.gz: 906d584b90596bde4fef5efef3f82cceb300284705d6c33023f84dff903f4d2e
+ data.tar.gz: 8d1c292cefc14246e918e06d44cdef48bf31548fd5f1aeaa375b527ee4603458
  SHA512:
- metadata.gz: f301b244bab50ae13b7163dfef9fd884a9b2f25b4a5b3b1b83216221b7955dbfbd82b196b55ffbf224cf8041177276b1426ef11ec6765e43281506ceec902030
- data.tar.gz: 586ff8501e40e2cbdd7f3f17ae5cbe055946534fc8b115a6a72644d257807e6a8ba6cacc8191b6e53dc7a319ea912137dd0bc0590fc94305eca83052181558e6
+ metadata.gz: 94d4c13cd41807bf416882f8241f2ea103d00bacf6662b9b901c92a2f4e65463bdae64d204d955850fcfa9703f36ade459a5d23df837be430e4b7373fa70aa1a
+ data.tar.gz: 79f7b78fa5f363b0c5c430f6a4d2af78485cf68b533e26006ef1548a535397a5ffed7e8f19f4d7a6602de1f9ccfdfe73c0f50687fe279796b4ab53d271ad7123
data/example_config.json CHANGED
@@ -15,7 +15,10 @@
  "reader": "text",
  "threshold": 0.3,
  "dir": "D:\\Studies\\tmp\\learning",
- "out": "D:\\Studies\\tmp\\learning-gpt1.dt"
+ "out": "D:\\Studies\\tmp\\learning\\learning.dt",
+ "nameMatch": "talks-*.md",
+ "url": "",
+ "searchDefault": false
  }
  ]
- }
+ }
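
The example config now keeps the index output inside the source directory and adds three per-path keys: "nameMatch" (a filename filter), "url" (left empty in the example), and "searchDefault" (whether the source is pre-checked on the search page). A minimal sketch of consuming one such entry, assuming "nameMatch" is a shell-style glob as the example value "talks-*.md" suggests:

    require "json"

    # Sketch only: list the files of the first configured path that match nameMatch.
    path = JSON.parse(File.read("example_config.json"))["paths"].first
    files = Dir.children(path["dir"]).select do |name|
      path["nameMatch"].to_s.empty? || File.fnmatch(path["nameMatch"], name)
    end
    puts files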
data/exe/public/q.html CHANGED
@@ -114,7 +114,7 @@
  checkbox.type = 'checkbox';
  checkbox.id = item.name;
  checkbox.name = item.name;
- checkbox.checked = true;
+ checkbox.checked = !!item.searchDefault;

  const label = document.createElement('label');
  label.htmlFor = item.name;
@@ -228,63 +228,6 @@
  .catch(error => console.error('Error performing agent search:', error));
  }

- function performAgentSearch() {
- const query = searchInput.value;
- const checkedPaths = Array.from(pathsList.querySelectorAll('input[type="checkbox"]:checked'))
- .map(checkbox => checkbox.name);
-
- fetch('http://localhost:4567/q_plus', {
- method: 'POST',
- headers: {
- 'Content-Type': 'application/json',
- },
- body: JSON.stringify({
- q: query,
- paths: checkedPaths,
- })
- })
- .then(response => response.json())
- .then(resp => {
- responseContainer.innerHTML = '';
-
- if (!!resp.expanded) {
- const div = document.createElement('div');
- div.className = 'response-item';
- div.style.backgroundColor = textToLightColor("expanded");
- div.innerHTML = `<div><strong>Expanded Query:</strong> ${resp.expanded}</div>`;
- responseContainer.appendChild(div);
- }
-
- if (resp.variants && resp.variants.length > 0) {
- const div = document.createElement('div');
- div.className = 'response-item';
- div.style.backgroundColor = textToLightColor("variants");
- div.innerHTML = `
- <div><strong>Variants:</strong> ${resp.variants.join(', ')}</div>
- `;
- responseContainer.appendChild(div);
- }
-
- resp.data.forEach(item => {
- const div = document.createElement('div');
- div.className = 'response-item';
- div.style.backgroundColor = textToLightColor(item.lookup);
- div.dataset.note = item.text;
- div.innerHTML = `
- <div><strong>Path:</strong> <a href="${item.url}">${item.id}</a></div>
- <div><strong>Score:</strong> ${item.score}</div>
- <div class="markdown-content">${marked.parse(item.text)}</div>
- `;
- const btn = document.createElement('button');
- btn.className = 'discuss-button';
- btn.textContent = 'Discuss';
- btn.addEventListener('click', () => discussCard(div));
- div.appendChild(btn);
- responseContainer.appendChild(div);
- });
- })
- .catch(error => console.error('Error performing agent search:', error));
- }

  function textToLightColor(text) {
  // Generate a hash from the text
data/exe/public/setup.html ADDED
@@ -0,0 +1,136 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+ <meta charset="UTF-8">
+ <title>Setup SimpleRag</title>
+ <style>
+ body { font-family: Arial, sans-serif; margin: 20px; }
+ .path-item { margin-bottom: 20px; padding: 15px; border: 1px solid #ccc; }
+ .path-item input[type="text"], .path-item select { width: 300px; margin-bottom: 10px; }
+ .path-item label { display: block; margin-bottom: 5px; }
+ </style>
+ </head>
+ <body>
+ <h1>Setup SimpleRag Config</h1>
+ <form id="config-form">
+ <h2>Paths</h2>
+ <div id="paths"></div>
+ <button type="button" onclick="addPath()">Add Path</button>
+ <h2>Chat</h2>
+ <label>Provider: <input id="chat_provider" value="openai"></label><br>
+ <label>URL: <input id="chat_url" value=""></label><br>
+ <label>Model: <input id="chat_model" value="gpt-3.5-turbo-16k"></label>
+ <h2>Embedding</h2>
+ <label>Provider: <input id="emb_provider" value="openai"></label><br>
+ <label>URL: <input id="emb_url" value=""></label><br>
+ <label>Model: <input id="emb_model" value="text-embedding-3-small"></label>
+ <br><br>
+ <button type="submit">Save</button>
+ </form>
+ <script>
+ let READERS = [];
+
+ function fillReaderSelect(select, value){
+ select.innerHTML = READERS.map(r=>`<option value="${r}">${r}</option>`).join('');
+ if(value){ select.value = value; }
+ }
+
+ function createPathDiv(p){
+ const idx = document.querySelectorAll('.path-item').length;
+ const div = document.createElement('div');
+ div.className = 'path-item';
+ div.innerHTML = `
+ <label>Dir: <input type="text" class="pdir" id="dir_${idx}" value="${p?.dir||''}">
+ <input type="file" webkitdirectory directory style="display:none" id="dirsel_${idx}">
+ <button type="button" onclick="document.getElementById('dirsel_${idx}').click()">Select Folder</button></label>
+ <label>Name: <input type="text" class="pname" value="${p?.name||''}"></label>
+ <label>Reader: <select class="preader"></select></label>
+ <label>Threshold: <input type="text" class="pthreshold" value="${p?.threshold||0.3}"></label>
+ <label>Out: <input type="text" class="pout" value="${p?.out||''}"></label>
+ <label>NameMatch: <input type="text" class="pnamematch" value="${p?.nameMatch||''}"></label>
+ <label>URL: <input type="text" class="purl" value="${p?.url||''}"></label>
+ <label>Search Default: <input type="checkbox" class="psearchdefault" ${p?.searchDefault?'checked':''}></label>
+ <button type="button" onclick="this.parentNode.remove()">Remove</button>
+ `;
+ const dirInput = div.querySelector('#dir_'+idx);
+ const nameInput = div.querySelector('.pname');
+ const outInput = div.querySelector('.pout');
+
+ function updateNameOut(){
+ if(!dirInput.value) return;
+ const parts = dirInput.value.replace(/\\/g,'/').split('/').filter(Boolean);
+ const name = parts[parts.length-1] || '';
+ nameInput.value = name;
+ outInput.value = dirInput.value.replace(/[/\\]$/, '') + '/' + name + '.dt';
+ }
+
+ div.querySelector('#dirsel_'+idx).addEventListener('change', function(){
+ if(this.files.length>0){
+ const rel = this.files[0].webkitRelativePath;
+ const dir = rel.split('/')[0];
+ dirInput.value = dir;
+ updateNameOut();
+ }
+ });
+ dirInput.addEventListener('change', updateNameOut);
+ fillReaderSelect(div.querySelector('.preader'), p?.reader||'text');
+ return div;
+ }
+
+ function addPath(p){
+ document.getElementById('paths').appendChild(createPathDiv(p));
+ }
+
+ function loadConfig(readers){
+ READERS = readers;
+ fetch('/config').then(r=>r.json()).then(cfg=>{
+ if(cfg.chat){
+ document.getElementById('chat_provider').value = cfg.chat.provider||'openai';
+ document.getElementById('chat_url').value = cfg.chat.url||'';
+ document.getElementById('chat_model').value = cfg.chat.model||'gpt-3.5-turbo-16k';
+ }
+ if(cfg.embedding){
+ document.getElementById('emb_provider').value = cfg.embedding.provider||'openai';
+ document.getElementById('emb_url').value = cfg.embedding.url||'';
+ document.getElementById('emb_model').value = cfg.embedding.model||'text-embedding-3-small';
+ }
+ if(cfg.paths && cfg.paths.length>0){
+ cfg.paths.forEach(p=>addPath(p));
+ }else{
+ addPath();
+ }
+ });
+ }
+
+ fetch('/readers').then(r=>r.json()).then(loadConfig);
+
+ document.getElementById('config-form').addEventListener('submit', function(e){
+ e.preventDefault();
+ const paths=[];
+ document.querySelectorAll('.path-item').forEach(div=>{
+ paths.push({
+ dir: div.querySelector('.pdir').value,
+ name: div.querySelector('.pname').value,
+ reader: div.querySelector('.preader').value,
+ threshold: parseFloat(div.querySelector('.pthreshold').value)||0,
+ out: div.querySelector('.pout').value,
+ nameMatch: div.querySelector('.pnamematch').value,
+ url: div.querySelector('.purl').value,
+ searchDefault: div.querySelector('.psearchdefault').checked
+ });
+ });
+ const config={
+ chat:{provider:document.getElementById('chat_provider').value,
+ url:document.getElementById('chat_url').value,
+ model:document.getElementById('chat_model').value},
+ embedding:{provider:document.getElementById('emb_provider').value,
+ url:document.getElementById('emb_url').value,
+ model:document.getElementById('emb_model').value},
+ paths:paths
+ };
+ fetch('/save',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(config)})
+ .then(()=>alert('Saved'));
+ });
+ </script>
+ </body>
+ </html>
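
The new setup page collects chat, embedding, and path settings in the browser and posts them to /save as one JSON document. A sketch of the document the form produces, built in Ruby with illustrative values:

    require "json"

    config = {
      "chat"      => { "provider" => "openai", "url" => "", "model" => "gpt-3.5-turbo-16k" },
      "embedding" => { "provider" => "openai", "url" => "", "model" => "text-embedding-3-small" },
      "paths"     => [
        { "dir" => "/notes/learning", "name" => "learning", "reader" => "text",
          "threshold" => 0.3, "out" => "/notes/learning/learning.dt",
          "nameMatch" => "", "url" => "", "searchDefault" => false }
      ]
    }
    puts JSON.pretty_generate(config)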
data/exe/run-index CHANGED
@@ -11,8 +11,7 @@ require "json"
  require "ostruct"
  require "digest"

- require_relative "../llm/openai"
- require_relative "../llm/embedding"
+ require_relative "../llm/llm"
  require_relative "../readers/reader"

  if ARGV.length != 1
data/exe/run-server CHANGED
@@ -23,6 +23,7 @@ end
  config = JSON.parse(File.read(ARGV[0]))
  CONFIG = OpenStruct.new(config)
  CONFIG.paths = CONFIG.paths.map { |p| OpenStruct.new(p) }
+ CONFIG.paths.each { |p| p.searchDefault = !!p.searchDefault }
  CONFIG.path_map = {}
  CONFIG.paths.each { |p| CONFIG.path_map[p.name] = p }

@@ -39,7 +40,7 @@ class SimpleRagServer < Sinatra::Application

  resp = []
  CONFIG.paths.each do |p|
- resp << { "name": p.name }
+ resp << { name: p.name, searchDefault: p.searchDefault }
  end
  resp.to_json
  end
@@ -50,9 +51,12 @@ class SimpleRagServer < Sinatra::Application

  data = JSON.parse(request.body.read)

- lookup_paths = (data["paths"] || CONFIG.paths_map.keys).map do |name|
- CONFIG.path_map[name]
+ selected = data["paths"]
+ if !selected || selected.empty?
+ selected = CONFIG.paths.select { |p| p.searchDefault }.map(&:name)
+ selected = CONFIG.path_map.keys if selected.empty?
  end
+ lookup_paths = selected.map { |name| CONFIG.path_map[name] }

  topN = (data["topN"] || 20).to_i

@@ -99,9 +103,12 @@ class SimpleRagServer < Sinatra::Application

  data = JSON.parse(request.body.read)

- lookup_paths = (data["paths"] || CONFIG.paths_map.keys).map do |name|
- CONFIG.path_map[name]
+ selected = data["paths"]
+ if !selected || selected.empty?
+ selected = CONFIG.paths.select { |p| p.searchDefault }.map(&:name)
+ selected = CONFIG.path_map.keys if selected.empty?
  end
+ lookup_paths = selected.map { |name| CONFIG.path_map[name] }

  topN = (data["topN"] || 20).to_i

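
Both the /q and /q_plus handlers now share the same fallback when resolving which sources to search: the paths named in the request, otherwise the paths flagged searchDefault, otherwise every configured path (this also replaces the old, misspelled CONFIG.paths_map reference). The selection logic in isolation, as a sketch with a toy CONFIG:

    require "ostruct"

    CONFIG = OpenStruct.new(
      paths: [OpenStruct.new(name: "notes",  searchDefault: true),
              OpenStruct.new(name: "papers", searchDefault: false)]
    )
    CONFIG.path_map = CONFIG.paths.to_h { |p| [p.name, p] }

    # Mirrors the fallback used by the two endpoints.
    def lookup_paths(requested)
      selected = requested
      if !selected || selected.empty?
        selected = CONFIG.paths.select { |p| p.searchDefault }.map(&:name)
        selected = CONFIG.path_map.keys if selected.empty?
      end
      selected.map { |name| CONFIG.path_map[name] }
    end

    p lookup_paths(nil).map(&:name)          # => ["notes"]
    p lookup_paths(["papers"]).map(&:name)   # => ["papers"]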
data/exe/run-setup ADDED
@@ -0,0 +1,55 @@
+ #!/usr/bin/env ruby
+ # encoding: utf-8
+
+ # Setup a config JSON interactively via a local web page
+ #
+ # Usage: run-setup config.json
+
+ require "json"
+ require 'sinatra/base'
+ require_relative '../readers/reader'
+
+ if ARGV.length != 1
+ STDOUT << "Invalid arguments received, need a config file\n"
+ exit 1
+ end
+
+ config_path = File.expand_path(ARGV[0])
+
+ class SetupServer < Sinatra::Base
+ set :bind, '0.0.0.0'
+ set :port, 4568
+ set :public_folder, File.expand_path('public', __dir__)
+ set :config_path, nil
+
+ get '/' do
+ send_file File.join(settings.public_folder, 'setup.html')
+ end
+
+ get '/readers' do
+ content_type :json
+ READERS.to_json
+ end
+
+ get '/config' do
+ content_type :json
+
+ if File.exist?(settings.config_path)
+ File.read(settings.config_path)
+ else
+ {}.to_json
+ end
+ end
+
+ post '/save' do
+ content_type :json
+
+ data = JSON.parse(request.body.read)
+ File.write(settings.config_path, JSON.pretty_generate(data))
+
+ { status: 'ok' }.to_json
+ end
+ end
+
+ SetupServer.set :config_path, config_path
+ SetupServer.run!
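
run-setup starts a small Sinatra app on port 4568 that serves setup.html, exposes the available readers and the current config, and writes the submitted JSON back to the config file given on the command line. It is normally driven from the browser at http://localhost:4568/, but /save can also be exercised directly; a sketch with an abbreviated payload:

    require "json"
    require "net/http"

    payload = { "chat" => { "provider" => "openai" }, "paths" => [] }
    res = Net::HTTP.post(URI("http://localhost:4568/save"),
                         JSON.generate(payload),
                         "Content-Type" => "application/json")
    puts res.body   # => {"status":"ok"}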
data/lib/simple_rag/version.rb CHANGED
@@ -1,3 +1,3 @@
  module SimpleRag
- VERSION = "0.1.1"
+ VERSION = "0.1.2"
  end
data/lib/simple_rag.rb CHANGED
@@ -8,8 +8,7 @@ $LOAD_PATH.unshift File.expand_path("..", __dir__)
  module SimpleRag
  end

- require "llm/openai"
- require "llm/embedding"
+ require "llm/llm"
  require "readers/reader"
  require "server/retriever"
  require "server/synthesizer"
data/llm/llm.rb ADDED
@@ -0,0 +1,57 @@
+ require_relative "openai"
+ require_relative "ollama"
+
+ ROLE_SYSTEM = "system"
+ ROLE_USER = "user"
+ ROLE_ASSISTANT = "assistant"
+ NEXT_ROLE = ->(role) { role != ROLE_USER ? ROLE_USER : ROLE_ASSISTANT }
+
+ # Fetch configuration value with defaults
+ # Supports Hash or OpenStruct configuration objects
+
+ def cfg(section, key, default)
+ return default unless defined?(CONFIG)
+ sec = CONFIG.send(section) if CONFIG.respond_to?(section)
+ return default unless sec
+
+ if sec.is_a?(Hash)
+ sec.fetch(key, default)
+ elsif sec.respond_to?(key)
+ val = sec.send(key)
+ val.nil? ? default : val
+ else
+ default
+ end
+ end
+
+ # Route chat requests based on provider configuration
+
+ def chat(messages, opts = {})
+ provider = cfg(:chat, 'provider', 'openai').downcase
+ case provider
+ when 'ollama'
+ model = cfg(:chat, 'model', 'llama2')
+ url = cfg(:chat, 'url', 'http://localhost:11434/api/chat')
+ ollama_chat(messages, model, url, opts)
+ else
+ model = cfg(:chat, 'model', 'gpt-4.1-mini')
+ url = cfg(:chat, 'url', 'https://api.openai.com/v1/chat/completions')
+ openai_chat(messages, model, url, opts)
+ end
+ end
+
+ # Route embedding requests based on provider configuration
+
+ def embedding(txts, opts = {})
+ provider = cfg(:embedding, 'provider', 'openai').downcase
+ case provider
+ when 'ollama'
+ model = cfg(:embedding, 'model', 'nomic-embed-text')
+ url = cfg(:embedding, 'url', 'http://localhost:11434/api/embeddings')
+ ollama_embedding(txts, model, url, opts)
+ else
+ model = cfg(:embedding, 'model', 'text-embedding-3-small')
+ url = cfg(:embedding, 'url', 'https://api.openai.com/v1/embeddings')
+ openai_embedding(txts, model, url, opts)
+ end
+ end
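
llm/llm.rb becomes the single entry point for model calls: chat and embedding read provider, model, and url from CONFIG (Hash or OpenStruct values both work) via cfg, fall back to the OpenAI defaults, and dispatch to the provider-specific helpers. A usage sketch with a stand-in CONFIG; the model name is illustrative:

    require "ostruct"
    require_relative "llm/llm"   # adjust the path to wherever the gem's llm/ directory lives

    # Stand-in for the CONFIG that run-server builds from the JSON config file.
    CONFIG = OpenStruct.new(
      "chat"      => { "provider" => "ollama", "model" => "llama3" },
      "embedding" => { "provider" => "openai" }
    )

    messages = [{ "role" => ROLE_USER, "content" => "Summarize this note in one line." }]
    reply  = chat(messages)              # routed to ollama_chat with the default local Ollama URL
    vector = embedding("a short text")   # routed to openai_embedding (requires OPENAI_KEY)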
data/llm/ollama.rb CHANGED
@@ -1,13 +1,12 @@
  require_relative "http"

- def embedding_ollama(txts, opts = {})
+ def ollama_embedding(txts, model, url, opts = {})
  data = {
- "model" => "nomic-embed-text",
+ "model" => model,
  "prompt" => txts
  }.merge(opts)

- uri = "http://localhost:11434/api/embeddings"
- response = http_post(uri, nil, data)
+ response = http_post(url, nil, data)

  if response.code != "200"
  STDOUT << "Embedding error: #{response}\n"
@@ -16,4 +15,25 @@ def embedding_ollama(txts, opts = {})

  result = JSON.parse(response.body)
  result["embedding"]
+ end
+
+ def ollama_chat(messages, model, url, opts = {})
+ data = {
+ "model" => model,
+ "messages" => messages
+ }.merge(opts)
+
+ response = http_post(url, nil, data)
+
+ if response.code != "200"
+ STDOUT << "Chat error: #{response}\n"
+ exit 1
+ end
+
+ result = JSON.parse(response.body)
+ if result.is_a?(Hash) && result["message"]
+ result["message"]["content"]
+ else
+ result["choices"][0]["message"]["content"]
+ end
  end
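
The Ollama helpers are renamed and parameterized: model and url now come from the caller instead of being hard-coded, and the new ollama_chat accepts either an Ollama-native response (message.content) or an OpenAI-compatible one (choices[0].message.content). Calling them directly, bypassing the router in llm/llm.rb (model names and endpoints are illustrative; passing "stream" => false through opts is an assumption to keep the chat response as a single JSON body, since Ollama streams by default):

    vec = ollama_embedding("hello", "nomic-embed-text", "http://localhost:11434/api/embeddings")
    puts vec.length

    msgs = [{ "role" => "user", "content" => "Say hi." }]
    puts ollama_chat(msgs, "llama3", "http://localhost:11434/api/chat", "stream" => false)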
data/llm/openai.rb CHANGED
@@ -1,18 +1,12 @@
  require_relative "http"

- ROLE_SYSTEM = "system"
- ROLE_USER = "user"
- ROLE_ASSISTANT = "assistant"
- NEXT_ROLE = ->(role) { role != ROLE_USER ? ROLE_USER : ROLE_ASSISTANT }
-
- def chat(messages, opts = {})
+ def openai_chat(messages, model, url, opts = {})
  data = {
- "model" => "gpt-4.1-mini",
+ "model" => model,
  "messages" => messages
  }.merge(opts)

- uri = "https://api.openai.com/v1/chat/completions"
- response = http_post(uri, OPENAI_KEY, data)
+ response = http_post(url, OPENAI_KEY, data)

  if response.code != "200"
  STDOUT << "Chat error: #{response}\n"
@@ -25,14 +19,13 @@ def chat(messages, opts = {})
  result["choices"][0]["message"]["content"]
  end

- def embedding(txts, opts = {})
+ def openai_embedding(txts, model, url, opts = {})
  data = {
- "model" => "text-embedding-3-small",
+ "model" => model,
  "input" => txts
  }.merge(opts)

- uri = "https://api.openai.com/v1/embeddings"
- response = http_post(uri, OPENAI_KEY, data)
+ response = http_post(url, OPENAI_KEY, data)

  if response.code != "200"
  STDOUT << "Embedding error: #{response.body}\n"
@@ -41,4 +34,4 @@ def embedding(txts, opts = {})

  result = JSON.parse(response.body)
  result["data"][0]["embedding"]
- end
+ end
data/readers/journal.rb ADDED
@@ -0,0 +1,69 @@
+ class JournalReader
+ SKIP_HEADINGS = ["\u7CBE\u529B", "\u611F\u6069"]
+
+ attr_accessor :file, :chunks
+
+ def initialize(file)
+ @file = file
+ @loaded = false
+ @chunks = []
+ end
+
+ def load
+ return self if @loaded
+
+ parse_journal
+
+ @loaded = true
+ self
+ end
+
+ def get_chunk(idx)
+ @chunks[idx || 0]
+ end
+
+ private
+
+ def parse_journal
+ started = false
+ heading = nil
+ lines = []
+
+ File.foreach(@file) do |line|
+ line = line.chomp
+ next if line.strip.empty?
+
+ if !started
+ next unless line.start_with?("## ")
+ started = true
+ heading = line[3..].strip
+ lines = [clean_line(line)]
+ next
+ end
+
+ if line.start_with?("## ")
+ push_chunk(heading, lines)
+ heading = line[3..].strip
+ lines = [clean_line(line)]
+ next
+ end
+
+ next if line.lstrip.start_with?("<")
+
+ lines << clean_line(line)
+ end
+
+ push_chunk(heading, lines) if started
+ end
+
+ def push_chunk(heading, lines)
+ return if SKIP_HEADINGS.any? { |k| heading.include?(k) }
+ return if lines.length < 3
+
+ @chunks << lines.join("\n")
+ end
+
+ def clean_line(line)
+ line.gsub(/\[([^\]]+)\]\(([^\)]+)\)/, '\\1')
+ end
+ end
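
JournalReader splits a Markdown journal into one chunk per "## " heading, strips link markup, skips HTML-ish lines, drops chunks whose heading matches SKIP_HEADINGS, and discards chunks shorter than three lines. A usage sketch (the file name is illustrative):

    require_relative "readers/journal"

    reader = JournalReader.new("2024-05-01.md").load
    puts reader.chunks.length
    puts reader.get_chunk(0)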
data/readers/reader.rb CHANGED
@@ -1,12 +1,21 @@
+ READERS = %w[text note journal]
+
  def get_reader(name)
- case name.downcase
+ case name.to_s.downcase
  when "text"
  require_relative "text"
- return TextReader
+ TextReader
  when "note"
  require_relative "note"
- return NoteReader
+ NoteReader
+ when "journal"
+ require_relative "journal"
+ JournalReader
  else
- return nil
+ nil
  end
- end
+ end
+
+ def available_readers
+ READERS
+ end
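
reader.rb now exposes the reader names through the READERS constant (this is what run-setup serves at /readers) and resolves "journal" to the new JournalReader; unknown names still return nil. A quick sketch:

    require_relative "readers/reader"

    p available_readers        # => ["text", "note", "journal"]
    p get_reader("journal")    # => JournalReader
    p get_reader("unknown")    # => nil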
data/readers/text.rb CHANGED
@@ -12,13 +12,26 @@ class TextReader
  return self if @loaded

  chunk = ""
+ in_frontmatter = false
  File.foreach(@file) do |line|
- if line.start_with?(/- .+:/) || line.start_with?(' - [[') # yaml like
+ stripped = line.strip
+
+ if in_frontmatter
+ if stripped == '---' || stripped == '...'
+ in_frontmatter = false
+ end
+ next
+ elsif stripped == '---'
+ in_frontmatter = true
+ next
+ end
+
+ if line.start_with?('- ') && line.include?(':') || line.start_with?(' - [[')
  next
- elsif line.start_with?('<') # html like
+ elsif line.start_with?('<')
  next
  else
- chunk << line unless line.strip.empty?
+ chunk << line unless stripped.empty?
  end
  end

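
TextReader#load now skips YAML frontmatter (a block opened by "---" and closed by "---" or "..."), and the old regexp-based check for "key: value" list lines is replaced with a plain start_with?/include? test. A sketch of the effect, assuming TextReader exposes the same get_chunk interface as JournalReader (the accessor is not shown in this diff):

    require_relative "readers/text"

    File.write("sample.md", <<~MD)
      ---
      title: demo
      tags: [a, b]
      ---
      Body line one.
      <div>an HTML-ish line, still skipped</div>
      Body line two.
    MD

    reader = TextReader.new("sample.md").load
    puts reader.get_chunk(0)   # frontmatter and the HTML-ish line are dropped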
data/server/discuss.rb CHANGED
@@ -3,7 +3,7 @@ You provide a short discussion of a note from multiple perspectives.
  Focus on explaining key concepts succinctly.
  PROMPT

- require_relative "../llm/openai"
+ require_relative "../llm/llm"

  # note: string
  # Returns discussion text
data/server/retriever.rb CHANGED
@@ -1,10 +1,8 @@
  require "pathname"

  require_relative "cache"
-
- require_relative "../llm/openai"
+ require_relative "../llm/llm"
  require_relative "../llm/embedding"
-
  require_relative "../readers/reader"

  AGENT_PROMPT = <<~PROMPT
data/server/synthesizer.rb CHANGED
@@ -2,7 +2,7 @@ SUM_PROMPT = """You are an expert at combining notes.
  Given a collection of notes, synthesize them into a concise new note capturing the key points.
  """

- require_relative "../llm/openai"
+ require_relative "../llm/llm"

  # notes: array of strings
  # Returns summary text
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: simple-rag-zc
  version: !ruby/object:Gem::Version
- version: 0.1.1
+ version: 0.1.2
  platform: ruby
  authors:
  - Zhuochun
@@ -58,21 +58,26 @@ email:
  executables:
  - run-index
  - run-server
+ - run-setup
  extensions: []
  extra_rdoc_files: []
  files:
  - README.md
  - example_config.json
  - exe/public/q.html
+ - exe/public/setup.html
  - exe/run-index
  - exe/run-server
+ - exe/run-setup
  - lib/simple_rag.rb
  - lib/simple_rag/version.rb
  - llm/embedding.rb
  - llm/http.rb
+ - llm/llm.rb
  - llm/ollama.rb
  - llm/openai.rb
  - readers/check-reader.rb
+ - readers/journal.rb
  - readers/note.rb
  - readers/reader.rb
  - readers/text.rb