robot_lab-document_store 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fe6aeaf2a0fd6a0c1dd5b827a9382e3804cf9cf6e8573fdd9c75063fccd87a36
4
- data.tar.gz: 350a44adfb40048c467166a1bfc37dd169941a35c4f5e3dd56ace185e39ba27b
3
+ metadata.gz: 5add23edc59a87fac16aaab24cc061392c241b6d0e0065aa36832dc5681342d6
4
+ data.tar.gz: 9fb1b9c37b3dcdee87ede9c3e0ce43e6013b6964d1268c39c9cb83a1c0a389ff
5
5
  SHA512:
6
- metadata.gz: 05bc51a7b278d57d7bf8b1bb8535d5520ac41093112b0080c0f2f6e7eb554101d212b8c3065564f25a031ce3439f7b8ad9562fa93a9ef9a4091d78f3d20bdea2
7
- data.tar.gz: 62694a390bbcd9a2492466eef6102fd821ae670ca30b694283d25f73f4fa7c504e9c62e2e32509ebc2f3f7700d63499e74153111bd781bf190ea39c76f714137
6
+ metadata.gz: 3834e2af8b84030daa4b8641f1e86697dbb9d88d37e9ddc975f81a40df3d6b632e48a0459da012466dc2a695b1e9857ea7fbdb0f5524724578e9754b3b6556bc
7
+ data.tar.gz: c9d07c4c8b6d0f5bffc7c1e7a27c7d5c434a4ad85e6ed2336b6e751bd6e23c6a5ef1c6612c2650947a90928aa30561e0b6e5725ebb5bb2ed8611a23123633ff1
data/.rubocop.yml ADDED
@@ -0,0 +1,173 @@
1
+ AllCops:
2
+ NewCops: enable
3
+ SuggestExtensions: false
4
+ TargetRubyVersion: 4.0
5
+ Exclude:
6
+ - 'examples/**/*'
7
+ - 'vendor/**/*'
8
+ - 'dead_code/**/*'
9
+
10
+ # ── Style: disabled cops ───────────────────────────────────────────────────
11
+ Style/StringLiterals:
12
+ Enabled: false
13
+
14
+ Style/StringLiteralsInInterpolation:
15
+ Enabled: false
16
+
17
+ Style/Documentation:
18
+ Enabled: false
19
+
20
+ # Ruby 4.0 freezes string literals by default
21
+ Style/FrozenStringLiteralComment:
22
+ Enabled: false
23
+
24
+ Style/IfUnlessModifier:
25
+ Enabled: false
26
+
27
+ Style/RescueModifier:
28
+ Enabled: false
29
+
30
+ Style/TrivialAccessors:
31
+ Enabled: false
32
+
33
+ Style/MultilineTernaryOperator:
34
+ Enabled: false
35
+
36
+ Style/SafeNavigation:
37
+ Enabled: false
38
+
39
+ Style/EmptyClassDefinition:
40
+ Enabled: false
41
+
42
+ Style/ClassAndModuleChildren:
43
+ Enabled: false
44
+
45
+ Style/RescueStandardError:
46
+ Enabled: false
47
+
48
+ Style/OneClassPerFile:
49
+ Enabled: false
50
+
51
+ # Both % and format/sprintf are acceptable
52
+ Style/FormatString:
53
+ Enabled: false
54
+
55
+ # String concatenation and interpolation are both acceptable
56
+ Style/StringConcatenation:
57
+ Enabled: false
58
+
59
+ # ── Layout ─────────────────────────────────────────────────────────────────
60
+ Layout/LineLength:
61
+ Max: 140
62
+
63
+ Layout/ExtraSpacing:
64
+ Enabled: false
65
+
66
+ Layout/HashAlignment:
67
+ Enabled: false
68
+
69
+ Layout/FirstHashElementIndentation:
70
+ Enabled: false
71
+
72
+ Layout/EmptyLineAfterGuardClause:
73
+ Enabled: false
74
+
75
+ # ── Naming ─────────────────────────────────────────────────────────────────
76
+ # Single-char params (c, e, n) are acceptable throughout
77
+ Naming/MethodParameterName:
78
+ Enabled: false
79
+
80
+ Naming/VariableNumber:
81
+ Exclude:
82
+ - 'test/**/*'
83
+
84
+ Naming/RescuedExceptionsVariableName:
85
+ Enabled: false
86
+
87
+ # set_results and similar explicit setters are clear and conventional
88
+ Naming/AccessorMethodName:
89
+ Enabled: false
90
+
91
+
92
+ # has_tool_calls? and similar are clear and conventional
93
+ Naming/PredicatePrefix:
94
+ Enabled: false
95
+
96
+ # Test helper methods don't need to follow predicate naming rules
97
+ Naming/PredicateMethod:
98
+ Exclude:
99
+ - 'test/**/*'
100
+
101
+ # ── Lint: relax noisy cops on intentional patterns ─────────────────────────
102
+ # Library and framework methods commonly accept args for API/documentation purposes
103
+ Lint/UnusedMethodArgument:
104
+ Enabled: false
105
+
106
+
107
+ Lint/EmptyBlock:
108
+ Exclude:
109
+ - 'test/**/*'
110
+
111
+ Lint/ConstantDefinitionInBlock:
112
+ Exclude:
113
+ - 'Rakefile'
114
+ - 'test/**/*'
115
+
116
+ # ── Gemspec ────────────────────────────────────────────────────────────────
117
+ Gemspec/DevelopmentDependencies:
118
+ EnforcedStyle: Gemfile
119
+
120
+ Gemspec/RequiredRubyVersion:
121
+ Enabled: false
122
+
123
+ Gemspec/OrderedDependencies:
124
+ Enabled: false
125
+
126
+ # ── Metrics ────────────────────────────────────────────────────────────────
127
+ # Framework-level code (routers, parsers, orchestrators) is inherently complex.
128
+ # Flog is the primary complexity gate — these RuboCop thresholds catch only
129
+ # egregious outliers without raising false positives on every dispatch method.
130
+
131
+ Metrics/MethodLength:
132
+ Max: 35
133
+ CountAsOne:
134
+ - heredoc
135
+ - array
136
+ - hash
137
+ Exclude:
138
+ - 'test/**/*'
139
+
140
+ Metrics/AbcSize:
141
+ Max: 40
142
+ Exclude:
143
+ - 'test/**/*'
144
+
145
+ Metrics/ClassLength:
146
+ Max: 600
147
+ Exclude:
148
+ - 'test/**/*'
149
+
150
+ Metrics/ModuleLength:
151
+ Max: 200
152
+ Exclude:
153
+ - 'test/**/*'
154
+
155
+ Metrics/CyclomaticComplexity:
156
+ Max: 20
157
+ Exclude:
158
+ - 'test/**/*'
159
+
160
+ Metrics/PerceivedComplexity:
161
+ Max: 20
162
+ Exclude:
163
+ - 'test/**/*'
164
+
165
+ # Long method signatures with keyword args are a Ruby framework idiom
166
+ Metrics/ParameterLists:
167
+ Enabled: false
168
+
169
+ Metrics/BlockLength:
170
+ Exclude:
171
+ - 'Rakefile'
172
+ - '*.gemspec'
173
+ - 'test/**/*'
data/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ### Fixed
4
+ - Model name in README and docs corrected to `BAAI/bge-small-en-v1.5` (was incorrectly listed as `bge-base`)
5
+ - `register_extension` call guarded with `defined?(RobotLab) && RobotLab.respond_to?(:register_extension)` so the file loads safely without robot_lab core
6
+ - Instance variable `@model` renamed to `@fastembed_model` to eliminate shadowing risk
7
+ - `FASTEMBED_AVAILABLE` constant moved into `DocumentStore` class (was at module level)
8
+ - `STOP_WORDS` constant moved before `private` keyword (was defined after it)
9
+ - `sparse_cosine` parameter names corrected to `vec_a`/`vec_b`; uses `each_value` for the second vector
10
+
11
+ ### Added
12
+ - Full test suite covering fastembed path, TF-IDF fallback path, and cosine edge cases (27 tests, 44 assertions)
13
+ - SimpleCov branch coverage with thresholds (line: 95%, branch: 75%)
14
+ - `quality` Rake task: runs tests + coverage, RuboCop, and Flog in sequence
15
+ - Complete RBS type signatures in `sig/robot_lab/document_store.rbs`
16
+ - Example script `examples/01_basic_usage.rb` with companion Markdown documents
17
+
18
+ ### Changed
19
+ - Development dependencies moved from gemspec to Gemfile (per `Gemspec/DevelopmentDependencies` cop)
20
+ - Example renamed from `26_document_store.rb` to `01_basic_usage.rb`
21
+
3
22
  ## [0.1.0] - 2026-05-07
4
23
 
5
24
  - Initial release
data/README.md CHANGED
@@ -49,7 +49,7 @@ store = RobotLab::DocumentStore.new(
49
49
  )
50
50
  ```
51
51
 
52
- The default model is `"BAAI/bge-base-en-v1.5"`.
52
+ The default model is `"BAAI/bge-small-en-v1.5"`.
53
53
 
54
54
  ## Using with RobotLab Robots
55
55
 
data/Rakefile CHANGED
@@ -1,8 +1,116 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "bundler/gem_tasks"
4
- require "minitest/test_task"
3
+ require 'bundler/gem_tasks'
4
+ require 'rake/testtask'
5
5
 
6
- Minitest::TestTask.create
6
+ Rake::TestTask.new(:test) do |t|
7
+ t.libs << 'test'
8
+ t.libs << 'lib'
9
+ t.test_files = FileList['test/**/*_test.rb', 'test/**/test_*.rb'].exclude('**/*_helper.rb')
10
+ t.verbose = true
11
+ t.ruby_opts << '-rtest_helper'
12
+ end
7
13
 
8
14
  task default: :test
15
+
16
+ desc 'Run tests with verbose output'
17
+ task :test_verbose do
18
+ ENV['TESTOPTS'] = '--verbose'
19
+ Rake::Task[:test].invoke
20
+ end
21
+
22
+ desc 'Run a single test file'
23
+ task :test_file, [:file] do |_t, args|
24
+ ruby "test/#{args[:file]}"
25
+ end
26
+
27
+ desc 'Check code style with RuboCop'
28
+ task :rubocop do
29
+ sh 'bundle exec rubocop'
30
+ end
31
+
32
+ desc 'Auto-correct RuboCop offenses'
33
+ task :rubocop_fix do
34
+ sh 'bundle exec rubocop -a'
35
+ end
36
+
37
+ desc 'Check code complexity with Flog (warn >=20, fail >=50)'
38
+ task :flog_check do
39
+ require 'flog'
40
+
41
+ method_warn = 20.0
42
+ method_fail = 50.0
43
+
44
+ flogger = Flog.new(all: true)
45
+ flogger.flog(*Dir.glob('lib/**/*.rb'))
46
+
47
+ warnings = []
48
+ failures = []
49
+
50
+ flogger.each_by_score do |method, score|
51
+ next if method.end_with?('#none')
52
+
53
+ if score > method_fail
54
+ failures << "#{format('%.1f', score)}: #{method}"
55
+ elsif score > method_warn
56
+ warnings << "#{format('%.1f', score)}: #{method}"
57
+ end
58
+ end
59
+
60
+ unless warnings.empty?
61
+ puts "\nFlog warnings (#{method_warn}–#{method_fail}) — target for future refactoring:"
62
+ warnings.each { |v| puts " #{v}" }
63
+ end
64
+
65
+ if failures.empty?
66
+ puts "\nFlog: no methods exceed the failure threshold (>=#{method_fail})"
67
+ else
68
+ puts "\nFlog failures (>=#{method_fail}) — must be refactored:"
69
+ failures.each { |v| puts " #{v}" }
70
+ abort "\nFlog quality gate failed: #{failures.size} method(s) exceed #{method_fail}"
71
+ end
72
+ end
73
+
74
+ desc 'Run all quality checks: tests (with coverage), RuboCop, and Flog'
75
+ task :quality do
76
+ results = {}
77
+
78
+ puts "\n#{'=' * 60}"
79
+ puts 'Quality Gate: Tests + Coverage'
80
+ puts '=' * 60
81
+ results[:tests] = system('bundle exec rake test') ? :pass : :fail
82
+
83
+ puts "\n#{'=' * 60}"
84
+ puts 'Quality Gate: RuboCop'
85
+ puts '=' * 60
86
+ results[:rubocop] = system('bundle exec rubocop') ? :pass : :fail
87
+
88
+ puts "\n#{'=' * 60}"
89
+ puts 'Quality Gate: Flog Complexity'
90
+ puts '=' * 60
91
+ results[:flog] = system('bundle exec rake flog_check') ? :pass : :fail
92
+
93
+ puts "\n#{'=' * 60}"
94
+ puts 'Quality Summary'
95
+ puts '=' * 60
96
+ results.each do |gate, status|
97
+ icon = status == :pass ? 'PASS' : 'FAIL'
98
+ puts " [#{icon}] #{gate}"
99
+ end
100
+ puts '=' * 60
101
+
102
+ abort "\nQuality gate failed" if results.values.any?(:fail)
103
+ puts "\nAll quality gates passed."
104
+ end
105
+
106
+ namespace :docs do
107
+ desc 'Build MkDocs documentation'
108
+ task :build do
109
+ sh 'mkdocs build'
110
+ end
111
+
112
+ desc 'Serve MkDocs documentation locally on http://localhost:8000'
113
+ task :serve do
114
+ sh 'mkdocs serve'
115
+ end
116
+ end
@@ -0,0 +1,186 @@
1
+ # API Reference
2
+
3
+ All public methods of `RobotLab::DocumentStore`.
4
+
5
+ ## Constructor
6
+
7
+ ### `new(model_name: DEFAULT_MODEL)`
8
+
9
+ Creates a new, empty document store.
10
+
11
+ | Parameter | Type | Default | Description |
12
+ |-----------|------|---------|-------------|
13
+ | `model_name` | `String` | `"BAAI/bge-small-en-v1.5"` | fastembed model name. Ignored when fastembed is unavailable. |
14
+
15
+ ```ruby
16
+ # Default model
17
+ store = RobotLab::DocumentStore.new
18
+
19
+ # Custom model
20
+ store = RobotLab::DocumentStore.new(model_name: "BAAI/bge-base-en-v1.5")
21
+ ```
22
+
23
+ The embedding model is initialised lazily — no download or computation happens
24
+ at construction time.
25
+
26
+ ---
27
+
28
+ ## Writing Documents
29
+
30
+ ### `store(key, text) → self`
31
+
32
+ Embeds `text` and stores it under `key`. If a document already exists under that
33
+ key it is replaced. Embedding happens synchronously before the method returns.
34
+
35
+ | Parameter | Type | Description |
36
+ |-----------|------|-------------|
37
+ | `key` | `Symbol` \| `String` | Identifier for the document. Strings are converted to `Symbol` internally. |
38
+ | `text` | `String` | The document text to embed and store. |
39
+
40
+ **Returns:** `self` — supports method chaining.
41
+
42
+ ```ruby
43
+ store.store(:readme, File.read("README.md"))
44
+ .store(:changelog, File.read("CHANGELOG.md"))
45
+ .store(:guide, File.read("GUIDE.md"))
46
+ ```
47
+
48
+ ---
49
+
50
+ ## Searching
51
+
52
+ ### `search(query, limit: 5) → Array<Hash>`
53
+
54
+ Embeds `query` and returns the `limit` most similar documents ranked by cosine
55
+ similarity score descending.
56
+
57
+ | Parameter | Type | Default | Description |
58
+ |-----------|------|---------|-------------|
59
+ | `query` | `String` | — | Natural-language search query. |
60
+ | `limit` | `Integer` | `5` | Maximum number of results to return. |
61
+
62
+ **Returns:** `Array` of result hashes, each containing:
63
+
64
+ | Key | Type | Description |
65
+ |-----|------|-------------|
66
+ | `:key` | `Symbol` | The document key |
67
+ | `:text` | `String` | The stored document text |
68
+ | `:score` | `Float` | Cosine similarity score, range `0.0..1.0` |
69
+
70
+ Results are sorted by `:score` descending (most similar first). Returns `[]` if
71
+ the store is empty.
72
+
73
+ ```ruby
74
+ results = store.search("database connection pool exhausted", limit: 3)
75
+
76
+ results.each do |r|
77
+ puts "#{r[:key].to_s.ljust(24)} score=#{r[:score].round(3)}"
78
+ puts " #{r[:text][0, 80]}…"
79
+ end
80
+ ```
81
+
82
+ !!! tip "Score interpretation"
83
+ Scores above `0.7` indicate strong semantic similarity. Scores below `0.3`
84
+ typically indicate weak or no relationship. The exact thresholds depend on
85
+ the model and your document corpus.
86
+
87
+ ---
88
+
89
+ ## Reading Metadata
90
+
91
+ ### `size → Integer`
92
+
93
+ Returns the number of stored documents.
94
+
95
+ ```ruby
96
+ store.size # => 0
97
+ store.store(:a, "text")
98
+ store.size # => 1
99
+ ```
100
+
101
+ ### `keys → Array<Symbol>`
102
+
103
+ Returns the keys of all stored documents in insertion order.
104
+
105
+ ```ruby
106
+ store.store(:alpha, "…")
107
+ store.store(:beta, "…")
108
+ store.keys # => [:alpha, :beta]
109
+ ```
110
+
111
+ ### `empty? → Boolean`
112
+
113
+ Returns `true` if no documents are stored.
114
+
115
+ ```ruby
116
+ store.empty? # => true
117
+ store.store(:a, "text")
118
+ store.empty? # => false
119
+ ```
120
+
121
+ ---
122
+
123
+ ## Removing Documents
124
+
125
+ ### `delete(key) → self`
126
+
127
+ Removes the document stored under `key`. No-op if the key does not exist.
128
+
129
+ | Parameter | Type | Description |
130
+ |-----------|------|-------------|
131
+ | `key` | `Symbol` \| `String` | Key to remove. |
132
+
133
+ **Returns:** `self`.
134
+
135
+ ```ruby
136
+ store.delete(:outdated_doc)
137
+ store.delete("also_works_with_strings")
138
+ ```
139
+
140
+ ### `clear → self`
141
+
142
+ Removes all stored documents.
143
+
144
+ **Returns:** `self`.
145
+
146
+ ```ruby
147
+ store.clear
148
+ store.empty? # => true
149
+ ```
150
+
151
+ ---
152
+
153
+ ## Constants
154
+
155
+ ### `DEFAULT_MODEL`
156
+
157
+ ```ruby
158
+ RobotLab::DocumentStore::DEFAULT_MODEL # => "BAAI/bge-small-en-v1.5"
159
+ ```
160
+
161
+ The fastembed model used when no `model_name:` is specified.
162
+
163
+ ### `STOP_WORDS`
164
+
165
+ A frozen `Set<String>` of common English words excluded from TF-IDF indexing
166
+ (`a`, `an`, `the`, `is`, `are`, …). Only relevant when fastembed is unavailable.
167
+
168
+ ---
169
+
170
+ ## Thread Safety
171
+
172
+ All public methods are thread-safe. An internal `Mutex` serialises access to
173
+ the document hash. You can safely share a single `DocumentStore` instance across
174
+ Puma threads or Sidekiq worker threads. Ractors cannot share a mutable store instance; give each Ractor its own.
175
+
176
+ ```ruby
177
+ # Safe: multiple threads can store and search concurrently
178
+ store = RobotLab::DocumentStore.new
179
+
180
+ threads = 10.times.map do |i|
181
+ Thread.new { store.store(:"doc_#{i}", "Document #{i} text content") }
182
+ end
183
+ threads.each(&:join)
184
+
185
+ store.size # => 10
186
+ ```
@@ -0,0 +1,140 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 820 540" font-family="Roboto Mono, monospace">
2
+ <!-- transparent background -->
3
+
4
+ <!-- ── Title ── -->
5
+ <text x="410" y="32" text-anchor="middle" font-size="16" font-weight="bold" fill="#e2e8f0">DocumentStore — Embedding Pipeline</text>
6
+
7
+ <!-- ══════════════════════════════════════════════════════
8
+ LEFT COLUMN — STORE PATH
9
+ ═══════════════════════════════════════════════════════ -->
10
+
11
+ <!-- store() label -->
12
+ <text x="160" y="72" text-anchor="middle" font-size="13" font-weight="bold" fill="#94a3b8">store(key, text)</text>
13
+
14
+ <!-- Input text box -->
15
+ <rect x="60" y="82" width="200" height="44" rx="6" fill="#1e293b" stroke="#475569" stroke-width="1.5"/>
16
+ <text x="160" y="100" text-anchor="middle" font-size="11" fill="#94a3b8">Document Text</text>
17
+ <text x="160" y="116" text-anchor="middle" font-size="10" fill="#64748b">"Postgres query slow…"</text>
18
+
19
+ <!-- arrow down -->
20
+ <line x1="160" y1="126" x2="160" y2="150" stroke="#475569" stroke-width="1.5" marker-end="url(#arr)"/>
21
+
22
+ <!-- fastembed decision diamond -->
23
+ <polygon points="160,152 210,176 160,200 110,176" fill="#1e3a5f" stroke="#3b82f6" stroke-width="1.5"/>
24
+ <text x="160" y="172" text-anchor="middle" font-size="10" fill="#93c5fd">fastembed</text>
25
+ <text x="160" y="185" text-anchor="middle" font-size="10" fill="#93c5fd">available?</text>
26
+
27
+ <!-- YES branch → passage_embed -->
28
+ <line x1="210" y1="176" x2="270" y2="176" stroke="#22c55e" stroke-width="1.5" marker-end="url(#arrGreen)"/>
29
+ <text x="237" y="169" text-anchor="middle" font-size="9" fill="#22c55e">yes</text>
30
+
31
+ <rect x="270" y="158" width="130" height="36" rx="6" fill="#14532d" stroke="#22c55e" stroke-width="1.5"/>
32
+ <text x="335" y="173" text-anchor="middle" font-size="10" fill="#86efac">passage_embed()</text>
33
+ <text x="335" y="187" text-anchor="middle" font-size="9" fill="#4ade80">dense Float[] (384d)</text>
34
+
35
+ <!-- NO branch → fallback_vector -->
36
+ <line x1="160" y1="200" x2="160" y2="224" stroke="#f59e0b" stroke-width="1.5" marker-end="url(#arrAmber)"/>
37
+ <text x="172" y="216" font-size="9" fill="#f59e0b">no</text>
38
+
39
+ <rect x="60" y="224" width="200" height="36" rx="6" fill="#451a03" stroke="#f59e0b" stroke-width="1.5"/>
40
+ <text x="160" y="239" text-anchor="middle" font-size="10" fill="#fcd34d">fallback_vector()</text>
41
+ <text x="160" y="253" text-anchor="middle" font-size="9" fill="#fbbf24">sparse Hash TF-IDF L2</text>
42
+
43
+ <!-- merge to store -->
44
+ <line x1="335" y1="194" x2="335" y2="310" stroke="#22c55e" stroke-width="1.2" stroke-dasharray="4,3"/>
45
+ <line x1="160" y1="260" x2="160" y2="310" stroke="#f59e0b" stroke-width="1.2" stroke-dasharray="4,3"/>
46
+ <line x1="160" y1="310" x2="248" y2="310" stroke="#475569" stroke-width="1.2"/>
47
+ <line x1="335" y1="310" x2="248" y2="310" stroke="#475569" stroke-width="1.2"/>
48
+ <line x1="248" y1="310" x2="248" y2="326" stroke="#475569" stroke-width="1.5" marker-end="url(#arr)"/>
49
+
50
+ <!-- @documents store -->
51
+ <rect x="148" y="326" width="200" height="44" rx="6" fill="#1e1b4b" stroke="#818cf8" stroke-width="1.5"/>
52
+ <text x="248" y="344" text-anchor="middle" font-size="11" fill="#a5b4fc">@documents</text>
53
+ <text x="248" y="360" text-anchor="middle" font-size="9" fill="#6366f1">{ key → { text, vector } }</text>
54
+
55
+ <!-- Mutex badge -->
56
+ <rect x="334" y="330" width="50" height="18" rx="4" fill="#312e81" stroke="#6366f1" stroke-width="1"/>
57
+ <text x="359" y="343" text-anchor="middle" font-size="9" fill="#c7d2fe">Mutex</text>
58
+
59
+ <!-- ══════════════════════════════════════════════════════
60
+ RIGHT COLUMN — SEARCH PATH
61
+ ═══════════════════════════════════════════════════════ -->
62
+
63
+ <!-- search() label -->
64
+ <text x="640" y="72" text-anchor="middle" font-size="13" font-weight="bold" fill="#94a3b8">search(query, limit:)</text>
65
+
66
+ <!-- Query text box -->
67
+ <rect x="540" y="82" width="200" height="44" rx="6" fill="#1e293b" stroke="#475569" stroke-width="1.5"/>
68
+ <text x="640" y="100" text-anchor="middle" font-size="11" fill="#94a3b8">Query String</text>
69
+ <text x="640" y="116" text-anchor="middle" font-size="10" fill="#64748b">"Why is my query slow?"</text>
70
+
71
+ <!-- arrow down -->
72
+ <line x1="640" y1="126" x2="640" y2="150" stroke="#475569" stroke-width="1.5" marker-end="url(#arr)"/>
73
+
74
+ <!-- decision diamond -->
75
+ <polygon points="640,152 690,176 640,200 590,176" fill="#1e3a5f" stroke="#3b82f6" stroke-width="1.5"/>
76
+ <text x="640" y="172" text-anchor="middle" font-size="10" fill="#93c5fd">fastembed</text>
77
+ <text x="640" y="185" text-anchor="middle" font-size="10" fill="#93c5fd">available?</text>
78
+
79
+ <!-- YES → query_embed -->
80
+ <line x1="690" y1="176" x2="750" y2="176" stroke="#22c55e" stroke-width="1.5" marker-end="url(#arrGreen)"/>
81
+ <text x="717" y="169" text-anchor="middle" font-size="9" fill="#22c55e">yes</text>
82
+
83
+ <rect x="750" y="158" width="56" height="36" rx="6" fill="#14532d" stroke="#22c55e" stroke-width="1.5"/>
84
+ <text x="778" y="173" text-anchor="middle" font-size="10" fill="#86efac">query_</text>
85
+ <text x="778" y="187" text-anchor="middle" font-size="10" fill="#86efac">embed()</text>
86
+
87
+ <!-- NO → fallback_vector -->
88
+ <line x1="640" y1="200" x2="640" y2="224" stroke="#f59e0b" stroke-width="1.5" marker-end="url(#arrAmber)"/>
89
+ <text x="652" y="216" font-size="9" fill="#f59e0b">no</text>
90
+
91
+ <rect x="540" y="224" width="200" height="36" rx="6" fill="#451a03" stroke="#f59e0b" stroke-width="1.5"/>
92
+ <text x="640" y="239" text-anchor="middle" font-size="10" fill="#fcd34d">fallback_vector()</text>
93
+ <text x="640" y="253" text-anchor="middle" font-size="9" fill="#fbbf24">sparse Hash TF-IDF L2</text>
94
+
95
+ <!-- query_vec arrow to cosine_similarity -->
96
+ <line x1="640" y1="260" x2="640" y2="326" stroke="#475569" stroke-width="1.5" marker-end="url(#arr)"/>
97
+ <line x1="778" y1="194" x2="778" y2="310" stroke="#22c55e" stroke-width="1.2" stroke-dasharray="4,3"/>
98
+ <line x1="778" y1="310" x2="700" y2="310" stroke="#475569" stroke-width="1.2"/>
99
+ <line x1="700" y1="310" x2="700" y2="326" stroke="#475569" stroke-width="1.5" marker-end="url(#arr)"/>
100
+
101
+ <!-- cosine_similarity box -->
102
+ <rect x="530" y="326" width="240" height="44" rx="6" fill="#1c1917" stroke="#d97706" stroke-width="1.5"/>
103
+ <text x="650" y="344" text-anchor="middle" font-size="11" fill="#fbbf24">cosine_similarity()</text>
104
+ <text x="650" y="360" text-anchor="middle" font-size="9" fill="#92400e">dot(q,p) / (‖q‖ · ‖p‖) → score 0..1</text>
105
+
106
+ <!-- @documents feeds cosine via arrow -->
107
+ <line x1="348" y1="348" x2="530" y2="348" stroke="#818cf8" stroke-width="1.5" marker-end="url(#arrPurple)"/>
108
+ <text x="435" y="341" text-anchor="middle" font-size="9" fill="#818cf8">stored vectors</text>
109
+
110
+ <!-- Results box -->
111
+ <rect x="530" y="412" width="240" height="44" rx="6" fill="#1e293b" stroke="#38bdf8" stroke-width="1.5"/>
112
+ <text x="650" y="430" text-anchor="middle" font-size="11" fill="#7dd3fc">Ranked Results</text>
113
+ <text x="650" y="446" text-anchor="middle" font-size="9" fill="#38bdf8">[{ key:, text:, score: }, …] sorted desc</text>
114
+
115
+ <line x1="650" y1="370" x2="650" y2="412" stroke="#475569" stroke-width="1.5" marker-end="url(#arr)"/>
116
+
117
+ <!-- ══════════════════════════════════════════════════════
118
+ BOTTOM — FALLBACK DETAIL
119
+ ═══════════════════════════════════════════════════════ -->
120
+
121
+ <text x="410" y="490" text-anchor="middle" font-size="11" fill="#64748b">
122
+ TF-IDF fallback: tokenise → strip stop words → Porter stem → L2-normalise → sparse cosine dot product
123
+ </text>
124
+
125
+ <!-- ── arrowhead markers ── -->
126
+ <defs>
127
+ <marker id="arr" markerWidth="8" markerHeight="8" refX="6" refY="3" orient="auto">
128
+ <path d="M0,0 L0,6 L8,3 z" fill="#475569"/>
129
+ </marker>
130
+ <marker id="arrGreen" markerWidth="8" markerHeight="8" refX="6" refY="3" orient="auto">
131
+ <path d="M0,0 L0,6 L8,3 z" fill="#22c55e"/>
132
+ </marker>
133
+ <marker id="arrAmber" markerWidth="8" markerHeight="8" refX="6" refY="3" orient="auto">
134
+ <path d="M0,0 L0,6 L8,3 z" fill="#f59e0b"/>
135
+ </marker>
136
+ <marker id="arrPurple" markerWidth="8" markerHeight="8" refX="6" refY="3" orient="auto">
137
+ <path d="M0,0 L0,6 L8,3 z" fill="#818cf8"/>
138
+ </marker>
139
+ </defs>
140
+ </svg>