rails_mcp_code_search 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +176 -0
- data/Rakefile +10 -0
- data/exe/rails-mcp-code-search +5 -0
- data/lib/rails_mcp_code_search/background_worker.rb +150 -0
- data/lib/rails_mcp_code_search/chunk.rb +7 -0
- data/lib/rails_mcp_code_search/database.rb +96 -0
- data/lib/rails_mcp_code_search/embeddings/adapter.rb +13 -0
- data/lib/rails_mcp_code_search/embeddings/local_adapter.rb +29 -0
- data/lib/rails_mcp_code_search/embeddings/openai_adapter.rb +59 -0
- data/lib/rails_mcp_code_search/erb_parser.rb +89 -0
- data/lib/rails_mcp_code_search/indexer.rb +172 -0
- data/lib/rails_mcp_code_search/ruby_parser.rb +104 -0
- data/lib/rails_mcp_code_search/runtime.rb +63 -0
- data/lib/rails_mcp_code_search/server.rb +25 -0
- data/lib/rails_mcp_code_search/sliding_window_parser.rb +39 -0
- data/lib/rails_mcp_code_search/tools/base_tool.rb +26 -0
- data/lib/rails_mcp_code_search/tools/reindex_tool.rb +51 -0
- data/lib/rails_mcp_code_search/tools/search_tool.rb +128 -0
- data/lib/rails_mcp_code_search/tools/status_tool.rb +64 -0
- data/lib/rails_mcp_code_search/version.rb +3 -0
- data/lib/rails_mcp_code_search.rb +26 -0
- metadata +186 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 6e1c670e94a061746f627ec212078736dca6e7398a6ca1260872eb829ee53d31
|
|
4
|
+
data.tar.gz: 38cfa6d2f88765590d1db57432e5639637c0f4ef3a4fa075f9e668d54a423bb4
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 4e06c7f98f2bdaca1c3169d48d9753fdd0456ca3cc3e42f1e7951afd36c88dd704e2e00efe2b2ac268650dce37509c4488a9ee5e9039d9a0562e430dae01e845
|
|
7
|
+
data.tar.gz: 20a0b55ad4f2a8e1498697d3b16941a3361c1165b1e17a6eb1ef15aca82d5cf85f777de415810bf042c847733b7553f2df5911bcd7c6948acff94520db48c942
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Daniel Lopez Prat
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
<h1 style="margin-top: 10px;">rails_mcp_code_search</h1>
|
|
4
|
+
|
|
5
|
+
<h3>Semantic codebase search for Claude Code via MCP</h3>
|
|
6
|
+
|
|
7
|
+
<p>Think Cursor's codebase indexing, but for Claude Code.</p>
|
|
8
|
+
|
|
9
|
+
<div align="center">
|
|
10
|
+
<a href="https://github.com/6temes/rails_mcp_code_search/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/badge/license-MIT-green"/></a>
|
|
11
|
+
<a href="https://rubygems.org/gems/rails_mcp_code_search"><img alt="Gem Version" src="https://img.shields.io/gem/v/rails_mcp_code_search"/></a>
|
|
12
|
+
<a href="https://github.com/6temes/rails_mcp_code_search/actions"><img alt="CI" src="https://github.com/6temes/rails_mcp_code_search/actions/workflows/ci.yml/badge.svg"/></a>
|
|
13
|
+
</div>
|
|
14
|
+
|
|
15
|
+
<p>
|
|
16
|
+
<a href="#how-it-works">How It Works</a> ◆
|
|
17
|
+
<a href="#quick-start">Quick Start</a> ◆
|
|
18
|
+
<a href="#tools">Tools</a> ◆
|
|
19
|
+
<a href="#configuration">Configuration</a> ◆
|
|
20
|
+
<a href="#architecture">Architecture</a>
|
|
21
|
+
</p>
|
|
22
|
+
|
|
23
|
+
</div>
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## How It Works
|
|
28
|
+
|
|
29
|
+
- **Ruby files** — Parsed with [Prism](https://ruby.github.io/prism/) into classes, modules, and methods
|
|
30
|
+
- **ERB templates** — Parsed with [Herb](https://github.com/marcoroth/herb) into blocks, conditionals, and HTML elements
|
|
31
|
+
- **Other files** (JS, TS, YAML, Markdown) — Sliding window chunking
|
|
32
|
+
- **Embeddings** — Generated locally with [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) (384 dimensions, zero config)
|
|
33
|
+
- **Vector search** — SQLite + [sqlite-vec](https://alexgarcia.xyz/sqlite-vec/) for cosine similarity
|
|
34
|
+
- **Background indexing** — Starts automatically; search is available immediately
|
|
35
|
+
|
|
36
|
+
## Quick Start
|
|
37
|
+
|
|
38
|
+
```sh
|
|
39
|
+
gem install rails_mcp_code_search
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Requires Ruby 4.0+. The first run downloads the embedding model (~80 MB) to `~/.cache/informers/`.
|
|
43
|
+
|
|
44
|
+
Add to `~/.claude/settings.json`:
|
|
45
|
+
|
|
46
|
+
```json
|
|
47
|
+
{
|
|
48
|
+
"mcpServers": {
|
|
49
|
+
"code-search": {
|
|
50
|
+
"command": "rails-mcp-code-search"
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
That's it. Claude Code launches the server automatically when you open a project.
|
|
57
|
+
|
|
58
|
+
## Tools
|
|
59
|
+
|
|
60
|
+
### search
|
|
61
|
+
|
|
62
|
+
Search the codebase by concept or behavior using natural language.
|
|
63
|
+
|
|
64
|
+
```text
|
|
65
|
+
query: "user authentication logic"
|
|
66
|
+
limit: 10
|
|
67
|
+
file_pattern: "app/models/**/*.rb"
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Returns ranked results with file path, line range, similarity score, and code content. On each search, changed files are automatically re-indexed with a 200ms time budget.
|
|
71
|
+
|
|
72
|
+
### reindex
|
|
73
|
+
|
|
74
|
+
Trigger a manual reindex. Returns immediately — runs in the background.
|
|
75
|
+
|
|
76
|
+
```text
|
|
77
|
+
full: true # Rebuild entire index
|
|
78
|
+
full: false # Incremental (default, only changed files)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### status
|
|
82
|
+
|
|
83
|
+
Check index health, chunk count, embedding provider, and search stats.
|
|
84
|
+
|
|
85
|
+
## Configuration
|
|
86
|
+
|
|
87
|
+
### Environment Variables
|
|
88
|
+
|
|
89
|
+
| Variable | Default | Description |
|
|
90
|
+
|---|---|---|
|
|
91
|
+
| `RAILS_MCP_CODE_SEARCH_PROVIDER` | `local` | Embedding provider: `local` or `openai` |
|
|
92
|
+
| `RAILS_MCP_CODE_SEARCH_DB_PATH` | auto | Override database file path |
|
|
93
|
+
| `RAILS_MCP_CODE_SEARCH_LOG_LEVEL` | `info` | Log level: `debug`, `info`, `warn`, `error` |
|
|
94
|
+
| `RAILS_MCP_CODE_SEARCH_OPENAI_API_KEY` | — | Required when provider is `openai` |
|
|
95
|
+
|
|
96
|
+
### OpenAI Provider
|
|
97
|
+
|
|
98
|
+
For faster indexing or higher-dimensional embeddings, use OpenAI's `text-embedding-3-small` (1536 dimensions):
|
|
99
|
+
|
|
100
|
+
```sh
|
|
101
|
+
export RAILS_MCP_CODE_SEARCH_PROVIDER=openai
|
|
102
|
+
export RAILS_MCP_CODE_SEARCH_OPENAI_API_KEY=sk-...
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
| | Local (default) | OpenAI |
|
|
106
|
+
|---|---|---|
|
|
107
|
+
| Dimensions | 384 | 1536 |
|
|
108
|
+
| Speed | ~5 min / 1000 files | ~15s / 1000 files |
|
|
109
|
+
| Cost | Free | Per-token API cost |
|
|
110
|
+
| Privacy | Everything stays local | Code sent to OpenAI |
|
|
111
|
+
| Setup | Zero config | Requires API key |
|
|
112
|
+
|
|
113
|
+
Switching providers triggers a full reindex automatically.
|
|
114
|
+
|
|
115
|
+
> **Privacy notice:** When using the OpenAI provider, source code chunks from your repository are sent to OpenAI's embedding API. The local provider (default) keeps everything on your machine.
|
|
116
|
+
|
|
117
|
+
### Supported File Types
|
|
118
|
+
|
|
119
|
+
`*.rb` `*.erb` `*.js` `*.ts` `*.yml` `*.yaml` `*.md`
|
|
120
|
+
|
|
121
|
+
Excluded: `vendor/` `node_modules/` `tmp/` `log/`
|
|
122
|
+
|
|
123
|
+
## Architecture
|
|
124
|
+
|
|
125
|
+
```text
|
|
126
|
+
┌─────────────────────────────────────────────────────────┐
|
|
127
|
+
│ Claude Code │
|
|
128
|
+
│ (MCP Client) │
|
|
129
|
+
└──────────────────────┬──────────────────────────────────┘
|
|
130
|
+
│ stdio
|
|
131
|
+
▼
|
|
132
|
+
┌─────────────────────────────────────────────────────────┐
|
|
133
|
+
│ MCP Server (search, reindex, status) │
|
|
134
|
+
└──────────────────────┬──────────────────────────────────┘
|
|
135
|
+
│
|
|
136
|
+
┌────────────┴────────────┐
|
|
137
|
+
▼ ▼
|
|
138
|
+
┌──────────────────┐ ┌──────────────────┐
|
|
139
|
+
│ Background │ │ Embedding │
|
|
140
|
+
│ Worker │ │ Adapter │
|
|
141
|
+
│ (sole writer) │ │ (local / openai) │
|
|
142
|
+
└────────┬─────────┘ └────────┬─────────┘
|
|
143
|
+
│ │
|
|
144
|
+
▼ ▼
|
|
145
|
+
┌─────────────────────────────────────────────┐
|
|
146
|
+
│ SQLite + sqlite-vec │
|
|
147
|
+
│ (WAL mode, per-project DB) │
|
|
148
|
+
│ ~/.local/share/rails-mcp-code-search/ │
|
|
149
|
+
└─────────────────────────────────────────────┘
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Parsers
|
|
153
|
+
|
|
154
|
+
| File Type | Parser | Chunk Types |
|
|
155
|
+
|-----------|--------|-------------|
|
|
156
|
+
| `*.rb` | Prism AST | `class`, `module`, `method` |
|
|
157
|
+
| `*.erb` | Herb AST | `erb_block`, `erb_conditional`, `html_element` |
|
|
158
|
+
| Everything else | Sliding window | `window` (50 lines, 10 overlap) |
|
|
159
|
+
|
|
160
|
+
### Key Design Decisions
|
|
161
|
+
|
|
162
|
+
- **Standalone ActiveRecord** — No Rails runtime dependency, just SQLite
|
|
163
|
+
- **Single writer thread** — All DB mutations go through the background worker
|
|
164
|
+
- **Smart reindex** — Changed files (via `git diff`) are re-indexed before each search
|
|
165
|
+
- **Per-project database** — SHA256 of the project path, stored in `~/.local/share/`
|
|
166
|
+
- **Prism + Herb AST** — Semantic chunking produces better search results than naive line splitting
|
|
167
|
+
|
|
168
|
+
## License
|
|
169
|
+
|
|
170
|
+
MIT
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
<div align="center">
|
|
175
|
+
<p>Made in Tokyo with ❤️ and 🤖</p>
|
|
176
|
+
</div>
|
data/Rakefile
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
module RailsMcpCodeSearch
  # Runs indexing jobs and counter flushes on one dedicated thread.
  #
  # Callers hand work to the thread through a Queue (#enqueue) and record search
  # statistics in in-memory buffers (#enqueue_hit_counts, #increment_*); the
  # worker thread persists those buffers whenever its queue is empty.
  class BackgroundWorker
    # Minimum number of seconds between two full index runs.
    REINDEX_COOLDOWN = 60

    # state    - :idle, :indexing or :error.
    # progress - initialised to 0.0; nothing in this class updates it.
    # errors   - error list copied from the indexer after each job, plus the
    #            message of a crash of the worker loop itself.
    attr_reader :state, :progress, :errors

    # indexer - object responding to #index_all, #index_files(payload) and #errors.
    # logger  - optional object responding to the log-level methods (e.g. #error).
    def initialize(indexer:, logger: nil)
      @indexer = indexer
      @logger = logger
      @queue = Queue.new
      @mutex = Mutex.new
      @state = :idle
      @progress = 0.0
      @errors = []
      @stop_requested = false
      @last_full_reindex_at = nil
      @hit_count_buffer = []
      @search_count = 0
      @reindex_count = 0
      # Jobs enqueued but not yet fully processed; lets #wait_for_reindex see
      # work the worker thread has not picked up yet.
      @pending_jobs = 0
    end

    # Spawns the worker thread and schedules an initial full index.
    # Returns self.
    def start
      @thread = Thread.new { run }
      enqueue(:full_index)
      self
    end

    # Asks the worker loop to exit and waits up to 5 seconds for the thread.
    def stop
      @stop_requested = true
      @queue.push(:shutdown)
      @thread&.join(5)
    end

    # Queues a job for the worker thread.
    #
    # work_type - :full_index, :index_files or :flush.
    # payload   - passed to the indexer for :index_files jobs.
    def enqueue(work_type, payload: nil)
      @mutex.synchronize { @pending_jobs += 1 }
      @queue.push({ type: work_type, payload: })
    end

    # Buffers chunk ids whose hit counters should be incremented on the next flush.
    # An id may appear several times; each occurrence counts as one hit.
    def enqueue_hit_counts(chunk_ids)
      @mutex.synchronize { @hit_count_buffer.concat(chunk_ids) }
    end

    def increment_search_count
      @mutex.synchronize { @search_count += 1 }
    end

    def increment_reindex_count
      @mutex.synchronize { @reindex_count += 1 }
    end

    # Blocks until every enqueued job has been processed and the worker is idle,
    # or until +timeout+ seconds have passed.
    #
    # Returns true when the worker settled in time, false on timeout.
    def wait_for_reindex(timeout: 0.2)
      deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
      loop do
        return true if settled?
        remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
        return false if remaining <= 0
        sleep([ remaining, 0.05 ].min)
      end
    end

    private

    # True when no job is queued or running and the state is :idle. Checking the
    # state alone is not enough: the idle loop sleeps between queue polls, so a
    # freshly enqueued job would otherwise look finished before it even started.
    def settled?
      @mutex.synchronize { @pending_jobs.zero? && @state == :idle }
    end

    # Worker loop: pops jobs without blocking; when the queue is empty it
    # flushes the buffered counters and sleeps briefly before polling again.
    # Any exception escaping the loop ends the thread and leaves state :error.
    def run
      ActiveRecord::Base.connection_pool.with_connection do
        loop do
          break if @stop_requested

          work = begin
            @queue.pop(true)
          rescue ThreadError
            # Queue empty: use the idle time to persist statistics.
            flush_counters
            sleep 0.1
            next
          end

          break if work == :shutdown

          begin
            process_work(work)
          ensure
            @mutex.synchronize { @pending_jobs -= 1 }
          end
        end
      end
    rescue => e
      set_state :error
      @mutex.synchronize { @errors << e.message }
      log(:error, "Background worker crashed: #{e.message}")
    end

    # Dispatches one queued job. Unknown job types are ignored.
    def process_work(work)
      case work[:type]
      when :full_index
        return if on_cooldown?
        set_state :indexing
        @indexer.index_all
        @mutex.synchronize do
          @errors = @indexer.errors
          @last_full_reindex_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          @reindex_count += 1
        end
        set_state :idle
      when :index_files
        set_state :indexing
        @indexer.index_files(work[:payload])
        @mutex.synchronize { @errors = @indexer.errors }
        set_state :idle
      when :flush
        flush_counters
      end
    end

    # True when the last full index finished less than REINDEX_COOLDOWN seconds ago.
    def on_cooldown?
      @mutex.synchronize do
        return false unless @last_full_reindex_at
        elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - @last_full_reindex_at
        elapsed < REINDEX_COOLDOWN
      end
    end

    # Persists buffered hit counts and search/reindex totals, then resets the buffers.
    def flush_counters
      ids = @mutex.synchronize { @hit_count_buffer.shift(@hit_count_buffer.size) }

      # A chunk id buffered N times must gain N hits. A single
      # `where(id: ids).update_all("... + 1")` adds only 1 per chunk, so ids are
      # grouped by how often they occur and each group gets one UPDATE.
      ids.tally.group_by { |_id, hits| hits }.each do |hits, entries|
        Chunk.where(id: entries.map(&:first)).update_all([ "hit_count = hit_count + ?", hits ])
      end

      search_count, reindex_count = @mutex.synchronize do
        counts = [ @search_count, @reindex_count ]
        @search_count = 0
        @reindex_count = 0
        counts
      end

      if search_count > 0
        current = Database::Metadata.get("total_searches").to_i
        Database::Metadata.set "total_searches", current + search_count
        Database::Metadata.set "last_search_at", Time.now.iso8601
      end

      if reindex_count > 0
        current = Database::Metadata.get("total_reindexes").to_i
        Database::Metadata.set "total_reindexes", current + reindex_count
        Database::Metadata.set "last_reindex_at", Time.now.iso8601
      end
    end

    def set_state(new_state)
      @mutex.synchronize { @state = new_state }
    end

    # Forwards to the logger when one was given; public_send keeps the dispatch
    # limited to the logger's public API.
    def log(level, message)
      @logger&.public_send(level, message)
    end
  end
end
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
require "active_record"
|
|
2
|
+
require "neighbor"
|
|
3
|
+
require "digest"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
|
|
6
|
+
module RailsMcpCodeSearch
  # Connects ActiveRecord to a per-project SQLite file and creates the
  # `chunks` and `metadata` tables on first use.
  module Database
    default_data_home = File.join(Dir.home, ".local", "share")

    # Directory holding the per-project database files; honours XDG_DATA_HOME.
    DATA_DIR = File.join(ENV.fetch("XDG_DATA_HOME", default_data_home), "rails-mcp-code-search")

    class << self
      # Opens (creating if needed) the database and prepares its schema.
      #
      # project_path - recorded in the metadata table and, unless db_path is
      #                given, hashed to pick the database file name.
      # db_path      - explicit database file location; overrides the default.
      #
      # Returns the path of the database file in use.
      def setup(project_path: Dir.pwd, db_path: nil)
        database_file = db_path || db_path_for(project_path)
        ensure_data_directory(File.dirname(database_file))

        Neighbor::SQLite.initialize!

        ActiveRecord::Base.establish_connection(
          adapter: "sqlite3",
          database: database_file,
          pool: 5,
          timeout: 5000
        )

        configure_pragmas
        create_schema
        Metadata.set("project_path", project_path)

        # Restrict the database file to its owner.
        File.chmod(0o600, database_file) if File.exist?(database_file)

        database_file
      end

      # Default database location for a project: DATA_DIR/<sha256 of the path>.db
      def db_path_for(project_path)
        File.join(DATA_DIR, "#{Digest::SHA256.hexdigest(project_path)}.db")
      end

      private

      # Creates +dir+ (owner-only permissions when newly created) and drops an
      # empty `.noindex` marker file into it.
      def ensure_data_directory(dir)
        FileUtils.mkdir_p(dir, mode: 0o700)
        marker = File.join(dir, ".noindex")
        FileUtils.touch(marker) unless File.exist?(marker)
      end

      # Applies the SQLite settings on the current connection: WAL journal,
      # NORMAL synchronous mode and a larger page cache.
      def configure_pragmas
        [
          "PRAGMA journal_mode=WAL",
          "PRAGMA synchronous=NORMAL",
          "PRAGMA cache_size=-64000"
        ].each { |pragma| ActiveRecord::Base.connection.execute(pragma) }
      end

      # Creates whichever of the two tables is still missing; existing tables
      # are left untouched.
      def create_schema
        ActiveRecord::Schema.define do
          chunks_missing = !table_exists?(:chunks)

          if chunks_missing
            create_table :chunks do |table|
              table.text :file_path, null: false
              table.integer :line_start, null: false
              table.integer :line_end, null: false
              table.text :chunk_type, null: false
              table.text :qualified_name
              table.text :content, null: false
              table.text :checksum, null: false
              table.binary :embedding
              table.integer :hit_count, default: 0
              table.timestamps
            end

            add_index :chunks, :file_path
            add_index :chunks, :checksum
            add_index :chunks, :hit_count, order: { hit_count: :desc }
          end

          metadata_missing = !table_exists?(:metadata)

          if metadata_missing
            create_table :metadata, id: false do |table|
              table.text :key, null: false, primary_key: true
              table.text :value, null: false
            end
          end
        end
      end
    end

    # String key/value store backed by the `metadata` table.
    class Metadata < ActiveRecord::Base
      self.table_name = "metadata"
      self.primary_key = "key"

      class << self
        # Returns the stored value for +key+, or nil when the key is absent.
        def get(key)
          record = find_by(key: key)
          record&.value
        end

        # Inserts or overwrites +key+; the value is stored as a string.
        def set(key, value)
          upsert({ key: key, value: value.to_s }, unique_by: :key)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
require "informers"
|
|
2
|
+
|
|
3
|
+
module RailsMcpCodeSearch
  module Embeddings
    # Embedding adapter backed by an Informers pipeline for the MODEL below.
    class LocalAdapter < Adapter
      # Value reported by #dimensions for this adapter.
      DIMENSIONS = 384
      MODEL = "sentence-transformers/all-MiniLM-L6-v2"

      def initialize
        @mutex = Mutex.new
      end

      # Embeds one text or an array of texts.
      #
      # texts - a String or an Array of Strings; a single value is wrapped in an Array.
      #
      # Returns whatever the Informers pipeline returns for the batch
      # (presumably one vector per input text — confirm against Informers).
      def embed(texts)
        pipeline.call(Array(texts))
      end

      def dimensions
        DIMENSIONS
      end

      private

      # Builds the pipeline on first use and memoizes it; the mutex guards the
      # memoization only, not the pipeline call itself.
      def pipeline
        @mutex.synchronize { @_pipeline ||= Informers.pipeline("embedding", MODEL) }
      end
    end
  end
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
require "openai"
|
|
2
|
+
|
|
3
|
+
module RailsMcpCodeSearch
  module Embeddings
    # Embedding adapter that sends text to OpenAI's embeddings endpoint.
    class OpenaiAdapter < Adapter
      # Value reported by #dimensions for this adapter.
      DIMENSIONS = 1536
      MODEL = "text-embedding-3-small"
      # Number of retries after a rate-limit response before giving up.
      MAX_RETRIES = 3
      # Inputs longer than this many characters are cut before being sent.
      MAX_CHARS = 15_000

      ApiKeyError = Class.new(StandardError)

      # Raises ApiKeyError when RAILS_MCP_CODE_SEARCH_OPENAI_API_KEY is unset,
      # empty or whitespace-only.
      def initialize
        api_key = ENV["RAILS_MCP_CODE_SEARCH_OPENAI_API_KEY"]
        if api_key.nil? || api_key.strip.empty?
          raise ApiKeyError, "RAILS_MCP_CODE_SEARCH_OPENAI_API_KEY environment variable is required for OpenAI provider"
        end

        @client = OpenAI::Client.new(access_token: api_key)
        @warned = false
      end

      # Embeds one text or an array of texts.
      #
      # texts - a String or an Array of Strings; each is truncated to MAX_CHARS.
      #
      # Returns an Array of embeddings ordered like the input, or [] for empty
      # input (no request is made in that case).
      # Raises Faraday::TooManyRequestsError once MAX_RETRIES rate-limit retries
      # are exhausted, and a StandardError with a redacted message for any other
      # failure.
      def embed(texts)
        texts = Array(texts).map { truncate(_1) }
        return [] if texts.empty?

        warn_once

        retries = 0
        begin
          response = @client.embeddings(parameters: { model: MODEL, input: texts })
          # Order by the "index" field of each item so results line up with the input.
          response.dig("data").sort_by { _1["index"] }.map { _1["embedding"] }
        rescue Faraday::TooManyRequestsError
          retries += 1
          raise if retries > MAX_RETRIES
          # Exponential backoff: 2s, 4s, 8s.
          sleep(2**retries)
          retry
        rescue => e
          # cause: nil keeps the original exception, whose message is not
          # redacted, from being attached to the sanitized one.
          raise sanitize_error(e), cause: nil
        end
      end

      def dimensions = DIMENSIONS

      private

      # Prints the privacy warning to stderr the first time #embed sends data.
      def warn_once
        return if @warned
        @warned = true
        $stderr.puts "[rails-mcp-code-search] WARNING: Source code from this repository will be sent to OpenAI's embedding API."
      end

      def truncate(text)
        text.size > MAX_CHARS ? text[0, MAX_CHARS] : text
      end

      # Builds a StandardError whose message has anything shaped like an OpenAI
      # key ("sk-...") replaced, keeping the original backtrace for debugging.
      def sanitize_error(error)
        message = error.message.gsub(/sk-[A-Za-z0-9_-]+/, "[REDACTED]")
        StandardError.new(message).tap { _1.set_backtrace(error.backtrace) }
      end
    end
  end
end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
require "herb"
|
|
2
|
+
|
|
3
|
+
module RailsMcpCodeSearch
  # Splits an ERB template into chunks using the Herb parser, falling back to
  # SlidingWindowParser whenever Herb fails or yields nothing usable.
  class ErbParser
    Result = RubyParser::Result
    # Nodes spanning fewer lines than this are not emitted as chunks.
    MIN_LINES = 3
    # Upper bound on the number of chunks returned per template.
    MAX_CHUNKS = 30

    def self.parse(source, file_path: nil)
      new.parse(source, file_path:)
    end

    # source    - the template text.
    # file_path - forwarded to SlidingWindowParser on fallback.
    #
    # Returns an Array of Result chunks (at most MAX_CHUNKS when produced by
    # Herb). Any error raised while parsing or visiting results in the
    # sliding-window fallback rather than an exception.
    def parse(source, file_path: nil)
      result = Herb.parse(source)
      return SlidingWindowParser.parse(source, file_path:) unless result.success?

      visitor = Visitor.new(source)
      visitor.visit(result.value)
      chunks = deduplicate(visitor.chunks).first(MAX_CHUNKS)

      chunks.empty? ? SlidingWindowParser.parse(source, file_path:) : chunks
    rescue => _e
      SlidingWindowParser.parse(source, file_path:)
    end

    private

    # Drops every chunk whose line range lies inside another chunk's range,
    # keeping the original order of the survivors.
    #
    # When several chunks cover exactly the same lines (e.g. an element and a
    # block that open and close on the same lines) the first one is kept.
    # Comparing by position rather than by object equality means such a group
    # is neither dropped entirely nor kept in full, however Result defines ==.
    def deduplicate(chunks)
      chunks.each_with_index
            .reject { |chunk, index| shadowed?(chunks, chunk, index) }
            .map(&:first)
    end

    # True when some other chunk strictly contains +chunk+, or covers the very
    # same lines and appears earlier in +chunks+.
    def shadowed?(chunks, chunk, index)
      chunks.each_with_index.any? do |other, other_index|
        next false if other_index == index
        next false unless other.line_start <= chunk.line_start && other.line_end >= chunk.line_end

        same_range = other.line_start == chunk.line_start && other.line_end == chunk.line_end
        !same_range || other_index < index
      end
    end

    # Collects a chunk for each ERB block, ERB conditional (if / unless / case)
    # and HTML element node it is asked to visit. Each visit_* method records
    # the node and then calls super (presumably Herb::Visitor's default
    # traversal into child nodes — confirm against Herb).
    class Visitor < Herb::Visitor
      attr_reader :chunks

      def initialize(source)
        super()
        @source = source
        @lines = source.lines
        @chunks = []
      end

      def visit_erb_block_node(node)
        add_chunk node, "erb_block"
        super
      end

      def visit_erb_if_node(node)
        add_chunk node, "erb_conditional"
        super
      end

      def visit_erb_unless_node(node)
        add_chunk node, "erb_conditional"
        super
      end

      def visit_erb_case_node(node)
        add_chunk node, "erb_conditional"
        super
      end

      def visit_html_element_node(node)
        add_chunk node, "html_element"
        super
      end

      private

      # Records the node's full source lines as a chunk of the given type,
      # skipping nodes shorter than MIN_LINES or with blank content.
      def add_chunk(node, type)
        line_start = node.location.start.line
        line_end = node.location.end.line
        return if (line_end - line_start + 1) < MIN_LINES

        # Node line numbers are treated as 1-based; @lines is 0-based.
        content = @lines[(line_start - 1)..(line_end - 1)]&.join
        return if content.nil? || content.strip.empty?

        @chunks << Result.new(
          content:,
          line_start:,
          line_end:,
          chunk_type: type,
          qualified_name: nil
        )
      end
    end
  end
end
|