codebase_index 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/README.md +95 -300
- data/exe/codebase-index-mcp +3 -31
- data/exe/codebase-index-mcp-http +3 -31
- data/lib/codebase_index/ast/method_extractor.rb +3 -8
- data/lib/codebase_index/ast/node.rb +28 -0
- data/lib/codebase_index/ast/parser.rb +53 -92
- data/lib/codebase_index/builder.rb +67 -4
- data/lib/codebase_index/cache/cache_middleware.rb +199 -0
- data/lib/codebase_index/cache/cache_store.rb +264 -0
- data/lib/codebase_index/cache/redis_cache_store.rb +116 -0
- data/lib/codebase_index/cache/solid_cache_store.rb +111 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +29 -24
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +7 -40
- data/lib/codebase_index/console/adapters/job_adapter.rb +68 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +7 -40
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +7 -40
- data/lib/codebase_index/console/bridge.rb +7 -0
- data/lib/codebase_index/console/console_response_renderer.rb +3 -7
- data/lib/codebase_index/console/embedded_executor.rb +2 -1
- data/lib/codebase_index/console/server.rb +1 -4
- data/lib/codebase_index/dependency_graph.rb +28 -19
- data/lib/codebase_index/embedding/indexer.rb +18 -8
- data/lib/codebase_index/embedding/openai.rb +27 -6
- data/lib/codebase_index/embedding/provider.rb +29 -2
- data/lib/codebase_index/evaluation/evaluator.rb +5 -12
- data/lib/codebase_index/extractor.rb +40 -44
- data/lib/codebase_index/extractors/action_cable_extractor.rb +9 -36
- data/lib/codebase_index/extractors/callback_analyzer.rb +22 -8
- data/lib/codebase_index/extractors/controller_extractor.rb +3 -93
- data/lib/codebase_index/extractors/decorator_extractor.rb +7 -14
- data/lib/codebase_index/extractors/engine_extractor.rb +20 -1
- data/lib/codebase_index/extractors/graphql_extractor.rb +4 -29
- data/lib/codebase_index/extractors/job_extractor.rb +11 -6
- data/lib/codebase_index/extractors/lib_extractor.rb +0 -31
- data/lib/codebase_index/extractors/mailer_extractor.rb +15 -85
- data/lib/codebase_index/extractors/manager_extractor.rb +1 -15
- data/lib/codebase_index/extractors/model_extractor.rb +20 -53
- data/lib/codebase_index/extractors/phlex_extractor.rb +8 -8
- data/lib/codebase_index/extractors/policy_extractor.rb +1 -24
- data/lib/codebase_index/extractors/poro_extractor.rb +0 -17
- data/lib/codebase_index/extractors/serializer_extractor.rb +12 -7
- data/lib/codebase_index/extractors/service_extractor.rb +1 -38
- data/lib/codebase_index/extractors/shared_utility_methods.rb +183 -1
- data/lib/codebase_index/extractors/validator_extractor.rb +3 -17
- data/lib/codebase_index/extractors/view_component_extractor.rb +10 -9
- data/lib/codebase_index/filename_utils.rb +32 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +1 -4
- data/lib/codebase_index/formatting/base.rb +0 -10
- data/lib/codebase_index/graph_analyzer.rb +1 -1
- data/lib/codebase_index/mcp/bootstrapper.rb +58 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +35 -34
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +29 -29
- data/lib/codebase_index/mcp/server.rb +59 -68
- data/lib/codebase_index/mcp/tool_response_renderer.rb +23 -0
- data/lib/codebase_index/notion/client.rb +2 -2
- data/lib/codebase_index/notion/mapper.rb +1 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +3 -11
- data/lib/codebase_index/notion/mappers/model_mapper.rb +20 -23
- data/lib/codebase_index/notion/mappers/shared.rb +22 -0
- data/lib/codebase_index/observability/health_check.rb +0 -2
- data/lib/codebase_index/observability/structured_logger.rb +12 -30
- data/lib/codebase_index/operator/pipeline_guard.rb +0 -7
- data/lib/codebase_index/resilience/index_validator.rb +3 -21
- data/lib/codebase_index/retrieval/context_assembler.rb +19 -7
- data/lib/codebase_index/retrieval/query_classifier.rb +14 -12
- data/lib/codebase_index/retrieval/ranker.rb +6 -2
- data/lib/codebase_index/retrieval/search_executor.rb +8 -19
- data/lib/codebase_index/retriever.rb +1 -9
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +5 -25
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +6 -7
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +58 -53
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +11 -7
- data/lib/codebase_index/session_tracer/file_store.rb +1 -8
- data/lib/codebase_index/session_tracer/redis_store.rb +1 -7
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +4 -13
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +1 -7
- data/lib/codebase_index/session_tracer/store.rb +14 -0
- data/lib/codebase_index/storage/metadata_store.rb +37 -10
- data/lib/codebase_index/storage/pgvector.rb +37 -5
- data/lib/codebase_index/storage/qdrant.rb +39 -6
- data/lib/codebase_index/storage/vector_store.rb +11 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +14 -10
- data/lib/codebase_index/token_utils.rb +19 -0
- data/lib/codebase_index/version.rb +1 -1
- data/lib/codebase_index.rb +25 -6
- data/lib/tasks/codebase_index.rake +2 -2
- metadata +11 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 982e7949df0e0db9249705ab9f009121c3c8156582c63712f0613fccc998337d
|
|
4
|
+
data.tar.gz: 4fb41c658901cd26606e44164da7059a7d62aa39c795a682020cfbb6252311be
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6b62fe0a1d8b0db683744214461ec5d0029e41cf4538b7313ce2701a3a985bf9b1d06ce955acb225db09e93aaa72bcbc5e3b40cd534f7d682d98d190a670d722
|
|
7
|
+
data.tar.gz: f0a948295982aa85951fa8cca96cc8c30b317176a53424fe18e50cc1d3e28b17df5fc9dfb6a191e6450a1318b3b0f81a0f2cdf20fe7326c0c4e492a7f8b47f70
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,66 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.3.1] - 2026-03-04
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
|
|
12
|
+
- **Gemspec version** now reads from `version.rb` instead of being hardcoded — prevents version mismatch during gem builds
|
|
13
|
+
- **Release workflow** replaced `rake release` (fails on tag-triggered detached HEAD) with `gem build` + `gem push`
|
|
14
|
+
|
|
15
|
+
## [0.3.0] - 2026-03-04
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
|
|
19
|
+
- **Redis/SolidCache caching layer** for retrieval pipeline with TTL, namespace isolation, and nil-caching
|
|
20
|
+
- **Engine classification** — engines tagged as `:framework` or `:application` based on install path (handles Docker vendor paths)
|
|
21
|
+
- **Graph analysis staleness tracking** — `generated_at` timestamp and `graph_sha` for detecting stale analysis
|
|
22
|
+
- **Docker setup guide** (`docs/DOCKER_SETUP.md`) — split architecture, volume mounts, bridge mode, troubleshooting
|
|
23
|
+
- **Context7 documentation suite** — 10 new user-facing docs optimized for AI retrieval: FAQ, Troubleshooting, Architecture, Extractor Reference, Why CodebaseIndex, MCP Tool Cookbook, and 3 Context7 skills
|
|
24
|
+
- **`context7.json`** configuration for controlling Context7 indexing scope
|
|
25
|
+
|
|
26
|
+
### Fixed
|
|
27
|
+
|
|
28
|
+
- **Vendor path leak** in source file resolution across 9 extractors — framework gems under `vendor/bundle` no longer produce empty source
|
|
29
|
+
- **Prism cross-version compatibility** — handle API differences between Prism versions
|
|
30
|
+
- **`schema_sha`** now supports `db/structure.sql` fallback (not just `db/schema.rb`)
|
|
31
|
+
- **ViewComponent extractor** skips framework-internal components with no resolvable source file
|
|
32
|
+
- **HTTP connection reuse** and retry handling in embedding providers
|
|
33
|
+
- **DependencyGraph `to_h`** returns a dup to prevent cache pollution
|
|
34
|
+
- **MCP tool counts** corrected across all documentation (27 index / 31 console)
|
|
35
|
+
- **TROUBLESHOOTING.md** corrected: `config.extractors` controls retrieval scope, not which extractors run
|
|
36
|
+
|
|
37
|
+
### Changed
|
|
38
|
+
|
|
39
|
+
- **README streamlined** from 620 to 325 lines — added Quick Start, Documentation table; removed verbose sections in favor of links to dedicated docs
|
|
40
|
+
- **Internal rake tasks** (`retrieve`, `self_analyze`) hidden from `rails -T`
|
|
41
|
+
- **Estimated tokens memoization** removed to prevent stale values after source changes
|
|
42
|
+
- **Simplification sweep** — dead code removal, shared helper extraction, bug fixes across caching and retrieval layers
|
|
43
|
+
|
|
44
|
+
### Performance
|
|
45
|
+
|
|
46
|
+
- Critical hotspots fixed across extraction, storage, and retrieval pipelines
|
|
47
|
+
- `fetch_key` optimization for falsy value handling in cache layer
|
|
48
|
+
|
|
49
|
+
## [0.2.1] - 2026-02-19
|
|
50
|
+
|
|
51
|
+
### Changed
|
|
52
|
+
|
|
53
|
+
- Switch release workflow to RubyGems trusted publishing
|
|
54
|
+
|
|
55
|
+
## [0.2.0] - 2026-02-19
|
|
56
|
+
|
|
57
|
+
### Added
|
|
58
|
+
|
|
59
|
+
- **Embedded console MCP server** for zero-config Rails querying (no bridge process needed)
|
|
60
|
+
- **Console MCP setup guide** (`docs/CONSOLE_MCP_SETUP.md`) — stdio, Docker, HTTP/Rack, SSH bridge options
|
|
61
|
+
- **CODEOWNERS** and issue template configuration
|
|
62
|
+
|
|
63
|
+
### Fixed
|
|
64
|
+
|
|
65
|
+
- MCP gem compatibility and symbol key handling in embedded executor
|
|
66
|
+
- Duplicate URI warning in gemspec
|
|
67
|
+
|
|
8
68
|
## [0.1.0] - 2026-02-18
|
|
9
69
|
|
|
10
70
|
### Added
|
data/README.md
CHANGED
|
@@ -18,6 +18,21 @@ CodebaseIndex solves this by:
|
|
|
18
18
|
- **Tracking dependencies** bidirectionally so you can trace impact across the codebase
|
|
19
19
|
- **Enriching with git data** so you know what's actively changing vs. dormant
|
|
20
20
|
|
|
21
|
+
See [Why CodebaseIndex?](docs/WHY_CODEBASE_INDEX.md) for concrete before/after examples.
|
|
22
|
+
|
|
23
|
+
## Quick Start
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Add to your Rails app's Gemfile, then:
|
|
27
|
+
bundle install
|
|
28
|
+
rails generate codebase_index:install
|
|
29
|
+
bundle exec rake codebase_index:extract
|
|
30
|
+
bundle exec rake codebase_index:stats
|
|
31
|
+
# Add the MCP server to .mcp.json (see below) and start asking questions
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
See [Getting Started](docs/GETTING_STARTED.md) for the full walkthrough including Docker, storage presets, and CI setup.
|
|
35
|
+
|
|
21
36
|
## Installation
|
|
22
37
|
|
|
23
38
|
Add to your Gemfile:
|
|
@@ -30,15 +45,26 @@ Then:
|
|
|
30
45
|
|
|
31
46
|
```bash
|
|
32
47
|
bundle install
|
|
48
|
+
rails generate codebase_index:install
|
|
49
|
+
rails db:migrate
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Create a minimal configuration:
|
|
53
|
+
|
|
54
|
+
```ruby
|
|
55
|
+
# config/initializers/codebase_index.rb
|
|
56
|
+
CodebaseIndex.configure do |config|
|
|
57
|
+
config.output_dir = Rails.root.join('tmp/codebase_index')
|
|
58
|
+
end
|
|
33
59
|
```
|
|
34
60
|
|
|
35
|
-
Or install directly:
|
|
61
|
+
Or install the gem directly:
|
|
36
62
|
|
|
37
63
|
```bash
|
|
38
64
|
gem install codebase_index
|
|
39
65
|
```
|
|
40
66
|
|
|
41
|
-
> **Requires Rails.** Extraction runs inside a booted Rails application using runtime introspection (`ActiveRecord::Base.descendants`, `Rails.application.routes`, etc.). The gem cannot extract from source files alone. See [Getting Started](docs/GETTING_STARTED.md) for setup.
|
|
67
|
+
> **Requires Rails.** Extraction runs inside a booted Rails application using runtime introspection (`ActiveRecord::Base.descendants`, `Rails.application.routes`, etc.). The gem cannot extract from source files alone. See [Getting Started](docs/GETTING_STARTED.md) for full setup details.
|
|
42
68
|
|
|
43
69
|
## Target Environment
|
|
44
70
|
|
|
@@ -99,85 +125,19 @@ Extraction runs inside the Rails application (via rake task) to access runtime i
|
|
|
99
125
|
|
|
100
126
|
### Extractors (34)
|
|
101
127
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
| Extractor | What it captures |
|
|
105
|
-
|-----------|-----------------|
|
|
106
|
-
| **ModelExtractor** | Schema (columns, indexes, FKs), associations, validations, callbacks (all 13 types), scopes, enums, inlined concerns. Chunks large models into summary/associations/callbacks/validations. |
|
|
107
|
-
| **ControllerExtractor** | Route mapping (verb → path → action), filter chains per action, response formats, permitted params. Per-action chunks with applicable filters and route context. |
|
|
108
|
-
| **ServiceExtractor** | Scans `app/services`, `app/interactors`, `app/operations`, `app/commands`, `app/use_cases`. Entry points, dependency injection, custom errors, return type inference. |
|
|
109
|
-
| **JobExtractor** | ActiveJob and Sidekiq workers. Queue config, retry/concurrency options, perform arguments, callbacks. |
|
|
110
|
-
| **MailerExtractor** | ActionMailer classes with defaults, per-action templates, callbacks, helper usage. |
|
|
111
|
-
| **ConfigurationExtractor** | Rails initializers from `config/initializers` and `config/environments`, plus behavioral profile from resolved `Rails.application.config`. |
|
|
112
|
-
| **RouteExtractor** | All Rails routes via runtime introspection of `Rails.application.routes`. |
|
|
113
|
-
| **MiddlewareExtractor** | Rack middleware stack as a single ordered unit. |
|
|
114
|
-
|
|
115
|
-
**UI Components**
|
|
116
|
-
|
|
117
|
-
| Extractor | What it captures |
|
|
118
|
-
|-----------|-----------------|
|
|
119
|
-
| **PhlexExtractor** | Phlex component slots, initialize params, sub-components, Stimulus controller references, route helpers. |
|
|
120
|
-
| **ViewComponentExtractor** | ViewComponent slots, template paths, preview classes, collection support. |
|
|
121
|
-
| **ViewTemplateExtractor** | ERB view templates with render calls, instance variables, helper usage. |
|
|
122
|
-
| **DecoratorExtractor** | Decorators, presenters, and form objects from `app/decorators`, `app/presenters`, `app/form_objects`. |
|
|
123
|
-
|
|
124
|
-
**Data Layer**
|
|
125
|
-
|
|
126
|
-
| Extractor | What it captures |
|
|
127
|
-
|-----------|-----------------|
|
|
128
|
-
| **ConcernExtractor** | ActiveSupport::Concern modules from `app/models/concerns` and `app/controllers/concerns`. |
|
|
129
|
-
| **PoroExtractor** | Plain Ruby objects in `app/models` (non-ActiveRecord classes, excluding concerns). |
|
|
130
|
-
| **SerializerExtractor** | ActiveModelSerializers, Blueprinter, Alba, and Draper. Auto-detects loaded serialization gems. |
|
|
131
|
-
| **ValidatorExtractor** | Custom ActiveModel validator classes with validation rules. |
|
|
132
|
-
| **ManagerExtractor** | SimpleDelegator subclasses — wrapped model, public methods, delegation chain. |
|
|
133
|
-
|
|
134
|
-
**API & Authorization**
|
|
135
|
-
|
|
136
|
-
| Extractor | What it captures |
|
|
137
|
-
|-----------|-----------------|
|
|
138
|
-
| **GraphQLExtractor** | graphql-ruby types, mutations, queries, resolvers, field metadata, authorization patterns. Produces 4 unit types. |
|
|
139
|
-
| **PunditExtractor** | Pundit authorization policies with action methods (index?, show?, create?, etc.). |
|
|
140
|
-
| **PolicyExtractor** | Domain policy classes with decision methods and eligibility rules. |
|
|
141
|
-
|
|
142
|
-
**Infrastructure**
|
|
143
|
-
|
|
144
|
-
| Extractor | What it captures |
|
|
145
|
-
|-----------|-----------------|
|
|
146
|
-
| **EngineExtractor** | Mounted Rails engines via runtime introspection with mount points and route counts. |
|
|
147
|
-
| **I18nExtractor** | Locale files from `config/locales` with translation key structures. |
|
|
148
|
-
| **ActionCableExtractor** | ActionCable channels with stream subscriptions, actions, broadcast patterns. |
|
|
149
|
-
| **ScheduledJobExtractor** | Scheduled jobs from `config/recurring.yml`, `config/sidekiq_cron.yml`, `config/schedule.rb`. |
|
|
150
|
-
| **RakeTaskExtractor** | Rake tasks from `lib/tasks/*.rake` with namespaces, dependencies, descriptions. |
|
|
151
|
-
| **MigrationExtractor** | ActiveRecord migrations with DDL metadata, table operations, reversibility, risk indicators. |
|
|
152
|
-
| **DatabaseViewExtractor** | SQL views from `db/views` (Scenic convention) with materialization and table references. |
|
|
153
|
-
| **StateMachineExtractor** | AASM, Statesman, and state_machines DSL definitions with states and transitions. |
|
|
154
|
-
| **EventExtractor** | Event publish/subscribe patterns (ActiveSupport::Notifications, Wisper). |
|
|
155
|
-
| **CachingExtractor** | Cache usage across controllers, models, and views — strategies, TTLs, cache keys. |
|
|
156
|
-
|
|
157
|
-
**Testing & Source**
|
|
158
|
-
|
|
159
|
-
| Extractor | What it captures |
|
|
160
|
-
|-----------|-----------------|
|
|
161
|
-
| **FactoryExtractor** | FactoryBot factory definitions with traits and associations. |
|
|
162
|
-
| **TestMappingExtractor** | Test file → subject class mapping with test counts and framework type. |
|
|
163
|
-
| **LibExtractor** | Ruby files from `lib/` (excluding tasks and generators). |
|
|
164
|
-
| **RailsSourceExtractor** | High-value Rails framework source and gem source pinned to exact installed versions. |
|
|
165
|
-
|
|
166
|
-
### Key Design Decisions
|
|
167
|
-
|
|
168
|
-
**Concern inlining.** When extracting a model, included concerns are read from disk and embedded as formatted comments directly in the model's source. This means the full behavioral picture is in one unit — no separate lookups needed during retrieval.
|
|
128
|
+
34 extractors cover every major Rails concept: models (with inlined concerns and schema), controllers (with route context), services, jobs, mailers, GraphQL types/mutations/resolvers, serializers, view components (Phlex and ViewComponent), ERB templates, decorators, concerns, validators, policies, routes, middleware, engines, i18n, Action Cable, rake tasks, migrations, database views, state machines, events, caching patterns, factories, test mappings, and Rails framework source pinned to exact installed versions.
|
|
169
129
|
|
|
170
|
-
|
|
130
|
+
See [docs/EXTRACTOR_REFERENCE.md](docs/EXTRACTOR_REFERENCE.md) for per-extractor documentation with configuration, edge cases, and example output.
|
|
171
131
|
|
|
172
|
-
|
|
132
|
+
### Key Design Decisions
|
|
173
133
|
|
|
174
|
-
**
|
|
134
|
+
**Concern inlining** — included concerns are embedded directly in the model's source. **Route prepending** — controllers get a route header showing HTTP verb → path → action. **Semantic chunking** — models split by purpose (associations, callbacks, validations), controllers split per-action. **Dependency graph with BFS blast radius** — forward and reverse edges enable change-impact traversal.
|
|
175
135
|
|
|
176
136
|
## MCP Servers
|
|
177
137
|
|
|
178
138
|
CodebaseIndex ships two [MCP](https://modelcontextprotocol.io/) servers for integrating with AI development tools (Claude Code, Cursor, Windsurf, etc.).
|
|
179
139
|
|
|
180
|
-
**Index Server** (
|
|
140
|
+
**Index Server** (27 tools) — Reads pre-extracted data from disk. No Rails boot required. Provides code lookup, dependency traversal, graph analysis, semantic search, pipeline management, feedback collection, and temporal snapshots.
|
|
181
141
|
|
|
182
142
|
```bash
|
|
183
143
|
codebase-index-mcp /path/to/rails-app/tmp/codebase_index
|
|
@@ -199,7 +159,7 @@ Add the servers to your project's `.mcp.json`:
|
|
|
199
159
|
{
|
|
200
160
|
"mcpServers": {
|
|
201
161
|
"codebase-index": {
|
|
202
|
-
"command": "codebase-index-mcp",
|
|
162
|
+
"command": "codebase-index-mcp-start",
|
|
203
163
|
"args": ["/path/to/rails-app/tmp/codebase_index"]
|
|
204
164
|
},
|
|
205
165
|
"codebase-console": {
|
|
@@ -211,217 +171,44 @@ Add the servers to your project's `.mcp.json`:
|
|
|
211
171
|
}
|
|
212
172
|
```
|
|
213
173
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
The **console server** runs embedded inside your Rails app (no config file needed). For Docker:
|
|
174
|
+
> **Recommended**: Use `codebase-index-mcp-start` instead of `codebase-index-mcp` for Claude Code. It validates that the index directory exists, checks for a manifest, ensures dependencies are installed, and restarts automatically on failure.
|
|
217
175
|
|
|
218
|
-
|
|
219
|
-
{
|
|
220
|
-
"mcpServers": {
|
|
221
|
-
"codebase-console": {
|
|
222
|
-
"command": "docker",
|
|
223
|
-
"args": ["exec", "-i", "my_container", "bundle", "exec", "rake", "codebase_index:console"]
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
```
|
|
228
|
-
|
|
229
|
-
### Validation
|
|
230
|
-
|
|
231
|
-
Verify each server starts and lists its tools:
|
|
232
|
-
|
|
233
|
-
```bash
|
|
234
|
-
# Index server — should list 27 tools
|
|
235
|
-
echo '{"jsonrpc":"2.0","id":1,"method":"tools/list"}' | \
|
|
236
|
-
codebase-index-mcp /path/to/rails-app/tmp/codebase_index
|
|
176
|
+
The **index server** reads from a pre-extracted directory — run `bundle exec rake codebase_index:extract` in your Rails app first.
|
|
237
177
|
|
|
238
|
-
|
|
239
|
-
echo '{"jsonrpc":"2.0","id":1,"method":"tools/list"}' | \
|
|
240
|
-
bundle exec rake codebase_index:console
|
|
241
|
-
```
|
|
178
|
+
The **console server** runs embedded inside your Rails app (no config file needed). For Docker setups, see [docs/DOCKER_SETUP.md](docs/DOCKER_SETUP.md).
|
|
242
179
|
|
|
243
180
|
## Subsystems
|
|
244
181
|
|
|
245
182
|
```
|
|
246
|
-
lib/
|
|
247
|
-
├──
|
|
248
|
-
├──
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
│ │ ├── view_component_extractor.rb # ViewComponent
|
|
267
|
-
│ │ ├── graphql_extractor.rb # GraphQL types, mutations, queries
|
|
268
|
-
│ │ ├── serializer_extractor.rb # Serializers/decorators
|
|
269
|
-
│ │ ├── manager_extractor.rb # SimpleDelegator managers
|
|
270
|
-
│ │ ├── policy_extractor.rb # Policy classes
|
|
271
|
-
│ │ ├── validator_extractor.rb # Standalone validators
|
|
272
|
-
│ │ ├── rails_source_extractor.rb # Framework/gem source
|
|
273
|
-
│ │ ├── shared_dependency_scanner.rb # Shared dependency detection
|
|
274
|
-
│ │ ├── shared_utility_methods.rb # Shared extractor utilities
|
|
275
|
-
│ │ └── ast_source_extraction.rb # AST-based source extraction
|
|
276
|
-
│ │
|
|
277
|
-
│ ├── ast/ # Prism-based AST layer
|
|
278
|
-
│ │ ├── parser.rb # Source parsing adapter
|
|
279
|
-
│ │ ├── node.rb # Normalized AST node
|
|
280
|
-
│ │ ├── method_extractor.rb # Method boundary detection
|
|
281
|
-
│ │ └── call_site_extractor.rb # Call site analysis
|
|
282
|
-
│ │
|
|
283
|
-
│ ├── ruby_analyzer/ # Static analysis
|
|
284
|
-
│ │ ├── class_analyzer.rb # Class structure analysis
|
|
285
|
-
│ │ ├── method_analyzer.rb # Method complexity/dependencies
|
|
286
|
-
│ │ ├── dataflow_analyzer.rb # Data flow tracing
|
|
287
|
-
│ │ ├── trace_enricher.rb # Enriches flow traces
|
|
288
|
-
│ │ ├── fqn_builder.rb # Fully-qualified name resolution
|
|
289
|
-
│ │ └── mermaid_renderer.rb # Diagram generation
|
|
290
|
-
│ │
|
|
291
|
-
│ ├── flow_analysis/ # Execution flow tracing
|
|
292
|
-
│ │ ├── operation_extractor.rb # Extract operations from AST
|
|
293
|
-
│ │ └── response_code_mapper.rb # HTTP response mapping
|
|
294
|
-
│ ├── flow_assembler.rb # Assembles execution flows
|
|
295
|
-
│ ├── flow_document.rb # Flow documentation format
|
|
296
|
-
│ │
|
|
297
|
-
│ ├── chunking/ # Semantic chunking
|
|
298
|
-
│ │ ├── chunk.rb # Chunk value object
|
|
299
|
-
│ │ └── semantic_chunker.rb # Type-aware splitting
|
|
300
|
-
│ │
|
|
301
|
-
│ ├── embedding/ # Embedding pipeline
|
|
302
|
-
│ │ ├── provider.rb # Provider interface
|
|
303
|
-
│ │ ├── openai.rb # OpenAI adapter
|
|
304
|
-
│ │ ├── text_preparer.rb # Text preparation for embedding
|
|
305
|
-
│ │ └── indexer.rb # Batch indexing with resumability
|
|
306
|
-
│ │
|
|
307
|
-
│ ├── storage/ # Storage backends
|
|
308
|
-
│ │ ├── vector_store.rb # Vector store interface + InMemory
|
|
309
|
-
│ │ ├── metadata_store.rb # Metadata store interface + InMemory/SQLite
|
|
310
|
-
│ │ ├── graph_store.rb # Graph store interface + InMemory
|
|
311
|
-
│ │ ├── pgvector.rb # PostgreSQL pgvector adapter
|
|
312
|
-
│ │ └── qdrant.rb # Qdrant adapter
|
|
313
|
-
│ │
|
|
314
|
-
│ ├── retrieval/ # Retrieval pipeline
|
|
315
|
-
│ │ ├── query_classifier.rb # Intent/scope/type classification
|
|
316
|
-
│ │ ├── search_executor.rb # Multi-strategy search
|
|
317
|
-
│ │ ├── ranker.rb # RRF-based ranking
|
|
318
|
-
│ │ └── context_assembler.rb # Token-budgeted context assembly
|
|
319
|
-
│ │
|
|
320
|
-
│ ├── formatting/ # LLM context formatting
|
|
321
|
-
│ │ ├── base.rb # Base formatter
|
|
322
|
-
│ │ ├── claude_adapter.rb # Claude-optimized output
|
|
323
|
-
│ │ ├── gpt_adapter.rb # GPT-optimized output
|
|
324
|
-
│ │ ├── generic_adapter.rb # Generic LLM output
|
|
325
|
-
│ │ └── human_adapter.rb # Human-readable output
|
|
326
|
-
│ │
|
|
327
|
-
│ ├── mcp/ # MCP Index Server (26 tools)
|
|
328
|
-
│ │ ├── server.rb # Tool definitions + dispatch
|
|
329
|
-
│ │ └── index_reader.rb # JSON index reader
|
|
330
|
-
│ │
|
|
331
|
-
│ ├── console/ # Console MCP Server (31 tools)
|
|
332
|
-
│ │ ├── server.rb # Console server + tool registration
|
|
333
|
-
│ │ ├── bridge.rb # JSON-lines protocol bridge
|
|
334
|
-
│ │ ├── safe_context.rb # Transaction rollback + timeout
|
|
335
|
-
│ │ ├── connection_manager.rb # Docker/direct/SSH modes
|
|
336
|
-
│ │ ├── model_validator.rb # AR schema validation
|
|
337
|
-
│ │ ├── sql_validator.rb # SQL statement validation
|
|
338
|
-
│ │ ├── audit_logger.rb # JSONL audit logging
|
|
339
|
-
│ │ ├── confirmation.rb # Human-in-the-loop confirmation
|
|
340
|
-
│ │ ├── tools/
|
|
341
|
-
│ │ │ ├── tier1.rb # 9 safe read-only tools
|
|
342
|
-
│ │ │ ├── tier2.rb # 9 domain-aware tools
|
|
343
|
-
│ │ │ ├── tier3.rb # 10 analytics tools
|
|
344
|
-
│ │ │ └── tier4.rb # 3 guarded tools
|
|
345
|
-
│ │ └── adapters/
|
|
346
|
-
│ │ ├── sidekiq_adapter.rb # Sidekiq job backend
|
|
347
|
-
│ │ ├── solid_queue_adapter.rb # Solid Queue job backend
|
|
348
|
-
│ │ ├── good_job_adapter.rb # GoodJob job backend
|
|
349
|
-
│ │ └── cache_adapter.rb # Cache backend adapters
|
|
350
|
-
│ │
|
|
351
|
-
│ ├── coordination/ # Multi-agent coordination
|
|
352
|
-
│ │ └── pipeline_lock.rb # File-based pipeline locking
|
|
353
|
-
│ │
|
|
354
|
-
│ ├── feedback/ # Agent self-service
|
|
355
|
-
│ │ ├── store.rb # JSONL feedback storage
|
|
356
|
-
│ │ └── gap_detector.rb # Feedback-driven gap detection
|
|
357
|
-
│ │
|
|
358
|
-
│ ├── operator/ # Pipeline management
|
|
359
|
-
│ │ ├── status_reporter.rb # Pipeline status
|
|
360
|
-
│ │ ├── error_escalator.rb # Error classification
|
|
361
|
-
│ │ └── pipeline_guard.rb # Rate limiting
|
|
362
|
-
│ │
|
|
363
|
-
│ ├── observability/ # Instrumentation
|
|
364
|
-
│ │ ├── instrumentation.rb # ActiveSupport::Notifications
|
|
365
|
-
│ │ ├── structured_logger.rb # JSON structured logging
|
|
366
|
-
│ │ └── health_check.rb # Component health checks
|
|
367
|
-
│ │
|
|
368
|
-
│ ├── resilience/ # Fault tolerance
|
|
369
|
-
│ │ ├── circuit_breaker.rb # Circuit breaker pattern
|
|
370
|
-
│ │ ├── retryable_provider.rb # Retry with backoff
|
|
371
|
-
│ │ └── index_validator.rb # Index integrity validation
|
|
372
|
-
│ │
|
|
373
|
-
│ ├── db/ # Schema management
|
|
374
|
-
│ │ ├── schema_version.rb # Version tracking
|
|
375
|
-
│ │ ├── migrator.rb # Standalone migration runner
|
|
376
|
-
│ │ └── migrations/
|
|
377
|
-
│ │ ├── 001_create_units.rb
|
|
378
|
-
│ │ ├── 002_create_edges.rb
|
|
379
|
-
│ │ └── 003_create_embeddings.rb
|
|
380
|
-
│ │
|
|
381
|
-
│ ├── session_tracer/ # Session tracing middleware + stores
|
|
382
|
-
│ │ ├── middleware.rb # Rack middleware
|
|
383
|
-
│ │ ├── file_store.rb # File-based trace storage
|
|
384
|
-
│ │ ├── redis_store.rb # Redis trace storage
|
|
385
|
-
│ │ └── solid_cache_store.rb # SolidCache trace storage
|
|
386
|
-
│ │
|
|
387
|
-
│ ├── temporal/ # Temporal snapshot system
|
|
388
|
-
│ │ ├── snapshot_store.rb # Snapshot persistence + diff
|
|
389
|
-
│ │ └── snapshot_metadata.rb # Snapshot metadata
|
|
390
|
-
│ │
|
|
391
|
-
│ └── evaluation/ # Retrieval evaluation
|
|
392
|
-
│ ├── query_set.rb # Evaluation query loading
|
|
393
|
-
│ ├── metrics.rb # Precision@k, Recall, MRR
|
|
394
|
-
│ ├── evaluator.rb # Query evaluation
|
|
395
|
-
│ ├── baseline_runner.rb # Grep/random/file baselines
|
|
396
|
-
│ └── report_generator.rb # JSON report generation
|
|
397
|
-
│
|
|
398
|
-
├── generators/codebase_index/ # Rails generators
|
|
399
|
-
│ ├── install_generator.rb # Initial setup
|
|
400
|
-
│ └── pgvector_generator.rb # pgvector migration
|
|
401
|
-
│
|
|
402
|
-
├── tasks/
|
|
403
|
-
│ └── codebase_index.rake # Rake task definitions
|
|
404
|
-
│
|
|
405
|
-
exe/
|
|
406
|
-
├── codebase-index-mcp # MCP Index Server executable (stdio)
|
|
407
|
-
├── codebase-index-mcp-start # Self-healing MCP wrapper
|
|
408
|
-
├── codebase-index-mcp-http # MCP Index Server (HTTP/Rack)
|
|
409
|
-
└── codebase-console-mcp # Console MCP Server executable
|
|
410
|
-
```
|
|
183
|
+
lib/codebase_index/
|
|
184
|
+
├── extractor.rb # Orchestrator — coordinates all 34 extractors
|
|
185
|
+
├── extracted_unit.rb # Core value object (the universal currency)
|
|
186
|
+
├── dependency_graph.rb # Directed graph + PageRank scoring
|
|
187
|
+
├── graph_analyzer.rb # Structural analysis (orphans, hubs, cycles, bridges)
|
|
188
|
+
├── retriever.rb # Retrieval orchestrator with degradation tiers
|
|
189
|
+
├── extractors/ # 34 extractors (one per Rails concept)
|
|
190
|
+
├── ast/ # Prism-based AST layer
|
|
191
|
+
├── ruby_analyzer/ # Static analysis (class, method, dataflow)
|
|
192
|
+
├── chunking/ # Semantic chunking (type-aware splitting)
|
|
193
|
+
├── embedding/ # Embedding pipeline (OpenAI, Ollama)
|
|
194
|
+
├── storage/ # Storage backends (pgvector, Qdrant, SQLite)
|
|
195
|
+
├── retrieval/ # Retrieval pipeline (classify, search, rank, assemble)
|
|
196
|
+
├── mcp/ # MCP Index Server (27 tools)
|
|
197
|
+
├── console/ # Console MCP Server (31 tools, 4 tiers)
|
|
198
|
+
├── coordination/ # Multi-agent pipeline locking
|
|
199
|
+
├── notion/ # Notion export
|
|
200
|
+
├── session_tracer/ # Session tracing middleware
|
|
201
|
+
├── temporal/ # Temporal snapshot system
|
|
202
|
+
└── evaluation/ # Retrieval evaluation harness
|
|
411
203
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
Budget Allocation:
|
|
418
|
-
├── 10% Structural overview (always included)
|
|
419
|
-
├── 50% Primary relevant units
|
|
420
|
-
├── 25% Supporting context (dependencies)
|
|
421
|
-
└── 15% Framework reference (when needed)
|
|
204
|
+
exe/
|
|
205
|
+
├── codebase-index-mcp # Index Server executable (stdio)
|
|
206
|
+
├── codebase-index-mcp-start # Self-healing MCP wrapper
|
|
207
|
+
├── codebase-index-mcp-http # Index Server (HTTP/Rack)
|
|
208
|
+
└── codebase-console-mcp # Console MCP Server executable
|
|
422
209
|
```
|
|
423
210
|
|
|
424
|
-
|
|
211
|
+
See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full pipeline explanation — extraction phases, dependency graph, retrieval pipeline, storage backends, and semantic chunking.
|
|
425
212
|
|
|
426
213
|
## Usage
|
|
427
214
|
|
|
@@ -434,43 +221,40 @@ bundle exec rake codebase_index:extract
|
|
|
434
221
|
### Incremental (CI)
|
|
435
222
|
|
|
436
223
|
```bash
|
|
437
|
-
# Auto-detects GitHub Actions / GitLab CI environment
|
|
438
224
|
bundle exec rake codebase_index:incremental
|
|
439
225
|
```
|
|
440
226
|
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
index:
|
|
445
|
-
runs-on: ubuntu-latest
|
|
446
|
-
steps:
|
|
447
|
-
- uses: actions/checkout@v4
|
|
448
|
-
with:
|
|
449
|
-
fetch-depth: 2
|
|
450
|
-
- name: Update index
|
|
451
|
-
run: bundle exec rake codebase_index:incremental
|
|
452
|
-
env:
|
|
453
|
-
GITHUB_BASE_REF: ${{ github.base_ref }}
|
|
454
|
-
```
|
|
227
|
+
Auto-detects the GitHub Actions / GitLab CI environment. See [Getting Started](docs/GETTING_STARTED.md) for the CI workflow YAML.
|
|
228
|
+
|
|
229
|
+
### Docker
|
|
455
230
|
|
|
456
|
-
|
|
231
|
+
Extraction runs inside the container; the Index Server runs on the host, reading the volume-mounted output. See [docs/DOCKER_SETUP.md](docs/DOCKER_SETUP.md) for Docker setup, MCP config, and troubleshooting.
|
|
457
232
|
|
|
458
233
|
```bash
|
|
459
|
-
bundle exec rake codebase_index:
|
|
234
|
+
docker compose exec app bundle exec rake codebase_index:extract
|
|
460
235
|
```
|
|
461
236
|
|
|
462
237
|
### Other Tasks
|
|
463
238
|
|
|
464
239
|
```bash
|
|
465
|
-
rake codebase_index:validate
|
|
466
|
-
rake codebase_index:stats
|
|
467
|
-
rake codebase_index:clean
|
|
240
|
+
rake codebase_index:validate # Check index integrity
|
|
241
|
+
rake codebase_index:stats # Show unit counts, sizes, graph stats
|
|
242
|
+
rake codebase_index:clean # Remove index
|
|
243
|
+
rake codebase_index:embed # Embed all extracted units
|
|
244
|
+
rake codebase_index:embed_incremental # Embed changed units only
|
|
245
|
+
rake codebase_index:flow[EntryPoint] # Generate execution flow for an entry point
|
|
246
|
+
rake codebase_index:console # Start console MCP server
|
|
247
|
+
rake codebase_index:notion_sync # Sync models/columns to Notion databases
|
|
468
248
|
```
|
|
469
249
|
|
|
250
|
+
See [docs/NOTION_INTEGRATION.md](docs/NOTION_INTEGRATION.md) for Notion export configuration.
|
|
251
|
+
|
|
470
252
|
### Ruby API
|
|
471
253
|
|
|
254
|
+
> **Requires a booted Rails environment.** These methods use runtime introspection and must be called from within a Rails process (console, rake task, initializer).
|
|
255
|
+
|
|
472
256
|
```ruby
|
|
473
|
-
# Full extraction
|
|
257
|
+
# Full extraction (output_dir from configuration)
|
|
474
258
|
CodebaseIndex.extract!
|
|
475
259
|
|
|
476
260
|
# Incremental
|
|
@@ -510,13 +294,24 @@ tmp/codebase_index/
|
|
|
510
294
|
|
|
511
295
|
Each unit JSON contains: `identifier`, `type`, `file_path`, `source_code` (annotated), `metadata` (rich structured data), `dependencies`, `dependents`, `chunks` (if applicable), and `estimated_tokens`.
|
|
512
296
|
|
|
513
|
-
##
|
|
297
|
+
## Documentation
|
|
514
298
|
|
|
515
|
-
|
|
299
|
+
| Guide | Purpose |
|
|
300
|
+
|-------|---------|
|
|
301
|
+
| [Getting Started](docs/GETTING_STARTED.md) | Install, configure, extract, inspect |
|
|
302
|
+
| [FAQ](docs/FAQ.md) | Common questions about setup, extraction, MCP, Docker |
|
|
303
|
+
| [Troubleshooting](docs/TROUBLESHOOTING.md) | Symptom → cause → fix for common problems |
|
|
304
|
+
| [Architecture](docs/ARCHITECTURE.md) | Pipeline stages, dependency graph, retrieval, storage |
|
|
305
|
+
| [Extractor Reference](docs/EXTRACTOR_REFERENCE.md) | What each of the 34 extractors captures |
|
|
306
|
+
| [MCP Servers](docs/MCP_SERVERS.md) | Full tool catalog and setup for Claude Code, Cursor, Windsurf |
|
|
307
|
+
| [MCP Tool Cookbook](docs/MCP_TOOL_COOKBOOK.md) | Scenario-based examples for common tasks |
|
|
308
|
+
| [Configuration Reference](docs/CONFIGURATION_REFERENCE.md) | All options with defaults |
|
|
309
|
+
| [Backend Matrix](docs/BACKEND_MATRIX.md) | Supported infrastructure combinations |
|
|
310
|
+
|
|
311
|
+
## Development
|
|
516
312
|
|
|
517
313
|
```bash
|
|
518
314
|
bin/setup # Install dependencies
|
|
519
|
-
bin/console # Interactive prompt
|
|
520
315
|
bundle exec rake spec # Run tests
|
|
521
316
|
bundle exec rubocop # Lint
|
|
522
317
|
```
|
data/exe/codebase-index-mcp
CHANGED
|
@@ -15,40 +15,12 @@ require_relative '../lib/codebase_index'
|
|
|
15
15
|
require_relative '../lib/codebase_index/dependency_graph'
|
|
16
16
|
require_relative '../lib/codebase_index/graph_analyzer'
|
|
17
17
|
require_relative '../lib/codebase_index/mcp/server'
|
|
18
|
+
require_relative '../lib/codebase_index/mcp/bootstrapper'
|
|
18
19
|
require_relative '../lib/codebase_index/embedding/text_preparer'
|
|
19
20
|
require_relative '../lib/codebase_index/embedding/indexer'
|
|
20
21
|
|
|
21
|
-
index_dir = ARGV
|
|
22
|
-
|
|
23
|
-
unless Dir.exist?(index_dir)
|
|
24
|
-
warn "Error: Index directory does not exist: #{index_dir}"
|
|
25
|
-
exit 1
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
unless File.exist?(File.join(index_dir, 'manifest.json'))
|
|
29
|
-
warn "Error: No manifest.json found in: #{index_dir}"
|
|
30
|
-
warn 'Run `bundle exec rake codebase_index:extract` in your Rails app first.'
|
|
31
|
-
exit 1
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
# Attempt to build a retriever for semantic search.
|
|
35
|
-
# Auto-configures from environment variables when no explicit configuration exists.
|
|
36
|
-
retriever = begin
|
|
37
|
-
config = CodebaseIndex.configuration
|
|
38
|
-
|
|
39
|
-
if !config.embedding_provider && ENV.fetch('OPENAI_API_KEY', nil)
|
|
40
|
-
config.vector_store = :in_memory
|
|
41
|
-
config.metadata_store = :in_memory
|
|
42
|
-
config.graph_store = :in_memory
|
|
43
|
-
config.embedding_provider = :openai
|
|
44
|
-
config.embedding_options = { api_key: ENV.fetch('OPENAI_API_KEY', nil) }
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
CodebaseIndex::Builder.new(config).build_retriever if config.embedding_provider
|
|
48
|
-
rescue StandardError => e
|
|
49
|
-
warn "Note: Semantic search unavailable (#{e.message}). Using pattern-based search only."
|
|
50
|
-
nil
|
|
51
|
-
end
|
|
22
|
+
index_dir = CodebaseIndex::MCP::Bootstrapper.resolve_index_dir(ARGV)
|
|
23
|
+
retriever = CodebaseIndex::MCP::Bootstrapper.build_retriever
|
|
52
24
|
|
|
53
25
|
server = CodebaseIndex::MCP::Server.build(index_dir: index_dir, retriever: retriever)
|
|
54
26
|
|