codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
data/README.md
ADDED
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
# CodebaseIndex
|
|
2
|
+
|
|
3
|
+
A Rails codebase extraction and indexing system designed to provide accurate, version-specific context for AI-assisted development tooling.
|
|
4
|
+
|
|
5
|
+
## The Problem
|
|
6
|
+
|
|
7
|
+
LLMs working with Rails codebases face a fundamental accuracy gap. Training data contains documentation and examples from many Rails versions, but a production app runs on *one* version. When a developer asks "what options does `has_many` support?" or "what callbacks fire when a record is saved?", the answer depends on their exact Rails version — and generic LLM responses often get it wrong.
|
|
8
|
+
|
|
9
|
+
Beyond version accuracy, Rails conventions hide enormous amounts of implementation behind "magic." A model file might be 50 lines, but with concerns inlined, schema context, callbacks, validations, and association behavior, the *actual* surface area is 10x that. AI tools that only see the source file miss most of what matters.
|
|
10
|
+
|
|
11
|
+
CodebaseIndex solves this by:
|
|
12
|
+
|
|
13
|
+
- **Running inside Rails** to leverage runtime introspection (not just static parsing)
|
|
14
|
+
- **Inlining concerns** directly into model source so the full picture is visible
|
|
15
|
+
- **Prepending schema comments** with column types, indexes, and foreign keys
|
|
16
|
+
- **Mapping routes to controllers** so HTTP → action flow is explicit
|
|
17
|
+
- **Indexing the exact Rails/gem source** for the versions in `Gemfile.lock`
|
|
18
|
+
- **Tracking dependencies** bidirectionally so you can trace impact across the codebase
|
|
19
|
+
- **Enriching with git data** so you know what's actively changing vs. dormant
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
Add to your Gemfile:
|
|
24
|
+
|
|
25
|
+
```ruby
|
|
26
|
+
gem 'codebase_index'
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Then:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
bundle install
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Or install directly:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
gem install codebase_index
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
> **Requires Rails.** Extraction runs inside a booted Rails application using runtime introspection (`ActiveRecord::Base.descendants`, `Rails.application.routes`, etc.). The gem cannot extract from source files alone. See [Getting Started](docs/GETTING_STARTED.md) for setup.
|
|
42
|
+
|
|
43
|
+
## Target Environment
|
|
44
|
+
|
|
45
|
+
Designed for Rails applications of any scale, with particular strength in large monoliths:
|
|
46
|
+
|
|
47
|
+
- Any database (MySQL, PostgreSQL, SQLite)
|
|
48
|
+
- Any background job system (Sidekiq, Solid Queue, GoodJob, inline)
|
|
49
|
+
- Any view layer (ERB, Phlex, ViewComponent)
|
|
50
|
+
- Docker or bare metal, CI or manual
|
|
51
|
+
- Continuous or one-shot indexing
|
|
52
|
+
|
|
53
|
+
See [docs/BACKEND_MATRIX.md](docs/BACKEND_MATRIX.md) for supported infrastructure combinations.
|
|
54
|
+
|
|
55
|
+
## Use Cases
|
|
56
|
+
|
|
57
|
+
**1. Coding & Debugging** — Primary context for AI coding assistants. Answer "how does our checkout flow work?" with the actual service, model callbacks, controller actions, and framework behavior for the running version.
|
|
58
|
+
|
|
59
|
+
**2. Performance Analysis** — Correlate code structure with runtime behavior. Identify models with high write volume and complex callback chains, find N+1-prone association patterns, surface hot code paths.
|
|
60
|
+
|
|
61
|
+
**3. Deeper Analytics** — Query frequency by scope, error rates by action, background job characteristics. Bridge the gap between code structure and operational data.
|
|
62
|
+
|
|
63
|
+
**4. Support & Marketing Tooling** — Domain-concept retrieval for non-developers. Map business terms to code paths, surface feature flags, document user-facing behavior.
|
|
64
|
+
|
|
65
|
+
## Architecture
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
┌─────────────────────────────────────────────────────────────────────┐
|
|
69
|
+
│ CodebaseIndex │
|
|
70
|
+
├─────────────────────────────────────────────────────────────────────┤
|
|
71
|
+
│ │
|
|
72
|
+
│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │
|
|
73
|
+
│ │ Extraction │───▶│ Storage │◀───│ Retrieval │ │
|
|
74
|
+
│ └─────────────────┘ └─────────────────┘ └─────────────────┘ │
|
|
75
|
+
│ │ │ │ │
|
|
76
|
+
│ ▼ ▼ ▼ │
|
|
77
|
+
│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │
|
|
78
|
+
│ │ Extractors │ │ JSON per unit │ │ Query Classifier│ │
|
|
79
|
+
│ │ · Model │ │ Vector Index │ │ Context Assembly│ │
|
|
80
|
+
│ │ · Controller │ │ Metadata Index │ │ Result Ranking │ │
|
|
81
|
+
│ │ · Service │ │ Dep Graph │ │ │ │
|
|
82
|
+
│ │ · Component │ │ │ │ │ │
|
|
83
|
+
│ │ · Rails Source │ │ │ │ │ │
|
|
84
|
+
│ └─────────────────┘ └─────────────────┘ └─────────────────┘ │
|
|
85
|
+
│ │
|
|
86
|
+
└─────────────────────────────────────────────────────────────────────┘
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Extraction Pipeline
|
|
90
|
+
|
|
91
|
+
Extraction runs inside the Rails application (via rake task) to access runtime introspection — `ActiveRecord::Base.descendants`, `Rails.application.routes`, reflection APIs, etc. This is fundamentally more accurate than static parsing.
|
|
92
|
+
|
|
93
|
+
**Four phases:**
|
|
94
|
+
|
|
95
|
+
1. **Extract** — Each extractor produces `ExtractedUnit` objects with source, metadata, and dependencies
|
|
96
|
+
2. **Resolve dependents** — Build reverse dependency edges (who calls what)
|
|
97
|
+
3. **Enrich with git** — Last modified, contributors, change frequency, recent commits
|
|
98
|
+
4. **Write output** — JSON per unit, dependency graph, manifest, structural summary
|
|
99
|
+
|
|
100
|
+
### Extractors (34)
|
|
101
|
+
|
|
102
|
+
**Core Application**
|
|
103
|
+
|
|
104
|
+
| Extractor | What it captures |
|
|
105
|
+
|-----------|-----------------|
|
|
106
|
+
| **ModelExtractor** | Schema (columns, indexes, FKs), associations, validations, callbacks (all 13 types), scopes, enums, inlined concerns. Chunks large models into summary/associations/callbacks/validations. |
|
|
107
|
+
| **ControllerExtractor** | Route mapping (verb → path → action), filter chains per action, response formats, permitted params. Per-action chunks with applicable filters and route context. |
|
|
108
|
+
| **ServiceExtractor** | Scans `app/services`, `app/interactors`, `app/operations`, `app/commands`, `app/use_cases`. Entry points, dependency injection, custom errors, return type inference. |
|
|
109
|
+
| **JobExtractor** | ActiveJob and Sidekiq workers. Queue config, retry/concurrency options, perform arguments, callbacks. |
|
|
110
|
+
| **MailerExtractor** | ActionMailer classes with defaults, per-action templates, callbacks, helper usage. |
|
|
111
|
+
| **ConfigurationExtractor** | Rails initializers from `config/initializers` and `config/environments`, plus behavioral profile from resolved `Rails.application.config`. |
|
|
112
|
+
| **RouteExtractor** | All Rails routes via runtime introspection of `Rails.application.routes`. |
|
|
113
|
+
| **MiddlewareExtractor** | Rack middleware stack as a single ordered unit. |
|
|
114
|
+
|
|
115
|
+
**UI Components**
|
|
116
|
+
|
|
117
|
+
| Extractor | What it captures |
|
|
118
|
+
|-----------|-----------------|
|
|
119
|
+
| **PhlexExtractor** | Phlex component slots, initialize params, sub-components, Stimulus controller references, route helpers. |
|
|
120
|
+
| **ViewComponentExtractor** | ViewComponent slots, template paths, preview classes, collection support. |
|
|
121
|
+
| **ViewTemplateExtractor** | ERB view templates with render calls, instance variables, helper usage. |
|
|
122
|
+
| **DecoratorExtractor** | Decorators, presenters, and form objects from `app/decorators`, `app/presenters`, `app/form_objects`. |
|
|
123
|
+
|
|
124
|
+
**Data Layer**
|
|
125
|
+
|
|
126
|
+
| Extractor | What it captures |
|
|
127
|
+
|-----------|-----------------|
|
|
128
|
+
| **ConcernExtractor** | ActiveSupport::Concern modules from `app/models/concerns` and `app/controllers/concerns`. |
|
|
129
|
+
| **PoroExtractor** | Plain Ruby objects in `app/models` (non-ActiveRecord classes, excluding concerns). |
|
|
130
|
+
| **SerializerExtractor** | ActiveModelSerializers, Blueprinter, Alba, and Draper. Auto-detects loaded serialization gems. |
|
|
131
|
+
| **ValidatorExtractor** | Custom ActiveModel validator classes with validation rules. |
|
|
132
|
+
| **ManagerExtractor** | SimpleDelegator subclasses — wrapped model, public methods, delegation chain. |
|
|
133
|
+
|
|
134
|
+
**API & Authorization**
|
|
135
|
+
|
|
136
|
+
| Extractor | What it captures |
|
|
137
|
+
|-----------|-----------------|
|
|
138
|
+
| **GraphQLExtractor** | graphql-ruby types, mutations, queries, resolvers, field metadata, authorization patterns. Produces 4 unit types. |
|
|
139
|
+
| **PunditExtractor** | Pundit authorization policies with action methods (index?, show?, create?, etc.). |
|
|
140
|
+
| **PolicyExtractor** | Domain policy classes with decision methods and eligibility rules. |
|
|
141
|
+
|
|
142
|
+
**Infrastructure**
|
|
143
|
+
|
|
144
|
+
| Extractor | What it captures |
|
|
145
|
+
|-----------|-----------------|
|
|
146
|
+
| **EngineExtractor** | Mounted Rails engines via runtime introspection with mount points and route counts. |
|
|
147
|
+
| **I18nExtractor** | Locale files from `config/locales` with translation key structures. |
|
|
148
|
+
| **ActionCableExtractor** | ActionCable channels with stream subscriptions, actions, broadcast patterns. |
|
|
149
|
+
| **ScheduledJobExtractor** | Scheduled jobs from `config/recurring.yml`, `config/sidekiq_cron.yml`, `config/schedule.rb`. |
|
|
150
|
+
| **RakeTaskExtractor** | Rake tasks from `lib/tasks/*.rake` with namespaces, dependencies, descriptions. |
|
|
151
|
+
| **MigrationExtractor** | ActiveRecord migrations with DDL metadata, table operations, reversibility, risk indicators. |
|
|
152
|
+
| **DatabaseViewExtractor** | SQL views from `db/views` (Scenic convention) with materialization and table references. |
|
|
153
|
+
| **StateMachineExtractor** | AASM, Statesman, and state_machines DSL definitions with states and transitions. |
|
|
154
|
+
| **EventExtractor** | Event publish/subscribe patterns (ActiveSupport::Notifications, Wisper). |
|
|
155
|
+
| **CachingExtractor** | Cache usage across controllers, models, and views — strategies, TTLs, cache keys. |
|
|
156
|
+
|
|
157
|
+
**Testing & Source**
|
|
158
|
+
|
|
159
|
+
| Extractor | What it captures |
|
|
160
|
+
|-----------|-----------------|
|
|
161
|
+
| **FactoryExtractor** | FactoryBot factory definitions with traits and associations. |
|
|
162
|
+
| **TestMappingExtractor** | Test file → subject class mapping with test counts and framework type. |
|
|
163
|
+
| **LibExtractor** | Ruby files from `lib/` (excluding tasks and generators). |
|
|
164
|
+
| **RailsSourceExtractor** | High-value Rails framework source and gem source pinned to exact installed versions. |
|
|
165
|
+
|
|
166
|
+
### Key Design Decisions
|
|
167
|
+
|
|
168
|
+
**Concern inlining.** When extracting a model, included concerns are read from disk and embedded as formatted comments directly in the model's source. This means the full behavioral picture is in one unit — no separate lookups needed during retrieval.
|
|
169
|
+
|
|
170
|
+
**Route prepending.** Controller source gets a header block showing the HTTP routes that map to it, so the relationship between URLs and actions is immediately visible.
|
|
171
|
+
|
|
172
|
+
**Semantic chunking.** Large models are split into purpose-specific chunks (summary, associations, callbacks, validations) rather than arbitrary size-based splits. Controllers are chunked per action, with the relevant filters and route attached.
|
|
173
|
+
|
|
174
|
+
**Dependency graph with BFS blast radius.** The graph tracks both forward dependencies (what this unit uses) and reverse dependencies (what uses this unit). Changed-file impact is computed via breadth-first traversal — if a concern changes, every model including it gets re-indexed.
|
|
175
|
+
|
|
176
|
+
## MCP Servers
|
|
177
|
+
|
|
178
|
+
CodebaseIndex ships two [MCP](https://modelcontextprotocol.io/) servers for integrating with AI development tools (Claude Code, Cursor, Windsurf, etc.).
|
|
179
|
+
|
|
180
|
+
**Index Server** (26 tools) — Reads pre-extracted data from disk. No Rails boot required. Provides code lookup, dependency traversal, graph analysis, semantic search, pipeline management, feedback collection, and temporal snapshots.
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
codebase-index-mcp /path/to/rails-app/tmp/codebase_index
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
**Console Server** (31 tools) — Bridges to a live Rails process for database queries, model diagnostics, job monitoring, and guarded operations. All queries run in rolled-back transactions with SQL validation and audit logging.
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
codebase-console-mcp
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
See [docs/MCP_SERVERS.md](docs/MCP_SERVERS.md) for the full tool catalog and setup instructions.
|
|
193
|
+
|
|
194
|
+
## Subsystems
|
|
195
|
+
|
|
196
|
+
```
|
|
197
|
+
lib/
|
|
198
|
+
├── codebase_index.rb # Module interface, Configuration, entry point
|
|
199
|
+
├── codebase_index/
|
|
200
|
+
│ ├── extracted_unit.rb # Core value object
|
|
201
|
+
│ ├── extractor.rb # Orchestrator — coordinates all extractors
|
|
202
|
+
│ ├── dependency_graph.rb # Directed graph + PageRank scoring
|
|
203
|
+
│ ├── graph_analyzer.rb # Structural analysis (orphans, hubs, cycles, bridges)
|
|
204
|
+
│ ├── model_name_cache.rb # Precomputed regex for dependency scanning
|
|
205
|
+
│ ├── retriever.rb # Retriever orchestrator with degradation tiers
|
|
206
|
+
│ ├── builder.rb # DSL builder for configuration
|
|
207
|
+
│ ├── version.rb # Gem version
|
|
208
|
+
│ ├── railtie.rb # Rails integration
|
|
209
|
+
│ │
|
|
210
|
+
│ ├── extractors/ # 34 extractors (one per Rails concept)
|
|
211
|
+
│ │ ├── model_extractor.rb # ActiveRecord models
|
|
212
|
+
│ │ ├── controller_extractor.rb # ActionController
|
|
213
|
+
│ │ ├── service_extractor.rb # Service objects
|
|
214
|
+
│ │ ├── job_extractor.rb # ActiveJob/Sidekiq workers
|
|
215
|
+
│ │ ├── mailer_extractor.rb # ActionMailer
|
|
216
|
+
│ │ ├── phlex_extractor.rb # Phlex components
|
|
217
|
+
│ │ ├── view_component_extractor.rb # ViewComponent
|
|
218
|
+
│ │ ├── graphql_extractor.rb # GraphQL types, mutations, queries
|
|
219
|
+
│ │ ├── serializer_extractor.rb # Serializers/decorators
|
|
220
|
+
│ │ ├── manager_extractor.rb # SimpleDelegator managers
|
|
221
|
+
│ │ ├── policy_extractor.rb # Policy classes
|
|
222
|
+
│ │ ├── validator_extractor.rb # Standalone validators
|
|
223
|
+
│ │ ├── rails_source_extractor.rb # Framework/gem source
|
|
224
|
+
│ │ ├── shared_dependency_scanner.rb # Shared dependency detection
|
|
225
|
+
│ │ ├── shared_utility_methods.rb # Shared extractor utilities
|
|
226
|
+
│ │ └── ast_source_extraction.rb # AST-based source extraction
|
|
227
|
+
│ │
|
|
228
|
+
│ ├── ast/ # Prism-based AST layer
|
|
229
|
+
│ │ ├── parser.rb # Source parsing adapter
|
|
230
|
+
│ │ ├── node.rb # Normalized AST node
|
|
231
|
+
│ │ ├── method_extractor.rb # Method boundary detection
|
|
232
|
+
│ │ └── call_site_extractor.rb # Call site analysis
|
|
233
|
+
│ │
|
|
234
|
+
│ ├── ruby_analyzer/ # Static analysis
|
|
235
|
+
│ │ ├── class_analyzer.rb # Class structure analysis
|
|
236
|
+
│ │ ├── method_analyzer.rb # Method complexity/dependencies
|
|
237
|
+
│ │ ├── dataflow_analyzer.rb # Data flow tracing
|
|
238
|
+
│ │ ├── trace_enricher.rb # Enriches flow traces
|
|
239
|
+
│ │ ├── fqn_builder.rb # Fully-qualified name resolution
|
|
240
|
+
│ │ └── mermaid_renderer.rb # Diagram generation
|
|
241
|
+
│ │
|
|
242
|
+
│ ├── flow_analysis/ # Execution flow tracing
|
|
243
|
+
│ │ ├── operation_extractor.rb # Extract operations from AST
|
|
244
|
+
│ │ └── response_code_mapper.rb # HTTP response mapping
|
|
245
|
+
│ ├── flow_assembler.rb # Assembles execution flows
|
|
246
|
+
│ ├── flow_document.rb # Flow documentation format
|
|
247
|
+
│ │
|
|
248
|
+
│ ├── chunking/ # Semantic chunking
|
|
249
|
+
│ │ ├── chunk.rb # Chunk value object
|
|
250
|
+
│ │ └── semantic_chunker.rb # Type-aware splitting
|
|
251
|
+
│ │
|
|
252
|
+
│ ├── embedding/ # Embedding pipeline
|
|
253
|
+
│ │ ├── provider.rb # Provider interface
|
|
254
|
+
│ │ ├── openai.rb # OpenAI adapter
|
|
255
|
+
│ │ ├── text_preparer.rb # Text preparation for embedding
|
|
256
|
+
│ │ └── indexer.rb # Batch indexing with resumability
|
|
257
|
+
│ │
|
|
258
|
+
│ ├── storage/ # Storage backends
|
|
259
|
+
│ │ ├── vector_store.rb # Vector store interface + InMemory
|
|
260
|
+
│ │ ├── metadata_store.rb # Metadata store interface + InMemory/SQLite
|
|
261
|
+
│ │ ├── graph_store.rb # Graph store interface + InMemory
|
|
262
|
+
│ │ ├── pgvector.rb # PostgreSQL pgvector adapter
|
|
263
|
+
│ │ └── qdrant.rb # Qdrant adapter
|
|
264
|
+
│ │
|
|
265
|
+
│ ├── retrieval/ # Retrieval pipeline
|
|
266
|
+
│ │ ├── query_classifier.rb # Intent/scope/type classification
|
|
267
|
+
│ │ ├── search_executor.rb # Multi-strategy search
|
|
268
|
+
│ │ ├── ranker.rb # RRF-based ranking
|
|
269
|
+
│ │ └── context_assembler.rb # Token-budgeted context assembly
|
|
270
|
+
│ │
|
|
271
|
+
│ ├── formatting/ # LLM context formatting
|
|
272
|
+
│ │ ├── base.rb # Base formatter
|
|
273
|
+
│ │ ├── claude_adapter.rb # Claude-optimized output
|
|
274
|
+
│ │ ├── gpt_adapter.rb # GPT-optimized output
|
|
275
|
+
│ │ ├── generic_adapter.rb # Generic LLM output
|
|
276
|
+
│ │ └── human_adapter.rb # Human-readable output
|
|
277
|
+
│ │
|
|
278
|
+
│ ├── mcp/ # MCP Index Server (26 tools)
|
|
279
|
+
│ │ ├── server.rb # Tool definitions + dispatch
|
|
280
|
+
│ │ └── index_reader.rb # JSON index reader
|
|
281
|
+
│ │
|
|
282
|
+
│ ├── console/ # Console MCP Server (31 tools)
|
|
283
|
+
│ │ ├── server.rb # Console server + tool registration
|
|
284
|
+
│ │ ├── bridge.rb # JSON-lines protocol bridge
|
|
285
|
+
│ │ ├── safe_context.rb # Transaction rollback + timeout
|
|
286
|
+
│ │ ├── connection_manager.rb # Docker/direct/SSH modes
|
|
287
|
+
│ │ ├── model_validator.rb # AR schema validation
|
|
288
|
+
│ │ ├── sql_validator.rb # SQL statement validation
|
|
289
|
+
│ │ ├── audit_logger.rb # JSONL audit logging
|
|
290
|
+
│ │ ├── confirmation.rb # Human-in-the-loop confirmation
|
|
291
|
+
│ │ ├── tools/
|
|
292
|
+
│ │ │ ├── tier1.rb # 9 safe read-only tools
|
|
293
|
+
│ │ │ ├── tier2.rb # 9 domain-aware tools
|
|
294
|
+
│ │ │ ├── tier3.rb # 10 analytics tools
|
|
295
|
+
│ │ │ └── tier4.rb # 3 guarded tools
|
|
296
|
+
│ │ └── adapters/
|
|
297
|
+
│ │ ├── sidekiq_adapter.rb # Sidekiq job backend
|
|
298
|
+
│ │ ├── solid_queue_adapter.rb # Solid Queue job backend
|
|
299
|
+
│ │ ├── good_job_adapter.rb # GoodJob job backend
|
|
300
|
+
│ │ └── cache_adapter.rb # Cache backend adapters
|
|
301
|
+
│ │
|
|
302
|
+
│ ├── coordination/ # Multi-agent coordination
|
|
303
|
+
│ │ └── pipeline_lock.rb # File-based pipeline locking
|
|
304
|
+
│ │
|
|
305
|
+
│ ├── feedback/ # Agent self-service
|
|
306
|
+
│ │ ├── store.rb # JSONL feedback storage
|
|
307
|
+
│ │ └── gap_detector.rb # Feedback-driven gap detection
|
|
308
|
+
│ │
|
|
309
|
+
│ ├── operator/ # Pipeline management
|
|
310
|
+
│ │ ├── status_reporter.rb # Pipeline status
|
|
311
|
+
│ │ ├── error_escalator.rb # Error classification
|
|
312
|
+
│ │ └── pipeline_guard.rb # Rate limiting
|
|
313
|
+
│ │
|
|
314
|
+
│ ├── observability/ # Instrumentation
|
|
315
|
+
│ │ ├── instrumentation.rb # ActiveSupport::Notifications
|
|
316
|
+
│ │ ├── structured_logger.rb # JSON structured logging
|
|
317
|
+
│ │ └── health_check.rb # Component health checks
|
|
318
|
+
│ │
|
|
319
|
+
│ ├── resilience/ # Fault tolerance
|
|
320
|
+
│ │ ├── circuit_breaker.rb # Circuit breaker pattern
|
|
321
|
+
│ │ ├── retryable_provider.rb # Retry with backoff
|
|
322
|
+
│ │ └── index_validator.rb # Index integrity validation
|
|
323
|
+
│ │
|
|
324
|
+
│ ├── db/ # Schema management
|
|
325
|
+
│ │ ├── schema_version.rb # Version tracking
|
|
326
|
+
│ │ ├── migrator.rb # Standalone migration runner
|
|
327
|
+
│ │ └── migrations/
|
|
328
|
+
│ │ ├── 001_create_units.rb
|
|
329
|
+
│ │ ├── 002_create_edges.rb
|
|
330
|
+
│ │ └── 003_create_embeddings.rb
|
|
331
|
+
│ │
|
|
332
|
+
│ ├── session_tracer/ # Session tracing middleware + stores
|
|
333
|
+
│ │ ├── middleware.rb # Rack middleware
|
|
334
|
+
│ │ ├── file_store.rb # File-based trace storage
|
|
335
|
+
│ │ ├── redis_store.rb # Redis trace storage
|
|
336
|
+
│ │ └── solid_cache_store.rb # SolidCache trace storage
|
|
337
|
+
│ │
|
|
338
|
+
│ ├── temporal/ # Temporal snapshot system
|
|
339
|
+
│ │ ├── snapshot_store.rb # Snapshot persistence + diff
|
|
340
|
+
│ │ └── snapshot_metadata.rb # Snapshot metadata
|
|
341
|
+
│ │
|
|
342
|
+
│ └── evaluation/ # Retrieval evaluation
|
|
343
|
+
│ ├── query_set.rb # Evaluation query loading
|
|
344
|
+
│ ├── metrics.rb # Precision@k, Recall, MRR
|
|
345
|
+
│ ├── evaluator.rb # Query evaluation
|
|
346
|
+
│ ├── baseline_runner.rb # Grep/random/file baselines
|
|
347
|
+
│ └── report_generator.rb # JSON report generation
|
|
348
|
+
│
|
|
349
|
+
├── generators/codebase_index/ # Rails generators
|
|
350
|
+
│ ├── install_generator.rb # Initial setup
|
|
351
|
+
│ └── pgvector_generator.rb # pgvector migration
|
|
352
|
+
│
|
|
353
|
+
├── tasks/
|
|
354
|
+
│ └── codebase_index.rake # Rake task definitions
|
|
355
|
+
│
|
|
356
|
+
exe/
|
|
357
|
+
├── codebase-index-mcp # MCP Index Server executable (stdio)
|
|
358
|
+
├── codebase-index-mcp-start # Self-healing MCP wrapper
|
|
359
|
+
├── codebase-index-mcp-http # MCP Index Server (HTTP/Rack)
|
|
360
|
+
└── codebase-console-mcp # Console MCP Server executable
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
## Context Assembly
|
|
364
|
+
|
|
365
|
+
When serving context to an LLM, the token budget is allocated in layers:
|
|
366
|
+
|
|
367
|
+
```
|
|
368
|
+
Budget Allocation:
|
|
369
|
+
├── 10% Structural overview (always included)
|
|
370
|
+
├── 50% Primary relevant units
|
|
371
|
+
├── 25% Supporting context (dependencies)
|
|
372
|
+
└── 15% Framework reference (when needed)
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
Queries are classified to determine whether framework source context is needed. "What options does `has_many` support?" routes to Rails source; "how do we handle checkout?" routes to application code.
|
|
376
|
+
|
|
377
|
+
## Usage
|
|
378
|
+
|
|
379
|
+
### Full Extraction
|
|
380
|
+
|
|
381
|
+
```bash
|
|
382
|
+
bundle exec rake codebase_index:extract
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
### Incremental (CI)
|
|
386
|
+
|
|
387
|
+
```bash
|
|
388
|
+
# Auto-detects GitHub Actions / GitLab CI environment
|
|
389
|
+
bundle exec rake codebase_index:incremental
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
```yaml
|
|
393
|
+
# .github/workflows/index.yml
|
|
394
|
+
jobs:
|
|
395
|
+
index:
|
|
396
|
+
runs-on: ubuntu-latest
|
|
397
|
+
steps:
|
|
398
|
+
- uses: actions/checkout@v4
|
|
399
|
+
with:
|
|
400
|
+
fetch-depth: 2
|
|
401
|
+
- name: Update index
|
|
402
|
+
run: bundle exec rake codebase_index:incremental
|
|
403
|
+
env:
|
|
404
|
+
GITHUB_BASE_REF: ${{ github.base_ref }}
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
### Framework-Only (on dependency changes)
|
|
408
|
+
|
|
409
|
+
```bash
|
|
410
|
+
bundle exec rake codebase_index:extract_framework
|
|
411
|
+
```
|
|
412
|
+
|
|
413
|
+
### Other Tasks
|
|
414
|
+
|
|
415
|
+
```bash
|
|
416
|
+
rake codebase_index:validate # Check index integrity
|
|
417
|
+
rake codebase_index:stats # Show unit counts, sizes, graph stats
|
|
418
|
+
rake codebase_index:clean # Remove index
|
|
419
|
+
```
|
|
420
|
+
|
|
421
|
+
### Ruby API
|
|
422
|
+
|
|
423
|
+
```ruby
|
|
424
|
+
# Full extraction
|
|
425
|
+
CodebaseIndex.extract!
|
|
426
|
+
|
|
427
|
+
# Incremental
|
|
428
|
+
CodebaseIndex.extract_changed!(["app/models/user.rb", "app/services/checkout.rb"])
|
|
429
|
+
|
|
430
|
+
# Configuration
|
|
431
|
+
CodebaseIndex.configure do |config|
|
|
432
|
+
config.output_dir = Rails.root.join("tmp/codebase_index")
|
|
433
|
+
config.max_context_tokens = 8000
|
|
434
|
+
config.include_framework_sources = true
|
|
435
|
+
config.add_gem "devise", paths: ["lib/devise/models"], priority: :high
|
|
436
|
+
end
|
|
437
|
+
```
|
|
438
|
+
|
|
439
|
+
## Output Structure
|
|
440
|
+
|
|
441
|
+
```
|
|
442
|
+
tmp/codebase_index/
|
|
443
|
+
├── manifest.json # Extraction metadata, git SHA, checksums
|
|
444
|
+
├── dependency_graph.json # Full graph with forward/reverse edges
|
|
445
|
+
├── SUMMARY.md # Human-readable structural overview
|
|
446
|
+
├── models/
|
|
447
|
+
│ ├── _index.json # Quick lookup index
|
|
448
|
+
│ ├── User.json # Full extracted unit
|
|
449
|
+
│ └── Order.json
|
|
450
|
+
├── controllers/
|
|
451
|
+
│ ├── _index.json
|
|
452
|
+
│ └── OrdersController.json
|
|
453
|
+
├── services/
|
|
454
|
+
│ ├── _index.json
|
|
455
|
+
│ └── CheckoutService.json
|
|
456
|
+
├── components/
|
|
457
|
+
│ └── ...
|
|
458
|
+
└── rails_source/
|
|
459
|
+
└── ...
|
|
460
|
+
```
|
|
461
|
+
|
|
462
|
+
Each unit JSON contains: `identifier`, `type`, `file_path`, `source_code` (annotated), `metadata` (rich structured data), `dependencies`, `dependents`, `chunks` (if applicable), and `estimated_tokens`.
|
|
463
|
+
|
|
464
|
+
## Development
|
|
465
|
+
|
|
466
|
+
After checking out the repo:
|
|
467
|
+
|
|
468
|
+
```bash
|
|
469
|
+
bin/setup # Install dependencies
|
|
470
|
+
bin/console # Interactive prompt
|
|
471
|
+
bundle exec rake spec # Run tests
|
|
472
|
+
bundle exec rubocop # Lint
|
|
473
|
+
```
|
|
474
|
+
|
|
475
|
+
## Contributing
|
|
476
|
+
|
|
477
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/LeahArmstrong/codebase_index. See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
478
|
+
|
|
479
|
+
## License
|
|
480
|
+
|
|
481
|
+
The gem is available as open source under the terms of the [MIT License](LICENSE.txt).
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Console MCP server for querying live Rails application state.
|
|
5
|
+
#
|
|
6
|
+
# Usage:
|
|
7
|
+
# codebase-console-mcp
|
|
8
|
+
# CODEBASE_CONSOLE_CONFIG=/path/to/console.yml codebase-console-mcp
|
|
9
|
+
#
|
|
10
|
+
# Connects to a Rails application via a bridge process (Docker exec, direct,
|
|
11
|
+
# or SSH) and exposes read-only query tools via the Model Context Protocol
|
|
12
|
+
# (stdio transport).
|
|
13
|
+
|
|
14
|
+
require 'yaml'
|
|
15
|
+
require_relative '../lib/codebase_index/console/server'
|
|
16
|
+
|
|
17
|
+
config_path = ENV.fetch('CODEBASE_CONSOLE_CONFIG', File.expand_path('~/.codebase_index/console.yml'))
|
|
18
|
+
# An empty (or comment-only) YAML file parses to a falsy value (nil) rather
# than a Hash; fall back to {} so the server receives the same value it gets
# when the config file is absent.
config = File.exist?(config_path) ? (YAML.safe_load_file(config_path) || {}) : {}
|
|
19
|
+
|
|
20
|
+
server = CodebaseIndex::Console::Server.build(config: config)
|
|
21
|
+
transport = MCP::Server::Transports::StdioTransport.new(server)
|
|
22
|
+
transport.open
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# MCP server for querying CodebaseIndex extraction output.
|
|
5
|
+
#
|
|
6
|
+
# Usage:
|
|
7
|
+
# codebase-index-mcp [INDEX_DIR]
|
|
8
|
+
# CODEBASE_INDEX_DIR=/path/to/output codebase-index-mcp
|
|
9
|
+
#
|
|
10
|
+
# Reads JSON files from the extraction output directory and exposes
|
|
11
|
+
# them via the Model Context Protocol (stdio transport).
|
|
12
|
+
# Does NOT require Rails — only reads pre-extracted data.
|
|
13
|
+
|
|
14
|
+
require_relative '../lib/codebase_index'
|
|
15
|
+
require_relative '../lib/codebase_index/dependency_graph'
|
|
16
|
+
require_relative '../lib/codebase_index/graph_analyzer'
|
|
17
|
+
require_relative '../lib/codebase_index/mcp/server'
|
|
18
|
+
require_relative '../lib/codebase_index/embedding/text_preparer'
|
|
19
|
+
require_relative '../lib/codebase_index/embedding/indexer'
|
|
20
|
+
|
|
21
|
+
index_dir = ARGV[0] || ENV['CODEBASE_INDEX_DIR'] || Dir.pwd
|
|
22
|
+
|
|
23
|
+
unless Dir.exist?(index_dir)
|
|
24
|
+
warn "Error: Index directory does not exist: #{index_dir}"
|
|
25
|
+
exit 1
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
unless File.exist?(File.join(index_dir, 'manifest.json'))
|
|
29
|
+
warn "Error: No manifest.json found in: #{index_dir}"
|
|
30
|
+
warn 'Run `bundle exec rake codebase_index:extract` in your Rails app first.'
|
|
31
|
+
exit 1
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Attempt to build a retriever for semantic search.
|
|
35
|
+
# Auto-configures from environment variables when no explicit configuration exists.
|
|
36
|
+
retriever = begin
|
|
37
|
+
config = CodebaseIndex.configuration
|
|
38
|
+
|
|
39
|
+
if !config.embedding_provider && ENV.fetch('OPENAI_API_KEY', nil)
|
|
40
|
+
config.vector_store = :in_memory
|
|
41
|
+
config.metadata_store = :in_memory
|
|
42
|
+
config.graph_store = :in_memory
|
|
43
|
+
config.embedding_provider = :openai
|
|
44
|
+
config.embedding_options = { api_key: ENV.fetch('OPENAI_API_KEY', nil) }
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
CodebaseIndex::Builder.new(config).build_retriever if config.embedding_provider
|
|
48
|
+
rescue StandardError => e
|
|
49
|
+
warn "Note: Semantic search unavailable (#{e.message}). Using pattern-based search only."
|
|
50
|
+
nil
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
server = CodebaseIndex::MCP::Server.build(index_dir: index_dir, retriever: retriever)
|
|
54
|
+
|
|
55
|
+
# Pin protocol version for broad client compatibility (Claude Code, Cursor, etc.)
|
|
56
|
+
if ENV['MCP_PROTOCOL_VERSION']
|
|
57
|
+
server.configuration = MCP::Configuration.new(protocol_version: ENV['MCP_PROTOCOL_VERSION'])
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
transport = MCP::Server::Transports::StdioTransport.new(server)
|
|
61
|
+
transport.open
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# MCP server for querying CodebaseIndex extraction output over HTTP.
|
|
5
|
+
#
|
|
6
|
+
# Usage:
|
|
7
|
+
# codebase-index-mcp-http [INDEX_DIR]
|
|
8
|
+
# CODEBASE_INDEX_DIR=/path/to/output codebase-index-mcp-http
|
|
9
|
+
#
|
|
10
|
+
# Reads JSON files from the extraction output directory and exposes
|
|
11
|
+
# them via the Model Context Protocol (Streamable HTTP transport).
|
|
12
|
+
# Requires the `rackup` gem and a Rack-compatible server (e.g., puma).
|
|
13
|
+
|
|
14
|
+
require 'rackup'
|
|
15
|
+
require_relative '../lib/codebase_index'
|
|
16
|
+
require_relative '../lib/codebase_index/dependency_graph'
|
|
17
|
+
require_relative '../lib/codebase_index/graph_analyzer'
|
|
18
|
+
require_relative '../lib/codebase_index/mcp/server'
|
|
19
|
+
require_relative '../lib/codebase_index/embedding/text_preparer'
|
|
20
|
+
require_relative '../lib/codebase_index/embedding/indexer'
|
|
21
|
+
|
|
22
|
+
index_dir = ARGV[0] || ENV['CODEBASE_INDEX_DIR'] || Dir.pwd
|
|
23
|
+
|
|
24
|
+
unless Dir.exist?(index_dir)
|
|
25
|
+
warn "Error: Index directory does not exist: #{index_dir}"
|
|
26
|
+
exit 1
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
unless File.exist?(File.join(index_dir, 'manifest.json'))
|
|
30
|
+
warn "Error: No manifest.json found in: #{index_dir}"
|
|
31
|
+
warn 'Run `bundle exec rake codebase_index:extract` in your Rails app first.'
|
|
32
|
+
exit 1
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Attempt to build a retriever for semantic search.
|
|
36
|
+
# Auto-configures from environment variables when no explicit configuration exists.
|
|
37
|
+
retriever = begin
|
|
38
|
+
config = CodebaseIndex.configuration
|
|
39
|
+
|
|
40
|
+
if !config.embedding_provider && ENV.fetch('OPENAI_API_KEY', nil)
|
|
41
|
+
config.vector_store = :in_memory
|
|
42
|
+
config.metadata_store = :in_memory
|
|
43
|
+
config.graph_store = :in_memory
|
|
44
|
+
config.embedding_provider = :openai
|
|
45
|
+
config.embedding_options = { api_key: ENV.fetch('OPENAI_API_KEY', nil) }
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
CodebaseIndex::Builder.new(config).build_retriever if config.embedding_provider
|
|
49
|
+
rescue StandardError => e
|
|
50
|
+
warn "Note: Semantic search unavailable (#{e.message}). Using pattern-based search only."
|
|
51
|
+
nil
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Parse PORT strictly: String#to_i would silently turn a typo such as "92x2"
# into 92 (or "abc" into 0) and the server would bind the wrong port. An unset
# or empty PORT falls back to the default 9292; anything else must be a
# base-10 integer or Integer() raises ArgumentError before the server starts.
port_setting = ENV['PORT'].to_s.strip
port = port_setting.empty? ? 9292 : Integer(port_setting, 10)
|
|
55
|
+
host = ENV['HOST'] || 'localhost'
|
|
56
|
+
|
|
57
|
+
server = CodebaseIndex::MCP::Server.build(index_dir: index_dir, retriever: retriever)
|
|
58
|
+
transport = MCP::Server::Transports::StreamableHTTPTransport.new(server)
|
|
59
|
+
server.transport = transport
|
|
60
|
+
|
|
61
|
+
app = proc { |env| transport.handle_request(Rack::Request.new(env)) }
|
|
62
|
+
|
|
63
|
+
warn "CodebaseIndex MCP HTTP server starting on http://#{host}:#{port}"
|
|
64
|
+
Rackup::Handler.default.run(app, Port: port, Host: host)
|