codebase_index 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184):
  1. checksums.yaml +4 -4
  2. data/lib/codebase_index.rb +3 -243
  3. metadata +28 -223
  4. data/CHANGELOG.md +0 -89
  5. data/CODE_OF_CONDUCT.md +0 -83
  6. data/CONTRIBUTING.md +0 -65
  7. data/LICENSE.txt +0 -21
  8. data/README.md +0 -325
  9. data/exe/codebase-console +0 -59
  10. data/exe/codebase-console-mcp +0 -22
  11. data/exe/codebase-index-mcp +0 -34
  12. data/exe/codebase-index-mcp-http +0 -37
  13. data/exe/codebase-index-mcp-start +0 -58
  14. data/lib/codebase_index/ast/call_site_extractor.rb +0 -106
  15. data/lib/codebase_index/ast/method_extractor.rb +0 -71
  16. data/lib/codebase_index/ast/node.rb +0 -116
  17. data/lib/codebase_index/ast/parser.rb +0 -614
  18. data/lib/codebase_index/ast.rb +0 -6
  19. data/lib/codebase_index/builder.rb +0 -200
  20. data/lib/codebase_index/cache/cache_middleware.rb +0 -199
  21. data/lib/codebase_index/cache/cache_store.rb +0 -264
  22. data/lib/codebase_index/cache/redis_cache_store.rb +0 -116
  23. data/lib/codebase_index/cache/solid_cache_store.rb +0 -111
  24. data/lib/codebase_index/chunking/chunk.rb +0 -84
  25. data/lib/codebase_index/chunking/semantic_chunker.rb +0 -295
  26. data/lib/codebase_index/console/adapters/cache_adapter.rb +0 -58
  27. data/lib/codebase_index/console/adapters/good_job_adapter.rb +0 -33
  28. data/lib/codebase_index/console/adapters/job_adapter.rb +0 -68
  29. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +0 -33
  30. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +0 -33
  31. data/lib/codebase_index/console/audit_logger.rb +0 -75
  32. data/lib/codebase_index/console/bridge.rb +0 -177
  33. data/lib/codebase_index/console/confirmation.rb +0 -90
  34. data/lib/codebase_index/console/connection_manager.rb +0 -173
  35. data/lib/codebase_index/console/console_response_renderer.rb +0 -74
  36. data/lib/codebase_index/console/embedded_executor.rb +0 -373
  37. data/lib/codebase_index/console/model_validator.rb +0 -81
  38. data/lib/codebase_index/console/rack_middleware.rb +0 -87
  39. data/lib/codebase_index/console/safe_context.rb +0 -82
  40. data/lib/codebase_index/console/server.rb +0 -612
  41. data/lib/codebase_index/console/sql_validator.rb +0 -172
  42. data/lib/codebase_index/console/tools/tier1.rb +0 -118
  43. data/lib/codebase_index/console/tools/tier2.rb +0 -117
  44. data/lib/codebase_index/console/tools/tier3.rb +0 -110
  45. data/lib/codebase_index/console/tools/tier4.rb +0 -79
  46. data/lib/codebase_index/coordination/pipeline_lock.rb +0 -109
  47. data/lib/codebase_index/cost_model/embedding_cost.rb +0 -88
  48. data/lib/codebase_index/cost_model/estimator.rb +0 -128
  49. data/lib/codebase_index/cost_model/provider_pricing.rb +0 -67
  50. data/lib/codebase_index/cost_model/storage_cost.rb +0 -52
  51. data/lib/codebase_index/cost_model.rb +0 -22
  52. data/lib/codebase_index/db/migrations/001_create_units.rb +0 -38
  53. data/lib/codebase_index/db/migrations/002_create_edges.rb +0 -35
  54. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +0 -37
  55. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +0 -45
  56. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +0 -40
  57. data/lib/codebase_index/db/migrator.rb +0 -71
  58. data/lib/codebase_index/db/schema_version.rb +0 -73
  59. data/lib/codebase_index/dependency_graph.rb +0 -236
  60. data/lib/codebase_index/embedding/indexer.rb +0 -140
  61. data/lib/codebase_index/embedding/openai.rb +0 -126
  62. data/lib/codebase_index/embedding/provider.rb +0 -162
  63. data/lib/codebase_index/embedding/text_preparer.rb +0 -112
  64. data/lib/codebase_index/evaluation/baseline_runner.rb +0 -115
  65. data/lib/codebase_index/evaluation/evaluator.rb +0 -139
  66. data/lib/codebase_index/evaluation/metrics.rb +0 -79
  67. data/lib/codebase_index/evaluation/query_set.rb +0 -148
  68. data/lib/codebase_index/evaluation/report_generator.rb +0 -90
  69. data/lib/codebase_index/extracted_unit.rb +0 -145
  70. data/lib/codebase_index/extractor.rb +0 -1028
  71. data/lib/codebase_index/extractors/action_cable_extractor.rb +0 -201
  72. data/lib/codebase_index/extractors/ast_source_extraction.rb +0 -46
  73. data/lib/codebase_index/extractors/behavioral_profile.rb +0 -309
  74. data/lib/codebase_index/extractors/caching_extractor.rb +0 -261
  75. data/lib/codebase_index/extractors/callback_analyzer.rb +0 -246
  76. data/lib/codebase_index/extractors/concern_extractor.rb +0 -292
  77. data/lib/codebase_index/extractors/configuration_extractor.rb +0 -219
  78. data/lib/codebase_index/extractors/controller_extractor.rb +0 -404
  79. data/lib/codebase_index/extractors/database_view_extractor.rb +0 -278
  80. data/lib/codebase_index/extractors/decorator_extractor.rb +0 -253
  81. data/lib/codebase_index/extractors/engine_extractor.rb +0 -223
  82. data/lib/codebase_index/extractors/event_extractor.rb +0 -211
  83. data/lib/codebase_index/extractors/factory_extractor.rb +0 -289
  84. data/lib/codebase_index/extractors/graphql_extractor.rb +0 -892
  85. data/lib/codebase_index/extractors/i18n_extractor.rb +0 -117
  86. data/lib/codebase_index/extractors/job_extractor.rb +0 -374
  87. data/lib/codebase_index/extractors/lib_extractor.rb +0 -218
  88. data/lib/codebase_index/extractors/mailer_extractor.rb +0 -269
  89. data/lib/codebase_index/extractors/manager_extractor.rb +0 -188
  90. data/lib/codebase_index/extractors/middleware_extractor.rb +0 -133
  91. data/lib/codebase_index/extractors/migration_extractor.rb +0 -469
  92. data/lib/codebase_index/extractors/model_extractor.rb +0 -988
  93. data/lib/codebase_index/extractors/phlex_extractor.rb +0 -252
  94. data/lib/codebase_index/extractors/policy_extractor.rb +0 -191
  95. data/lib/codebase_index/extractors/poro_extractor.rb +0 -229
  96. data/lib/codebase_index/extractors/pundit_extractor.rb +0 -223
  97. data/lib/codebase_index/extractors/rails_source_extractor.rb +0 -473
  98. data/lib/codebase_index/extractors/rake_task_extractor.rb +0 -343
  99. data/lib/codebase_index/extractors/route_extractor.rb +0 -181
  100. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +0 -331
  101. data/lib/codebase_index/extractors/serializer_extractor.rb +0 -339
  102. data/lib/codebase_index/extractors/service_extractor.rb +0 -217
  103. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +0 -91
  104. data/lib/codebase_index/extractors/shared_utility_methods.rb +0 -281
  105. data/lib/codebase_index/extractors/state_machine_extractor.rb +0 -398
  106. data/lib/codebase_index/extractors/test_mapping_extractor.rb +0 -225
  107. data/lib/codebase_index/extractors/validator_extractor.rb +0 -211
  108. data/lib/codebase_index/extractors/view_component_extractor.rb +0 -311
  109. data/lib/codebase_index/extractors/view_template_extractor.rb +0 -261
  110. data/lib/codebase_index/feedback/gap_detector.rb +0 -89
  111. data/lib/codebase_index/feedback/store.rb +0 -119
  112. data/lib/codebase_index/filename_utils.rb +0 -32
  113. data/lib/codebase_index/flow_analysis/operation_extractor.rb +0 -206
  114. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +0 -154
  115. data/lib/codebase_index/flow_assembler.rb +0 -290
  116. data/lib/codebase_index/flow_document.rb +0 -191
  117. data/lib/codebase_index/flow_precomputer.rb +0 -102
  118. data/lib/codebase_index/formatting/base.rb +0 -30
  119. data/lib/codebase_index/formatting/claude_adapter.rb +0 -98
  120. data/lib/codebase_index/formatting/generic_adapter.rb +0 -56
  121. data/lib/codebase_index/formatting/gpt_adapter.rb +0 -64
  122. data/lib/codebase_index/formatting/human_adapter.rb +0 -78
  123. data/lib/codebase_index/graph_analyzer.rb +0 -374
  124. data/lib/codebase_index/mcp/bootstrapper.rb +0 -96
  125. data/lib/codebase_index/mcp/index_reader.rb +0 -394
  126. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +0 -81
  127. data/lib/codebase_index/mcp/renderers/json_renderer.rb +0 -17
  128. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +0 -353
  129. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +0 -240
  130. data/lib/codebase_index/mcp/server.rb +0 -961
  131. data/lib/codebase_index/mcp/tool_response_renderer.rb +0 -85
  132. data/lib/codebase_index/model_name_cache.rb +0 -51
  133. data/lib/codebase_index/notion/client.rb +0 -217
  134. data/lib/codebase_index/notion/exporter.rb +0 -219
  135. data/lib/codebase_index/notion/mapper.rb +0 -40
  136. data/lib/codebase_index/notion/mappers/column_mapper.rb +0 -57
  137. data/lib/codebase_index/notion/mappers/migration_mapper.rb +0 -39
  138. data/lib/codebase_index/notion/mappers/model_mapper.rb +0 -161
  139. data/lib/codebase_index/notion/mappers/shared.rb +0 -22
  140. data/lib/codebase_index/notion/rate_limiter.rb +0 -68
  141. data/lib/codebase_index/observability/health_check.rb +0 -79
  142. data/lib/codebase_index/observability/instrumentation.rb +0 -34
  143. data/lib/codebase_index/observability/structured_logger.rb +0 -57
  144. data/lib/codebase_index/operator/error_escalator.rb +0 -81
  145. data/lib/codebase_index/operator/pipeline_guard.rb +0 -92
  146. data/lib/codebase_index/operator/status_reporter.rb +0 -80
  147. data/lib/codebase_index/railtie.rb +0 -38
  148. data/lib/codebase_index/resilience/circuit_breaker.rb +0 -99
  149. data/lib/codebase_index/resilience/index_validator.rb +0 -167
  150. data/lib/codebase_index/resilience/retryable_provider.rb +0 -108
  151. data/lib/codebase_index/retrieval/context_assembler.rb +0 -261
  152. data/lib/codebase_index/retrieval/query_classifier.rb +0 -133
  153. data/lib/codebase_index/retrieval/ranker.rb +0 -277
  154. data/lib/codebase_index/retrieval/search_executor.rb +0 -316
  155. data/lib/codebase_index/retriever.rb +0 -152
  156. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +0 -170
  157. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +0 -77
  158. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +0 -18
  159. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +0 -280
  160. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +0 -143
  161. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +0 -143
  162. data/lib/codebase_index/ruby_analyzer.rb +0 -87
  163. data/lib/codebase_index/session_tracer/file_store.rb +0 -104
  164. data/lib/codebase_index/session_tracer/middleware.rb +0 -143
  165. data/lib/codebase_index/session_tracer/redis_store.rb +0 -106
  166. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +0 -254
  167. data/lib/codebase_index/session_tracer/session_flow_document.rb +0 -223
  168. data/lib/codebase_index/session_tracer/solid_cache_store.rb +0 -139
  169. data/lib/codebase_index/session_tracer/store.rb +0 -81
  170. data/lib/codebase_index/storage/graph_store.rb +0 -120
  171. data/lib/codebase_index/storage/metadata_store.rb +0 -196
  172. data/lib/codebase_index/storage/pgvector.rb +0 -195
  173. data/lib/codebase_index/storage/qdrant.rb +0 -205
  174. data/lib/codebase_index/storage/vector_store.rb +0 -167
  175. data/lib/codebase_index/temporal/json_snapshot_store.rb +0 -245
  176. data/lib/codebase_index/temporal/snapshot_store.rb +0 -345
  177. data/lib/codebase_index/token_utils.rb +0 -19
  178. data/lib/codebase_index/version.rb +0 -5
  179. data/lib/generators/codebase_index/install_generator.rb +0 -32
  180. data/lib/generators/codebase_index/pgvector_generator.rb +0 -37
  181. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +0 -15
  182. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +0 -43
  183. data/lib/tasks/codebase_index.rake +0 -597
  184. data/lib/tasks/codebase_index_evaluation.rake +0 -115
data/CODE_OF_CONDUCT.md DELETED
@@ -1,83 +0,0 @@
1
- # Contributor Covenant Code of Conduct
2
-
3
- ## Our Pledge
4
-
5
- We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation.
6
-
7
- We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.
8
-
9
- ## Our Standards
10
-
11
- Examples of behavior that contributes to a positive environment for our community include:
12
-
13
- * Demonstrating empathy and kindness toward other people
14
- * Being respectful of differing opinions, viewpoints, and experiences
15
- * Giving and gracefully accepting constructive feedback
16
- * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
17
- * Focusing on what is best not just for us as individuals, but for the overall community
18
-
19
- Examples of unacceptable behavior include:
20
-
21
- * The use of sexualized language or imagery, and sexual attention or advances of any kind
22
- * Trolling, insulting or derogatory comments, and personal or political attacks
23
- * Public or private harassment
24
- * Publishing others' private information, such as a physical or email address, without their explicit permission
25
- * Other conduct which could reasonably be considered inappropriate in a professional setting
26
-
27
- ## Enforcement Responsibilities
28
-
29
- Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful.
30
-
31
- Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.
32
-
33
- ## Scope
34
-
35
- This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.
36
-
37
- ## Enforcement
38
-
39
- Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at info@leah.wtf. All complaints will be reviewed and investigated promptly and fairly.
40
-
41
- All community leaders are obligated to respect the privacy and security of the reporter of any incident.
42
-
43
- ## Enforcement Guidelines
44
-
45
- Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:
46
-
47
- ### 1. Correction
48
-
49
- **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.
50
-
51
- **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.
52
-
53
- ### 2. Warning
54
-
55
- **Community Impact**: A violation through a single incident or series of actions.
56
-
57
- **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.
58
-
59
- ### 3. Temporary Ban
60
-
61
- **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior.
62
-
63
- **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.
64
-
65
- ### 4. Permanent Ban
66
-
67
- **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.
68
-
69
- **Consequence**: A permanent ban from any sort of public interaction within the community.
70
-
71
- ## Attribution
72
-
73
- This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.1, available at [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
74
-
75
- Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
76
-
77
- For answers to common questions about this code of conduct, see the FAQ at [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at [https://www.contributor-covenant.org/translations][translations].
78
-
79
- [homepage]: https://www.contributor-covenant.org
80
- [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
81
- [Mozilla CoC]: https://github.com/mozilla/diversity
82
- [FAQ]: https://www.contributor-covenant.org/faq
83
- [translations]: https://www.contributor-covenant.org/translations
data/CONTRIBUTING.md DELETED
@@ -1,65 +0,0 @@
1
- # Contributing to CodebaseIndex
2
-
3
- Thank you for your interest in contributing to CodebaseIndex!
4
-
5
- ## Bug Reports
6
-
7
- Please open an issue on GitHub with:
8
-
9
- - A clear description of the bug
10
- - Steps to reproduce
11
- - Expected vs. actual behavior
12
- - Your Ruby version, Rails version, and database adapter
13
-
14
- ## Feature Requests
15
-
16
- Open an issue describing:
17
-
18
- - The problem you're trying to solve
19
- - Your proposed solution
20
- - Any alternatives you've considered
21
-
22
- ## Pull Requests
23
-
24
- 1. Fork the repo and create your branch from `main`
25
- 2. Install dependencies: `bin/setup`
26
- 3. Make your changes
27
- 4. Add tests for new functionality
28
- 5. Ensure the test suite passes: `bundle exec rake spec`
29
- 6. Ensure code style passes: `bundle exec rubocop`
30
- 7. Update CHANGELOG.md with your changes
31
- 8. Open a pull request
32
-
33
- ## Development Setup
34
-
35
- ```bash
36
- git clone https://github.com/LeahArmstrong/codebase_index.git
37
- cd codebase_index
38
- bin/setup
39
- bundle exec rake spec # Run tests
40
- bundle exec rubocop # Check style
41
- ```
42
-
43
- ## Testing
44
-
45
- CodebaseIndex has two test suites:
46
-
47
- - **Gem unit specs** (`spec/`): Run with `bundle exec rake spec`. No Rails boot required.
48
- - **Integration specs**: Run inside a host Rails app to test real extraction.
49
-
50
- All new features need tests. Bug fixes should include a regression test.
51
-
52
- ## Code Style
53
-
54
- - `frozen_string_literal: true` on every file
55
- - YARD documentation on public methods
56
- - `rescue StandardError`, never bare `rescue`
57
- - All extractors return `Array<ExtractedUnit>`
58
-
59
- ## Runtime Introspection Requirement
60
-
61
- CodebaseIndex uses runtime introspection, not static parsing. If your feature requires access to Rails internals (ActiveRecord reflections, route introspection, etc.), it must run inside a booted Rails environment. Unit tests should use mocks/stubs; integration tests should run in a real Rails app.
62
-
63
- ## License
64
-
65
- By contributing, you agree that your contributions will be licensed under the MIT License.
data/LICENSE.txt DELETED
@@ -1,21 +0,0 @@
1
- The MIT License (MIT)
2
-
3
- Copyright (c) 2024-2026 Leah Armstrong
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
data/README.md DELETED
@@ -1,325 +0,0 @@
1
- # CodebaseIndex
2
-
3
- A Rails codebase extraction and indexing system designed to provide accurate, version-specific context for AI-assisted development tooling.
4
-
5
- ## The Problem
6
-
7
- LLMs working with Rails codebases face a fundamental accuracy gap. Training data contains documentation and examples from many Rails versions, but a production app runs on *one* version. When a developer asks "what options does `has_many` support?" or "what callbacks fire when a record is saved?", the answer depends on their exact Rails version — and generic LLM responses often get it wrong.
8
-
9
- Beyond version accuracy, Rails conventions hide enormous amounts of implementation behind "magic." A model file might be 50 lines, but with concerns inlined, schema context, callbacks, validations, and association behavior, the *actual* surface area is 10x that. AI tools that only see the source file miss most of what matters.
10
-
11
- CodebaseIndex solves this by:
12
-
13
- - **Running inside Rails** to leverage runtime introspection (not just static parsing)
14
- - **Inlining concerns** directly into model source so the full picture is visible
15
- - **Prepending schema comments** with column types, indexes, and foreign keys
16
- - **Mapping routes to controllers** so HTTP → action flow is explicit
17
- - **Indexing the exact Rails/gem source** for the versions in `Gemfile.lock`
18
- - **Tracking dependencies** bidirectionally so you can trace impact across the codebase
19
- - **Enriching with git data** so you know what's actively changing vs. dormant
20
-
21
- See [Why CodebaseIndex?](docs/WHY_CODEBASE_INDEX.md) for concrete before/after examples.
22
-
23
- ## Quick Start
24
-
25
- ```bash
26
- # Add to your Rails app's Gemfile, then:
27
- bundle install
28
- rails generate codebase_index:install
29
- bundle exec rake codebase_index:extract
30
- bundle exec rake codebase_index:stats
31
- # Add the MCP server to .mcp.json (see below) and start asking questions
32
- ```
33
-
34
- See [Getting Started](docs/GETTING_STARTED.md) for the full walkthrough including Docker, storage presets, and CI setup.
35
-
36
- ## Installation
37
-
38
- Add to your Gemfile:
39
-
40
- ```ruby
41
- gem 'codebase_index'
42
- ```
43
-
44
- Then:
45
-
46
- ```bash
47
- bundle install
48
- rails generate codebase_index:install
49
- rails db:migrate
50
- ```
51
-
52
- Create a minimal configuration:
53
-
54
- ```ruby
55
- # config/initializers/codebase_index.rb
56
- CodebaseIndex.configure do |config|
57
- config.output_dir = Rails.root.join('tmp/codebase_index')
58
- end
59
- ```
60
-
61
- Or install the gem directly:
62
-
63
- ```bash
64
- gem install codebase_index
65
- ```
66
-
67
- > **Requires Rails.** Extraction runs inside a booted Rails application using runtime introspection (`ActiveRecord::Base.descendants`, `Rails.application.routes`, etc.). The gem cannot extract from source files alone. See [Getting Started](docs/GETTING_STARTED.md) for full setup details.
68
-
69
- ## Target Environment
70
-
71
- Designed for Rails applications of any scale, with particular strength in large monoliths:
72
-
73
- - Any database (MySQL, PostgreSQL, SQLite)
74
- - Any background job system (Sidekiq, Solid Queue, GoodJob, inline)
75
- - Any view layer (ERB, Phlex, ViewComponent)
76
- - Docker or bare metal, CI or manual
77
- - Continuous or one-shot indexing
78
-
79
- See [docs/BACKEND_MATRIX.md](docs/BACKEND_MATRIX.md) for supported infrastructure combinations.
80
-
81
- ## Use Cases
82
-
83
- **1. Coding & Debugging** — Primary context for AI coding assistants. Answer "how does our checkout flow work?" with the actual service, model callbacks, controller actions, and framework behavior for the running version.
84
-
85
- **2. Performance Analysis** — Correlate code structure with runtime behavior. Identify models with high write volume and complex callback chains, find N+1-prone association patterns, surface hot code paths.
86
-
87
- **3. Deeper Analytics** — Query frequency by scope, error rates by action, background job characteristics. Bridge the gap between code structure and operational data.
88
-
89
- **4. Support & Marketing Tooling** — Domain-concept retrieval for non-developers. Map business terms to code paths, surface feature flags, document user-facing behavior.
90
-
91
- ## Architecture
92
-
93
- ```
94
- ┌─────────────────────────────────────────────────────────────────────┐
95
- │ CodebaseIndex │
96
- ├─────────────────────────────────────────────────────────────────────┤
97
- │ │
98
- │ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │
99
- │ │ Extraction │───▶│ Storage │◀───│ Retrieval │ │
100
- │ └─────────────────┘ └─────────────────┘ └─────────────────┘ │
101
- │ │ │ │ │
102
- │ ▼ ▼ ▼ │
103
- │ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │
104
- │ │ Extractors │ │ JSON per unit │ │ Query Classifier│ │
105
- │ │ · Model │ │ Vector Index │ │ Context Assembly│ │
106
- │ │ · Controller │ │ Metadata Index │ │ Result Ranking │ │
107
- │ │ · Service │ │ Dep Graph │ │ │ │
108
- │ │ · Component │ │ │ │ │ │
109
- │ │ · Rails Source │ │ │ │ │ │
110
- │ └─────────────────┘ └─────────────────┘ └─────────────────┘ │
111
- │ │
112
- └─────────────────────────────────────────────────────────────────────┘
113
- ```
114
-
115
- ### Extraction Pipeline
116
-
117
- Extraction runs inside the Rails application (via rake task) to access runtime introspection — `ActiveRecord::Base.descendants`, `Rails.application.routes`, reflection APIs, etc. This is fundamentally more accurate than static parsing.
118
-
119
- **Four phases:**
120
-
121
- 1. **Extract** — Each extractor produces `ExtractedUnit` objects with source, metadata, and dependencies
122
- 2. **Resolve dependents** — Build reverse dependency edges (who calls what)
123
- 3. **Enrich with git** — Last modified, contributors, change frequency, recent commits
124
- 4. **Write output** — JSON per unit, dependency graph, manifest, structural summary
125
-
126
- ### Extractors (34)
127
-
128
- 34 extractors cover every major Rails concept: models (with inlined concerns and schema), controllers (with route context), services, jobs, mailers, GraphQL types/mutations/resolvers, serializers, view components (Phlex and ViewComponent), ERB templates, decorators, concerns, validators, policies, routes, middleware, engines, i18n, Action Cable, rake tasks, migrations, database views, state machines, events, caching patterns, factories, test mappings, and Rails framework source pinned to exact installed versions.
129
-
130
- See [docs/EXTRACTOR_REFERENCE.md](docs/EXTRACTOR_REFERENCE.md) for per-extractor documentation with configuration, edge cases, and example output.
131
-
132
- ### Key Design Decisions
133
-
134
- **Concern inlining** — included concerns are embedded directly in the model's source. **Route prepending** — controllers get a route header showing HTTP verb → path → action. **Semantic chunking** — models split by purpose (associations, callbacks, validations), controllers split per-action. **Dependency graph with BFS blast radius** — forward and reverse edges enable change-impact traversal.
135
-
136
- ## MCP Servers
137
-
138
- CodebaseIndex ships two [MCP](https://modelcontextprotocol.io/) servers for integrating with AI development tools (Claude Code, Cursor, Windsurf, etc.).
139
-
140
- **Index Server** (27 tools) — Reads pre-extracted data from disk. No Rails boot required. Provides code lookup, dependency traversal, graph analysis, semantic search, pipeline management, feedback collection, and temporal snapshots.
141
-
142
- ```bash
143
- codebase-index-mcp /path/to/rails-app/tmp/codebase_index
144
- ```
145
-
146
- **Console Server** (31 tools) — Bridges to a live Rails process for database queries, model diagnostics, job monitoring, and guarded operations. All queries run in rolled-back transactions with SQL validation and audit logging.
147
-
148
- ```bash
149
- codebase-console-mcp
150
- ```
151
-
152
- See [docs/MCP_SERVERS.md](docs/MCP_SERVERS.md) for the full tool catalog and setup instructions.
153
-
154
- ### Claude Code Setup
155
-
156
- Add the servers to your project's `.mcp.json`:
157
-
158
- ```json
159
- {
160
- "mcpServers": {
161
- "codebase-index": {
162
- "command": "codebase-index-mcp-start",
163
- "args": ["/path/to/rails-app/tmp/codebase_index"]
164
- },
165
- "codebase-console": {
166
- "command": "bundle",
167
- "args": ["exec", "rake", "codebase_index:console"],
168
- "cwd": "/path/to/rails-app"
169
- }
170
- }
171
- }
172
- ```
173
-
174
- > **Recommended**: Use `codebase-index-mcp-start` instead of `codebase-index-mcp` for Claude Code. It validates the index directory exists, checks for a manifest, ensures dependencies are installed, and restarts automatically on failure.
175
-
176
- The **index server** reads from a pre-extracted directory — run `bundle exec rake codebase_index:extract` in your Rails app first.
177
-
178
- The **console server** runs embedded inside your Rails app (no config file needed). For Docker setups, see [docs/DOCKER_SETUP.md](docs/DOCKER_SETUP.md).
179
-
180
- ## Subsystems
181
-
182
- ```
183
- lib/codebase_index/
184
- ├── extractor.rb # Orchestrator — coordinates all 34 extractors
185
- ├── extracted_unit.rb # Core value object (the universal currency)
186
- ├── dependency_graph.rb # Directed graph + PageRank scoring
187
- ├── graph_analyzer.rb # Structural analysis (orphans, hubs, cycles, bridges)
188
- ├── retriever.rb # Retrieval orchestrator with degradation tiers
189
- ├── extractors/ # 34 extractors (one per Rails concept)
190
- ├── ast/ # Prism-based AST layer
191
- ├── ruby_analyzer/ # Static analysis (class, method, dataflow)
192
- ├── chunking/ # Semantic chunking (type-aware splitting)
193
- ├── embedding/ # Embedding pipeline (OpenAI, Ollama)
194
- ├── storage/ # Storage backends (pgvector, Qdrant, SQLite)
195
- ├── retrieval/ # Retrieval pipeline (classify, search, rank, assemble)
196
- ├── mcp/ # MCP Index Server (27 tools)
197
- ├── console/ # Console MCP Server (31 tools, 4 tiers)
198
- ├── coordination/ # Multi-agent pipeline locking
199
- ├── notion/ # Notion export
200
- ├── session_tracer/ # Session tracing middleware
201
- ├── temporal/ # Temporal snapshot system
202
- └── evaluation/ # Retrieval evaluation harness
203
-
204
- exe/
205
- ├── codebase-index-mcp # Index Server executable (stdio)
206
- ├── codebase-index-mcp-start # Self-healing MCP wrapper
207
- ├── codebase-index-mcp-http # Index Server (HTTP/Rack)
208
- └── codebase-console-mcp # Console MCP Server executable
209
- ```
210
-
211
- See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full pipeline explanation — extraction phases, dependency graph, retrieval pipeline, storage backends, and semantic chunking.
212
-
213
- ## Usage
214
-
215
- ### Full Extraction
216
-
217
- ```bash
218
- bundle exec rake codebase_index:extract
219
- ```
220
-
221
- ### Incremental (CI)
222
-
223
- ```bash
224
- bundle exec rake codebase_index:incremental
225
- ```
226
-
227
- Auto-detects GitHub Actions and GitLab CI environments. See [Getting Started](docs/GETTING_STARTED.md) for CI workflow YAML.
228
-
229
- ### Docker
230
-
231
- Extraction runs inside the container; the Index Server runs on the host reading volume-mounted output. See [docs/DOCKER_SETUP.md](docs/DOCKER_SETUP.md) for Docker setup, MCP config, and troubleshooting.
232
-
233
- ```bash
234
- docker compose exec app bundle exec rake codebase_index:extract
235
- ```
236
-
237
- ### Other Tasks
238
-
239
- ```bash
240
- rake codebase_index:validate # Check index integrity
241
- rake codebase_index:stats # Show unit counts, sizes, graph stats
242
- rake codebase_index:clean # Remove index
243
- rake codebase_index:embed # Embed all extracted units
244
- rake codebase_index:embed_incremental # Embed changed units only
245
- rake codebase_index:flow[EntryPoint] # Generate execution flow for an entry point
246
- rake codebase_index:console # Start console MCP server
247
- rake codebase_index:notion_sync # Sync models/columns to Notion databases
248
- ```
249
-
250
- See [docs/NOTION_INTEGRATION.md](docs/NOTION_INTEGRATION.md) for Notion export configuration.
251
-
252
- ### Ruby API
253
-
254
- > **Requires a booted Rails environment.** These methods use runtime introspection and must be called from within a Rails process (console, rake task, initializer).
255
-
256
- ```ruby
257
- # Full extraction (output_dir from configuration)
258
- CodebaseIndex.extract!
259
-
260
- # Incremental
261
- CodebaseIndex.extract_changed!(["app/models/user.rb", "app/services/checkout.rb"])
262
-
263
- # Configuration
264
- CodebaseIndex.configure do |config|
265
- config.output_dir = Rails.root.join("tmp/codebase_index")
266
- config.max_context_tokens = 8000
267
- config.include_framework_sources = true
268
- config.add_gem "devise", paths: ["lib/devise/models"], priority: :high
269
- end
270
- ```
271
-
272
- ## Output Structure
273
-
274
- ```
275
- tmp/codebase_index/
276
- ├── manifest.json # Extraction metadata, git SHA, checksums
277
- ├── dependency_graph.json # Full graph with forward/reverse edges
278
- ├── SUMMARY.md # Human-readable structural overview
279
- ├── models/
280
- │ ├── _index.json # Quick lookup index
281
- │ ├── User.json # Full extracted unit
282
- │ └── Order.json
283
- ├── controllers/
284
- │ ├── _index.json
285
- │ └── OrdersController.json
286
- ├── services/
287
- │ ├── _index.json
288
- │ └── CheckoutService.json
289
- ├── components/
290
- │ └── ...
291
- └── rails_source/
292
- └── ...
293
- ```
294
-
295
- Each unit JSON contains: `identifier`, `type`, `file_path`, `source_code` (annotated), `metadata` (rich structured data), `dependencies`, `dependents`, `chunks` (if applicable), and `estimated_tokens`.
296
-
297
- ## Documentation
298
-
299
- | Guide | Purpose |
300
- |-------|---------|
301
- | [Getting Started](docs/GETTING_STARTED.md) | Install, configure, extract, inspect |
302
- | [FAQ](docs/FAQ.md) | Common questions about setup, extraction, MCP, Docker |
303
- | [Troubleshooting](docs/TROUBLESHOOTING.md) | Symptom → cause → fix for common problems |
304
- | [Architecture](docs/ARCHITECTURE.md) | Pipeline stages, dependency graph, retrieval, storage |
305
- | [Extractor Reference](docs/EXTRACTOR_REFERENCE.md) | What each of the 34 extractors captures |
306
- | [MCP Servers](docs/MCP_SERVERS.md) | Full tool catalog and setup for Claude Code, Cursor, Windsurf |
307
- | [MCP Tool Cookbook](docs/MCP_TOOL_COOKBOOK.md) | Scenario-based examples for common tasks |
308
- | [Configuration Reference](docs/CONFIGURATION_REFERENCE.md) | All options with defaults |
309
- | [Backend Matrix](docs/BACKEND_MATRIX.md) | Supported infrastructure combinations |
310
-
311
- ## Development
312
-
313
- ```bash
314
- bin/setup # Install dependencies
315
- bundle exec rake spec # Run tests
316
- bundle exec rubocop # Lint
317
- ```
318
-
319
- ## Contributing
320
-
321
- Bug reports and pull requests are welcome on GitHub at https://github.com/LeahArmstrong/codebase_index. See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
322
-
323
- ## License
324
-
325
- The gem is available as open source under the terms of the [MIT License](LICENSE.txt).
data/exe/codebase-console DELETED
@@ -1,59 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Embedded console MCP server — runs inside a Rails environment.
5
- #
6
- # Usage (via rake, recommended):
7
- # bundle exec rake codebase_index:console
8
- #
9
- # Usage (via rails runner):
10
- # bundle exec rails runner "$(bundle show codebase_index)/exe/codebase-console"
11
- #
12
- # The rake task captures stdout before Rails boots and passes the fd via
13
- # $codebase_index_protocol_out. When run via rails runner, this script
14
- # captures stdout itself to keep the MCP protocol stream clean.
15
-
16
- # Check if the rake task already captured stdout for us.
17
- protocol_out = $codebase_index_protocol_out # rubocop:disable Style/GlobalVars
18
-
19
- unless protocol_out
20
- # Running via rails runner — capture stdout ourselves.
21
- protocol_out = $stdout.dup
22
- $stdout.reopen($stderr)
23
- end
24
-
25
- require 'codebase_index/console/server'
26
-
27
- # Ensure all application models are loaded for the registry.
28
- Rails.application.eager_load!
29
-
30
- registry = ActiveRecord::Base.descendants.each_with_object({}) do |model, hash|
31
- next if model.abstract_class?
32
- next unless model.table_exists?
33
-
34
- hash[model.name] = model.column_names
35
- rescue StandardError
36
- next
37
- end
38
-
39
- validator = CodebaseIndex::Console::ModelValidator.new(registry: registry)
40
- safe_context = CodebaseIndex::Console::SafeContext.new(connection: ActiveRecord::Base.connection)
41
-
42
- redacted_columns = if CodebaseIndex.respond_to?(:configuration) && CodebaseIndex.configuration
43
- Array(CodebaseIndex.configuration.console_redacted_columns)
44
- else
45
- []
46
- end
47
-
48
- server = CodebaseIndex::Console::Server.build_embedded(
49
- model_validator: validator,
50
- safe_context: safe_context,
51
- redacted_columns: redacted_columns
52
- )
53
-
54
- # Restore the protocol output for MCP transport.
55
- $stdout.reopen(protocol_out)
56
- protocol_out.close unless protocol_out.closed?
57
-
58
- transport = MCP::Server::Transports::StdioTransport.new(server)
59
- transport.open
@@ -1,22 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Console MCP server for querying live Rails application state.
5
- #
6
- # Usage:
7
- # codebase-console-mcp
8
- # CODEBASE_CONSOLE_CONFIG=/path/to/console.yml codebase-console-mcp
9
- #
10
- # Connects to a Rails application via a bridge process (Docker exec, direct,
11
- # or SSH) and exposes read-only query tools via the Model Context Protocol
12
- # (stdio transport).
13
-
14
- require 'yaml'
15
- require_relative '../lib/codebase_index/console/server'
16
-
17
- config_path = ENV.fetch('CODEBASE_CONSOLE_CONFIG', File.expand_path('~/.codebase_index/console.yml'))
18
- config = File.exist?(config_path) ? YAML.safe_load_file(config_path) : {}
19
-
20
- server = CodebaseIndex::Console::Server.build(config: config)
21
- transport = MCP::Server::Transports::StdioTransport.new(server)
22
- transport.open
@@ -1,34 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # MCP server for querying CodebaseIndex extraction output.
5
- #
6
- # Usage:
7
- # codebase-index-mcp [INDEX_DIR]
8
- # CODEBASE_INDEX_DIR=/path/to/output codebase-index-mcp
9
- #
10
- # Reads JSON files from the extraction output directory and exposes
11
- # them via the Model Context Protocol (stdio transport).
12
- # Does NOT require Rails — only reads pre-extracted data.
13
-
14
- require_relative '../lib/codebase_index'
15
- require_relative '../lib/codebase_index/dependency_graph'
16
- require_relative '../lib/codebase_index/graph_analyzer'
17
- require_relative '../lib/codebase_index/mcp/server'
18
- require_relative '../lib/codebase_index/mcp/bootstrapper'
19
- require_relative '../lib/codebase_index/embedding/text_preparer'
20
- require_relative '../lib/codebase_index/embedding/indexer'
21
-
22
- index_dir = CodebaseIndex::MCP::Bootstrapper.resolve_index_dir(ARGV)
23
- retriever = CodebaseIndex::MCP::Bootstrapper.build_retriever
24
- snapshot_store = CodebaseIndex::MCP::Bootstrapper.build_snapshot_store(index_dir)
25
-
26
- server = CodebaseIndex::MCP::Server.build(index_dir: index_dir, retriever: retriever, snapshot_store: snapshot_store)
27
-
28
- # Optionally pin the protocol version (when MCP_PROTOCOL_VERSION is set) for broad client compatibility (Claude Code, Cursor, etc.)
29
- if ENV['MCP_PROTOCOL_VERSION']
30
- server.configuration = MCP::Configuration.new(protocol_version: ENV['MCP_PROTOCOL_VERSION'])
31
- end
32
-
33
- transport = MCP::Server::Transports::StdioTransport.new(server)
34
- transport.open
@@ -1,37 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # MCP server for querying CodebaseIndex extraction output over HTTP.
5
- #
6
- # Usage:
7
- # codebase-index-mcp-http [INDEX_DIR]
8
- # CODEBASE_INDEX_DIR=/path/to/output codebase-index-mcp-http
9
- #
10
- # Reads JSON files from the extraction output directory and exposes
11
- # them via the Model Context Protocol (Streamable HTTP transport).
12
- # Requires the `rackup` gem and a Rack-compatible server (e.g., puma).
13
-
14
- require 'rackup'
15
- require_relative '../lib/codebase_index'
16
- require_relative '../lib/codebase_index/dependency_graph'
17
- require_relative '../lib/codebase_index/graph_analyzer'
18
- require_relative '../lib/codebase_index/mcp/server'
19
- require_relative '../lib/codebase_index/mcp/bootstrapper'
20
- require_relative '../lib/codebase_index/embedding/text_preparer'
21
- require_relative '../lib/codebase_index/embedding/indexer'
22
-
23
- index_dir = CodebaseIndex::MCP::Bootstrapper.resolve_index_dir(ARGV)
24
- retriever = CodebaseIndex::MCP::Bootstrapper.build_retriever
25
- snapshot_store = CodebaseIndex::MCP::Bootstrapper.build_snapshot_store(index_dir)
26
-
27
- port = (ENV['PORT'] || 9292).to_i
28
- host = ENV['HOST'] || 'localhost'
29
-
30
- server = CodebaseIndex::MCP::Server.build(index_dir: index_dir, retriever: retriever, snapshot_store: snapshot_store)
31
- transport = MCP::Server::Transports::StreamableHTTPTransport.new(server)
32
- server.transport = transport
33
-
34
- app = proc { |env| transport.handle_request(Rack::Request.new(env)) }
35
-
36
- warn "CodebaseIndex MCP HTTP server starting on http://#{host}:#{port}"
37
- Rackup::Handler.default.run(app, Port: port, Host: host)