codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'net/http'
4
+ require 'json'
5
+
6
module CodebaseIndex
  module Embedding
    # Interface and adapters for embedding providers.
    #
    # All embedding providers implement the {Interface} module, which defines
    # the contract for generating vector embeddings from text.
    module Provider
      # Interface that all embedding providers must implement.
      #
      # Defines the contract for embedding text into vector representations.
      # Implementations must provide single-text embedding, batch embedding,
      # dimension reporting, and model identification. Every default
      # implementation here simply raises NotImplementedError.
      module Interface
        # Embed a single text string into a vector.
        #
        # @param text [String] the text to embed
        # @return [Array<Float>] the embedding vector
        # @raise [NotImplementedError] if not implemented by the provider
        def embed(text)
          raise NotImplementedError
        end

        # Embed multiple texts into vectors in a single request.
        #
        # @param texts [Array<String>] the texts to embed
        # @return [Array<Array<Float>>] array of embedding vectors
        # @raise [NotImplementedError] if not implemented by the provider
        def embed_batch(texts)
          raise NotImplementedError
        end

        # Return the dimensionality of the embedding vectors.
        #
        # @return [Integer] number of dimensions
        # @raise [NotImplementedError] if not implemented by the provider
        def dimensions
          raise NotImplementedError
        end

        # Return the name of the embedding model.
        #
        # @return [String] model name
        # @raise [NotImplementedError] if not implemented by the provider
        def model_name
          raise NotImplementedError
        end
      end

      # Ollama adapter for local embeddings via the Ollama HTTP API.
      #
      # Uses the `/api/embed` endpoint to generate embeddings. Requires a running
      # Ollama instance (default: localhost:11434) with the specified model pulled.
      # Both `http://` and `https://` hosts are supported; TLS is enabled based on
      # the URL scheme.
      #
      # @example
      #   provider = CodebaseIndex::Embedding::Provider::Ollama.new
      #   vector = provider.embed("class User < ApplicationRecord; end")
      #   vectors = provider.embed_batch(["text1", "text2"])
      class Ollama
        include Interface

        DEFAULT_MODEL = 'nomic-embed-text'
        DEFAULT_HOST = 'http://localhost:11434'
        EMBED_PATH = '/api/embed'

        # @param model [String] Ollama model name (default: nomic-embed-text)
        # @param host [String] Ollama server URL (default: http://localhost:11434).
        #   A trailing slash is tolerated.
        def initialize(model: DEFAULT_MODEL, host: DEFAULT_HOST)
          @model = model
          @host = host
          # Strip one trailing slash so "http://host:11434/" does not become "//api/embed".
          @uri = URI("#{host.to_s.chomp('/')}#{EMBED_PATH}")
        end

        # Embed a single text string.
        #
        # @param text [String] the text to embed
        # @return [Array<Float>] the embedding vector
        # @raise [CodebaseIndex::Error] if the API returns an error or a
        #   response without any embedding
        def embed(text)
          vector = extract_embeddings(post_request({ model: @model, input: text })).first
          raise CodebaseIndex::Error, 'Ollama API error: response contained no embeddings' if vector.nil?

          vector
        end

        # Embed multiple texts in a single request.
        #
        # An empty array short-circuits to `[]` without contacting the server.
        #
        # @param texts [Array<String>] the texts to embed
        # @return [Array<Array<Float>>] array of embedding vectors
        # @raise [CodebaseIndex::Error] if the API returns an error or a
        #   response without an `embeddings` array
        def embed_batch(texts)
          return [] if texts.is_a?(Array) && texts.empty?

          extract_embeddings(post_request({ model: @model, input: texts }))
        end

        # Return the dimensionality of vectors produced by this model.
        #
        # Determined dynamically by embedding a test string on first call;
        # the result is memoized for later calls.
        #
        # @return [Integer] number of dimensions
        def dimensions
          @dimensions ||= embed('test').length
        end

        # Return the model name.
        #
        # @return [String] the Ollama model name
        def model_name
          @model
        end

        private

        # Send a POST request to the Ollama API.
        #
        # @param body [Hash] request body
        # @return [Hash] parsed JSON response
        # @raise [CodebaseIndex::Error] if the API returns a non-success status
        #   or a body that is not valid JSON
        def post_request(body)
          request = Net::HTTP::Post.new(@uri.path, 'Content-Type' => 'application/json')
          request.body = body.to_json
          response = build_http.request(request)

          unless response.is_a?(Net::HTTPSuccess)
            raise CodebaseIndex::Error, "Ollama API error: #{response.code} #{response.body}"
          end

          parse_json(response.body)
        end

        # Build the HTTP client for the configured endpoint.
        #
        # @return [Net::HTTP] unstarted client, with TLS enabled for https URLs
        def build_http
          http = Net::HTTP.new(@uri.hostname, @uri.port)
          # Net::HTTP never infers TLS from the port; it must be switched on explicitly.
          http.use_ssl = @uri.scheme == 'https'
          http
        end

        # Parse a response body, converting parser failures into the gem's error type.
        #
        # @param raw [String, nil] raw response body
        # @return [Object] parsed JSON document
        # @raise [CodebaseIndex::Error] if the body is missing or not valid JSON
        def parse_json(raw)
          JSON.parse(raw)
        rescue JSON::ParserError, TypeError => e
          raise CodebaseIndex::Error, "Ollama API error: invalid JSON response (#{e.message})"
        end

        # Pull the `embeddings` array out of a parsed response.
        #
        # @param payload [Object] parsed JSON response
        # @return [Array<Array<Float>>] embedding vectors
        # @raise [CodebaseIndex::Error] if the payload has no `embeddings` array
        def extract_embeddings(payload)
          embeddings = payload.is_a?(Hash) ? payload['embeddings'] : nil
          return embeddings if embeddings.is_a?(Array)

          raise CodebaseIndex::Error, 'Ollama API error: response did not include an embeddings array'
        end
      end
    end
  end
end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
module CodebaseIndex
  module Embedding
    # Prepares ExtractedUnit data for embedding by building context-prefixed text.
    #
    # Follows the context prefix format from docs/CONTEXT_AND_CHUNKING.md:
    #   [type] identifier
    #   namespace: ...
    #   file: ...
    #   dependencies: dep1, dep2, ...
    #
    # Handles token limit enforcement by truncating text that exceeds the
    # embedding model's context window. Token counts are estimated at
    # {CHARS_PER_TOKEN} characters per token.
    #
    # @example
    #   preparer = CodebaseIndex::Embedding::TextPreparer.new(max_tokens: 8192)
    #   text = preparer.prepare(unit)
    #   chunks = preparer.prepare_chunks(unit)
    class TextPreparer
      DEFAULT_MAX_TOKENS = 8192

      # Characters-per-token ratio used for the token estimate.
      CHARS_PER_TOKEN = 4

      # Maximum number of dependency names listed in the prefix.
      MAX_DEPENDENCIES = 10

      # @param max_tokens [Integer] maximum token budget for prepared text
      def initialize(max_tokens: DEFAULT_MAX_TOKENS)
        @max_tokens = max_tokens
      end

      # Prepare text for embedding from an ExtractedUnit.
      #
      # Builds a context prefix and appends the unit's source code (or first
      # chunk content for chunked units). Enforces token limits via truncation.
      #
      # @param unit [CodebaseIndex::ExtractedUnit] the unit to prepare
      # @return [String] context-prefixed text ready for embedding
      def prepare(unit)
        enforce_token_limit("#{build_prefix(unit)}\n#{select_content(unit)}")
      end

      # Prepare text for each chunk of an ExtractedUnit.
      #
      # If the unit has no chunks, returns a single-element array with the
      # full prepared text. For chunked units, each chunk gets the same
      # context prefix prepended and is truncated independently.
      #
      # @param unit [CodebaseIndex::ExtractedUnit] the unit to prepare
      # @return [Array<String>] array of context-prefixed texts
      def prepare_chunks(unit)
        return [prepare(unit)] unless unit.chunks&.any?

        prefix = build_prefix(unit)
        unit.chunks.map do |chunk|
          enforce_token_limit("#{prefix}\n#{value_for(chunk, :content)}")
        end
      end

      private

      # Build the context prefix for a unit.
      #
      # @param unit [CodebaseIndex::ExtractedUnit] the unit
      # @return [String] formatted prefix lines
      def build_prefix(unit)
        lines = ["[#{unit.type}] #{unit.identifier}"]
        lines << "namespace: #{unit.namespace}" if unit.namespace
        lines << "file: #{unit.file_path}" if unit.file_path
        append_dependency_line(lines, unit.dependencies)
        lines.join("\n")
      end

      # Append a formatted dependency line if dependencies exist.
      #
      # Only the first {MAX_DEPENDENCIES} non-nil targets are listed.
      #
      # @param lines [Array<String>] lines to append to
      # @param dependencies [Array<Hash>, nil] dependency list
      # @return [void]
      def append_dependency_line(lines, dependencies)
        return unless dependencies&.any?

        dep_names = dependencies.map { |dep| value_for(dep, :target) }.compact.first(MAX_DEPENDENCIES)
        lines << "dependencies: #{dep_names.join(', ')}" if dep_names.any?
      end

      # Select the content to embed for a unit.
      #
      # @param unit [CodebaseIndex::ExtractedUnit] the unit
      # @return [String] source code or first chunk content
      def select_content(unit)
        if unit.chunks&.any?
          value_for(unit.chunks.first, :content)
        else
          unit.source_code || ''
        end
      end

      # Read a key from a hash-like record, accepting symbol or string keys.
      #
      # Records that were round-tripped through JSON carry string keys, so both
      # spellings are tried.
      #
      # @param record [Hash] chunk or dependency record
      # @param key [Symbol] key to read
      # @return [Object, nil] the stored value, or nil when absent
      def value_for(record, key)
        record[key] || record[key.to_s]
      end

      # Truncate text to fit within the token budget.
      #
      # @param text [String] the text to truncate
      # @return [String] text within token limits
      def enforce_token_limit(text)
        # Clamp at zero: a negative budget would otherwise turn the slice below
        # into a negative-end range that only trims the tail of the text.
        max_chars = [(@max_tokens * CHARS_PER_TOKEN.to_f).floor, 0].max
        return text if text.length <= max_chars

        text[0...max_chars]
      end
    end
  end
end
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
module CodebaseIndex
  module Evaluation
    # Runs simple baseline strategies for comparison against the full
    # retrieval pipeline.
    #
    # Provides three baseline strategies:
    # - `:grep` — substring match on unit identifiers
    # - `:random` — random selection from available units
    # - `:file_level` — identifiers ranked by how many query keywords they contain
    #
    # @example
    #   runner = BaselineRunner.new(metadata_store: store)
    #   results = runner.run("User model", strategy: :grep, limit: 10)
    #   results # => ["User", "UserProfile", "UserSerializer"]
    #
    class BaselineRunner
      VALID_STRATEGIES = %i[grep random file_level].freeze

      # Words ignored when turning a query into keywords.
      STOP_WORDS = %w[the a an is are was were how does do what which where when why
                      this that these those in on at to for of and or but with from by].freeze

      # Keywords shorter than this are discarded.
      MIN_KEYWORD_LENGTH = 2

      # @param metadata_store [Object] Store that responds to #all_identifiers
      def initialize(metadata_store:)
        @metadata_store = metadata_store
      end

      # Run a baseline strategy for a query.
      #
      # @param query [String] Natural language query
      # @param strategy [Symbol] Baseline strategy (:grep, :random, :file_level)
      # @param limit [Integer] Maximum number of results
      # @return [Array<String>] Unit identifiers
      # @raise [ArgumentError] if the strategy is invalid
      def run(query, strategy:, limit: 10)
        unless VALID_STRATEGIES.include?(strategy)
          raise ArgumentError, "Invalid strategy: #{strategy}. Must be one of #{VALID_STRATEGIES.join(', ')}"
        end

        # Safe dynamic dispatch: strategy was checked against the whitelist above.
        send(:"run_#{strategy}", query, limit)
      end

      private

      # Grep strategy: substring match on unit identifiers.
      #
      # Extracts words from the query and matches identifiers that contain
      # any query word (case-insensitive). When the query yields no keywords,
      # the first `limit` identifiers are returned as-is.
      #
      # @param query [String] Query string
      # @param limit [Integer] Max results
      # @return [Array<String>]
      def run_grep(query, limit)
        all_ids = @metadata_store.all_identifiers
        keywords = extract_keywords(query)
        return all_ids.first(limit) if keywords.empty?

        matches = all_ids.select do |id|
          id_lower = id.downcase
          keywords.any? { |kw| id_lower.include?(kw) }
        end

        matches.first(limit)
      end

      # Random strategy: random selection from all available units.
      #
      # @param _query [String] Query string (unused)
      # @param limit [Integer] Max results
      # @return [Array<String>]
      def run_random(_query, limit)
        @metadata_store.all_identifiers.sample(limit)
      end

      # File-level strategy: ranks identifiers by the number of distinct query
      # keywords they contain (case-insensitive), highest first.
      #
      # Identifiers with equal scores keep the order in which the store
      # returned them, so results are reproducible between runs. When the
      # query yields no keywords, the first `limit` identifiers are returned.
      #
      # @param query [String] Query string
      # @param limit [Integer] Max results
      # @return [Array<String>]
      def run_file_level(query, limit)
        all_ids = @metadata_store.all_identifiers
        keywords = extract_keywords(query)
        return all_ids.first(limit) if keywords.empty?

        scored = all_ids.each_with_index.map do |id, position|
          id_lower = id.downcase
          [id, keywords.count { |kw| id_lower.include?(kw) }, position]
        end

        # sort_by is not stable, so the original position is the explicit tie-breaker.
        scored.select { |_, score, _| score.positive? }
              .sort_by { |_, score, position| [-score, position] }
              .first(limit)
              .map(&:first)
      end

      # Extract lowercase keywords from a query string.
      #
      # Filters out common stop words and short words, and removes repeated
      # words so a keyword is never counted twice.
      #
      # @param query [String] Query text
      # @return [Array<String>] Distinct keywords in order of first appearance
      def extract_keywords(query)
        query.downcase
             .scan(/[a-z0-9_]+/)
             .reject { |word| STOP_WORDS.include?(word) || word.length < MIN_KEYWORD_LENGTH }
             .uniq
      end
    end
  end
end
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'metrics'
4
+
5
# Enumerable#to_set is only available without this require on Ruby >= 3.2.
require 'set'

module CodebaseIndex
  module Evaluation
    # Runs evaluation queries through a Retriever and scores results
    # against ground truth annotations.
    #
    # Takes a configured retriever and a query set, runs each query,
    # and produces per-query and aggregate metrics.
    #
    # @example
    #   evaluator = Evaluator.new(retriever: retriever, query_set: query_set)
    #   report = evaluator.evaluate
    #   report.aggregates[:mean_mrr] # => 0.75
    #
    class Evaluator
      # Result for a single evaluation query.
      QueryResult = Struct.new(:query, :expected_units, :retrieved_units, :scores, :tokens_used,
                               keyword_init: true)

      # Aggregate report across all queries.
      EvaluationReport = Struct.new(:results, :aggregates, keyword_init: true)

      # Per-query score keys that are averaged into `mean_<key>` aggregates.
      METRIC_KEYS = %i[precision_at5 precision_at10 recall mrr context_completeness token_efficiency].freeze

      # @param retriever [CodebaseIndex::Retriever] Configured retriever instance
      # @param query_set [QuerySet] Set of evaluation queries with ground truth
      # @param budget [Integer] Token budget per query
      def initialize(retriever:, query_set:, budget: 8000)
        @retriever = retriever
        @query_set = query_set
        @budget = budget
      end

      # Run all queries and produce an evaluation report.
      #
      # @return [EvaluationReport] Per-query results and aggregate metrics
      def evaluate
        results = @query_set.queries.map { |q| evaluate_query(q) }
        EvaluationReport.new(results: results, aggregates: compute_aggregates(results))
      end

      private

      # Evaluate a single query against the retriever.
      #
      # @param query [QuerySet::Query] Evaluation query
      # @return [QueryResult]
      def evaluate_query(query)
        retrieval_result = @retriever.retrieve(query.query, budget: @budget)
        retrieved_ids = extract_identifiers(retrieval_result)

        QueryResult.new(
          query: query.query,
          expected_units: query.expected_units,
          retrieved_units: retrieved_ids,
          scores: compute_scores(retrieved_ids, query.expected_units, retrieval_result),
          tokens_used: retrieval_result.tokens_used
        )
      end

      # Extract unit identifiers from retrieval result sources.
      #
      # Hash sources are read via `:identifier` (falling back to the string
      # key); any other source is converted with `to_s`.
      #
      # @param result [Retriever::RetrievalResult] Retrieval result
      # @return [Array<String>] Ordered list of unit identifiers
      def extract_identifiers(result)
        return [] unless result.sources

        result.sources.map do |source|
          source.is_a?(Hash) ? source[:identifier] || source['identifier'] : source.to_s
        end
      end

      # Compute all metrics for a query result.
      #
      # @param retrieved [Array<String>] Retrieved identifiers
      # @param expected [Array<String>] Expected identifiers
      # @param result [Retriever::RetrievalResult] Retrieval result
      # @return [Hash] Metric scores
      def compute_scores(retrieved, expected, result)
        {
          precision_at5: Metrics.precision_at_k(retrieved, expected, cutoff: 5),
          precision_at10: Metrics.precision_at_k(retrieved, expected, cutoff: 10),
          recall: Metrics.recall(retrieved, expected),
          mrr: Metrics.mrr(retrieved, expected),
          context_completeness: Metrics.context_completeness(retrieved, expected),
          token_efficiency: compute_token_efficiency(retrieved, expected, result)
        }
      end

      # Compute token efficiency from the retrieval result.
      #
      # Per-unit token counts are not available here, so the relevant token
      # count is approximated as `tokens_used` scaled by the fraction of
      # retrieved identifiers that are expected.
      #
      # @param retrieved [Array<String>] Retrieved identifiers
      # @param expected [Array<String>] Expected identifiers
      # @param result [Retriever::RetrievalResult] Retrieval result
      # @return [Float]
      def compute_token_efficiency(retrieved, expected, result)
        tokens = result.tokens_used
        return 0.0 if tokens.nil? || tokens.zero?

        expected_set = expected.to_set
        relevant_count = retrieved.count { |id| expected_set.include?(id) }
        # Guard the denominator so an empty retrieval yields 0.0 rather than NaN.
        relevant_ratio = relevant_count.to_f / [retrieved.size, 1].max

        Metrics.token_efficiency((tokens * relevant_ratio).ceil, tokens)
      end

      # Compute aggregate metrics across all query results.
      #
      # A result whose `tokens_used` is nil contributes 0 tokens to the mean,
      # matching how a missing count is scored for token efficiency.
      #
      # @param results [Array<QueryResult>] Individual query results
      # @return [Hash] Aggregate metrics
      def compute_aggregates(results)
        return empty_aggregates if results.empty?

        count = results.size.to_f
        aggregates = {}

        METRIC_KEYS.each do |key|
          aggregates[:"mean_#{key}"] = results.map { |r| r.scores[key] }.sum / count
        end

        aggregates[:total_queries] = results.size
        aggregates[:mean_tokens_used] = results.sum { |r| r.tokens_used || 0 } / count
        aggregates
      end

      # Return zero-valued aggregates for empty result sets.
      #
      # @return [Hash]
      def empty_aggregates
        {
          mean_precision_at5: 0.0,
          mean_precision_at10: 0.0,
          mean_recall: 0.0,
          mean_mrr: 0.0,
          mean_context_completeness: 0.0,
          mean_token_efficiency: 0.0,
          total_queries: 0,
          mean_tokens_used: 0.0
        }
      end
    end
  end
end
+ end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
# Enumerable#to_set is only available without this require on Ruby >= 3.2.
require 'set'

module CodebaseIndex
  module Evaluation
    # Retrieval quality metrics.
    #
    # All methods are stateless pure functions that take arrays of identifiers
    # (or token counts) and return numeric scores.
    #
    module Metrics
      module_function

      # Fraction of top-k results that are relevant.
      #
      # The denominator is always `cutoff`, even when fewer than `cutoff`
      # results were retrieved.
      #
      # @param retrieved [Array<String>] Retrieved unit identifiers (ordered)
      # @param relevant [Array<String>] Ground-truth relevant identifiers
      # @param cutoff [Integer] Number of top results to consider
      # @return [Float] 0.0 to 1.0; 0.0 when either list is empty or cutoff is 0
      def precision_at_k(retrieved, relevant, cutoff: 5)
        return 0.0 if retrieved.empty? || relevant.empty?
        # A zero cutoff would otherwise evaluate 0.0 / 0 and return NaN.
        return 0.0 if cutoff.zero?

        relevant_set = relevant.to_set
        hits = retrieved.first(cutoff).count { |id| relevant_set.include?(id) }
        hits.to_f / cutoff
      end

      # Fraction of relevant items that were retrieved.
      #
      # @param retrieved [Array<String>] Retrieved identifiers
      # @param relevant [Array<String>] Ground-truth relevant identifiers
      # @return [Float] 0.0 to 1.0; 0.0 when there are no relevant items
      def recall(retrieved, relevant)
        return 0.0 if relevant.empty?

        retrieved_set = retrieved.to_set
        found = relevant.count { |id| retrieved_set.include?(id) }
        found.to_f / relevant.size
      end

      # Reciprocal rank — inverse of the 1-based rank of the first relevant
      # result. Averaging this value across queries gives Mean Reciprocal Rank.
      #
      # @param retrieved [Array<String>] Retrieved identifiers (ordered)
      # @param relevant [Array<String>] Ground-truth relevant identifiers
      # @return [Float] 0.0 to 1.0; 0.0 when no relevant result was retrieved
      def mrr(retrieved, relevant)
        relevant_set = relevant.to_set
        first_hit = retrieved.index { |id| relevant_set.include?(id) }
        first_hit ? 1.0 / (first_hit + 1) : 0.0
      end

      # Fraction of required units present in retrieved results.
      #
      # @param retrieved [Array<String>] Retrieved identifiers
      # @param required [Array<String>] Required identifiers (subset of relevant)
      # @return [Float] 0.0 to 1.0; 1.0 when nothing is required
      def context_completeness(retrieved, required)
        return 1.0 if required.empty?

        retrieved_set = retrieved.to_set
        found = required.count { |id| retrieved_set.include?(id) }
        found.to_f / required.size
      end

      # Ratio of relevant tokens to total tokens in context.
      #
      # @param relevant_tokens [Integer] Tokens from relevant units
      # @param total_tokens [Integer] Total tokens in assembled context
      # @return [Float] ratio capped at 1.0; 0.0 when total_tokens is zero
      def token_efficiency(relevant_tokens, total_tokens)
        return 0.0 if total_tokens.zero?

        [relevant_tokens.to_f / total_tokens, 1.0].min
      end
    end
  end
end