codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,261 @@
1
+ # frozen_string_literal: true
2
+
3
module CodebaseIndex
  module Extractors
    # ViewTemplateExtractor handles ERB view template extraction.
    #
    # Scans `app/views/` for `.erb` files (which includes `.html.erb`) and
    # produces one ExtractedUnit per template. Extracts render calls
    # (partials), instance variables, and helper method usage. Links partials
    # via dependencies and infers the owning controller from directory
    # structure.
    #
    # All content analysis is performed on the Ruby code found inside ERB tags
    # (`<% ... %>` / `<%= ... %>`) only. Static markup, inline CSS/JS and ERB
    # comments (`<%# ... %>`) are ignored so that text such as `@media`,
    # e-mail addresses or the word "raw" in prose is not reported as an
    # instance variable or helper call.
    #
    # This is an ERB-only MVP — HAML, Slim, and layout inheritance
    # are not yet supported.
    #
    # @example
    #   extractor = ViewTemplateExtractor.new
    #   units = extractor.extract_all
    #   index = units.find { |u| u.identifier == "users/index.html.erb" }
    #
    class ViewTemplateExtractor
      # Directories to scan for view templates
      VIEW_DIRECTORIES = %w[
        app/views
      ].freeze

      # Common Rails view helper methods to detect
      COMMON_HELPERS = %w[
        link_to
        button_to
        form_for
        form_with
        form_tag
        image_tag
        stylesheet_link_tag
        javascript_include_tag
        content_for
        yield
        render
        redirect_to
        truncate
        pluralize
        number_to_currency
        number_to_percentage
        number_with_delimiter
        time_ago_in_words
        distance_of_time_in_words
        simple_format
        sanitize
        raw
        safe_join
        content_tag
        tag
        mail_to
        url_for
        asset_path
        asset_url
      ].freeze

      # One pre-compiled alternation of all helpers, so a template is scanned
      # once instead of compiling and running one regex per helper.
      HELPER_PATTERN = /\b(?:#{Regexp.union(COMMON_HELPERS).source})\b/.freeze

      # Captures the Ruby code inside an ERB tag. `<%#` (comment) and `<%%`
      # (literal "<%") are skipped by the negative lookahead.
      ERB_CODE_PATTERN = /<%(?![#%])(.*?)%>/m.freeze

      # Render-call patterns, applied in this order:
      #   1. render partial: 'x' / render :partial => 'x' / render(partial: 'x')
      #   2. render 'x' / render('x')
      #   3. render :x (but not `render :partial => ...` and friends)
      RENDER_PATTERNS = [
        /\brender[\s(]+(?:partial:|:partial\s*=>)\s*['"]([^'"]+)['"]/,
        /\brender[\s(]+['"]([^'"]+)['"]/,
        /\brender[\s(]+:(\w+)\b(?!\s*=>)/
      ].freeze

      # Instance variable reference; the lookbehind rejects `@@class_vars`
      # and `@` embedded in a word (e.g. an e-mail address in a string).
      INSTANCE_VARIABLE_PATTERN = /(?<![\w@])@[a-zA-Z_]\w*/.freeze

      def initialize
        @directories = VIEW_DIRECTORIES.map { |d| Rails.root.join(d) }
                                       .select(&:directory?)
      end

      # Extract all ERB view templates
      #
      # @return [Array<ExtractedUnit>] List of view template units, in
      #   path-sorted order per view directory
      def extract_all
        @directories.flat_map do |dir|
          # `**/*.erb` already matches `*.html.erb`; one glob is enough.
          Dir.glob(dir.join('**', '*.erb').to_s).sort.filter_map do |file|
            extract_view_template_file(file)
          end
        end
      end

      # Extract a single view template file
      #
      # Any error raised while reading or analysing the file is logged via
      # Rails.logger and results in nil.
      #
      # @param file_path [String] Path to the ERB template file
      # @return [ExtractedUnit, nil] The extracted unit or nil if not ERB
      def extract_view_template_file(file_path)
        file_path = file_path.to_s
        return nil unless file_path.end_with?('.erb')

        source = File.read(file_path)
        identifier = build_identifier(file_path)

        unit = ExtractedUnit.new(
          type: :view_template,
          identifier: identifier,
          file_path: file_path
        )

        unit.namespace = extract_view_namespace(file_path)
        unit.source_code = source
        unit.metadata = build_metadata(source, file_path)
        unit.dependencies = build_dependencies(source, file_path, identifier)

        unit
      rescue StandardError => e
        Rails.logger.error("Failed to extract view template #{file_path}: #{e.message}")
        nil
      end

      private

      # Build a readable identifier from the file path.
      #
      # @param file_path [String] Absolute path to the template
      # @return [String] Relative identifier like "users/index.html.erb"
      def build_identifier(file_path)
        file_path.to_s
                 .delete_prefix("#{Rails.root}/")
                 .delete_prefix('app/views/')
      end

      # Extract namespace from directory structure.
      #
      # @param file_path [String] Absolute path
      # @return [String, nil] Namespace like "users" or "admin/users";
      #   nil for templates directly under the views root
      def extract_view_namespace(file_path)
        dir = File.dirname(build_identifier(file_path))
        dir == '.' ? nil : dir
      end

      # Build metadata hash for the template.
      #
      # @param source [String] Template source code
      # @param file_path [String] Path to the template
      # @return [Hash]
      def build_metadata(source, file_path)
        {
          template_engine: 'erb',
          is_partial: partial?(file_path),
          partials_rendered: extract_rendered_partials(source),
          instance_variables: extract_instance_variables(source),
          helpers_called: extract_helpers(source),
          # Non-blank lines only
          loc: source.each_line.count { |line| !line.strip.empty? }
        }
      end

      # Check if a template is a partial (filename starts with _).
      #
      # @param file_path [String] Path to the template
      # @return [Boolean]
      def partial?(file_path)
        File.basename(file_path).start_with?('_')
      end

      # Ruby code fragments found inside the template's ERB tags.
      #
      # @param source [String] Template source code
      # @return [Array<String>] One string per non-comment ERB tag
      def erb_segments(source)
        source.scan(ERB_CODE_PATTERN).flatten
      end

      # Extract partial names from render calls.
      #
      # Matches:
      # - render partial: 'foo/bar'   (also `:partial => 'foo/bar'`)
      # - render 'foo/bar'
      # - render :foo
      # each with or without parentheses.
      #
      # @param source [String] Template source code
      # @return [Array<String>] Unique partial names
      def extract_rendered_partials(source)
        segments = erb_segments(source)
        RENDER_PATTERNS.flat_map do |pattern|
          segments.flat_map { |code| code.scan(pattern).flatten }
        end.uniq
      end

      # Extract instance variables used in the template.
      #
      # @param source [String] Template source code
      # @return [Array<String>] Sorted, unique instance variable names
      def extract_instance_variables(source)
        erb_segments(source)
          .flat_map { |code| code.scan(INSTANCE_VARIABLE_PATTERN) }
          .uniq
          .sort
      end

      # Extract common Rails helper calls from the template.
      #
      # @param source [String] Template source code
      # @return [Array<String>] Sorted, unique helper method names
      def extract_helpers(source)
        erb_segments(source)
          .flat_map { |code| code.scan(HELPER_PATTERN) }
          .uniq
          .sort
      end

      # Build dependencies for the template.
      #
      # @param source [String] Template source code
      # @param file_path [String] Path to the template
      # @param identifier [String] Template identifier
      # @return [Array<Hash>]
      def build_dependencies(source, file_path, identifier)
        deps = extract_rendered_partials(source).map do |partial_name|
          {
            type: :view_template,
            target: resolve_partial_identifier(partial_name, identifier),
            via: :render
          }
        end

        controller = infer_controller(file_path)
        deps << { type: :controller, target: controller, via: :view_render } if controller

        deps
      end

      # Resolve a partial name to its file identifier.
      #
      # Given a render call like `render 'comments/comment'`, resolves to
      # `comments/_comment.html.erb`. A name without a directory is resolved
      # relative to the directory of the rendering template.
      #
      # @param partial_name [String] The partial name from the render call
      # @param current_identifier [String] The current template's identifier
      # @return [String] Resolved partial identifier
      def resolve_partial_identifier(partial_name, current_identifier)
        lookup = partial_name.include?('/') ? partial_name : current_identifier
        dir = File.dirname(lookup)
        file = "_#{File.basename(partial_name)}.html.erb"

        dir == '.' ? file : "#{dir}/#{file}"
      end

      # Infer the controller class from the template's directory path.
      #
      # Returns nil for directories that are not rendered by a controller of
      # the same name: anything under `layouts/` and mailer view directories
      # (`*_mailer`).
      #
      # @param file_path [String] Path to the template
      # @return [String, nil] Controller class name
      def infer_controller(file_path)
        namespace = extract_view_namespace(file_path)
        return nil unless namespace

        segments = namespace.split('/')
        return nil if segments.first == 'layouts'
        return nil if segments.last&.end_with?('_mailer')

        class_path = segments.map { |segment| segment.split('_').map(&:capitalize).join }
        "#{class_path.join('::')}Controller"
      end
    end
  end
end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
module CodebaseIndex
  module Feedback
    # Detects patterns in retrieval feedback that suggest coverage gaps.
    #
    # Analyzes ratings and gap reports to find:
    # - Repeated low-score queries with common keywords
    # - Frequently reported missing units
    #
    # @example
    #   detector = GapDetector.new(feedback_store: store)
    #   issues = detector.detect
    #   issues.each { |i| puts "#{i[:type]}: #{i[:description]}" }
    #
    class GapDetector
      # Ratings at or below this score count as "low".
      LOW_SCORE_THRESHOLD = 2
      # Minimum number of low-score queries sharing a keyword to report it.
      MIN_PATTERN_COUNT = 2
      # Minimum number of gap reports naming a unit to report it.
      MIN_GAP_COUNT = 2
      # Words shorter than this are ignored when looking for keywords.
      MIN_KEYWORD_LENGTH = 3

      # @param feedback_store [Feedback::Store] Any object responding to
      #   `ratings` and `gaps`, each returning an Array of string-keyed Hashes
      def initialize(feedback_store:)
        @feedback_store = feedback_store
      end

      # Detect coverage gaps from accumulated feedback.
      #
      # @return [Array<Hash>] List of detected issues with :type, :description, and details
      def detect
        detect_low_score_patterns + detect_frequently_missing
      end

      private

      # Find keyword patterns in low-scoring queries.
      #
      # Entries whose score is missing or not numeric are ignored rather
      # than raising, since the underlying feedback file may be hand-edited.
      #
      # @return [Array<Hash>]
      def detect_low_score_patterns
        low_ratings = @feedback_store.ratings.select do |rating|
          score = rating['score']
          score.is_a?(Numeric) && score <= LOW_SCORE_THRESHOLD
        end
        return [] if low_ratings.size < MIN_PATTERN_COUNT

        repeated = count_keywords(low_ratings).select { |_, count| count >= MIN_PATTERN_COUNT }
        repeated.map do |keyword, count|
          {
            type: :repeated_low_scores,
            pattern: keyword,
            count: count,
            description: "#{count} low-score queries mention '#{keyword}'"
          }
        end
      end

      # Count, for each keyword, how many of the given queries mention it.
      #
      # A keyword is counted at most once per query, so the result matches
      # the "N low-score queries mention ..." wording of the report and a
      # single query repeating a word cannot trigger a pattern on its own.
      #
      # @param ratings [Array<Hash>] Low-score rating entries
      # @return [Hash<String, Integer>] Keyword => number of queries containing it
      def count_keywords(ratings)
        ratings.each_with_object(Hash.new(0)) do |rating, counts|
          words = rating['query'].to_s.downcase.split(/\W+/).uniq
          words.each do |word|
            counts[word] += 1 if word.length >= MIN_KEYWORD_LENGTH
          end
        end
      end

      # Find units that are frequently reported as missing.
      #
      # Gap entries without a usable `missing_unit` value are skipped.
      #
      # @return [Array<Hash>]
      def detect_frequently_missing
        unit_counts = @feedback_store.gaps.each_with_object(Hash.new(0)) do |gap, counts|
          unit = gap['missing_unit']
          next if unit.nil? || unit.to_s.strip.empty?

          counts[unit] += 1
        end

        unit_counts.select { |_, count| count >= MIN_GAP_COUNT }.map do |unit, count|
          {
            type: :frequently_missing,
            unit: unit,
            count: count,
            description: "#{unit} reported missing #{count} times"
          }
        end
      end
    end
  end
end
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
require 'fileutils'
require 'json'
require 'time'
5
+
6
module CodebaseIndex
  module Feedback
    # Append-only JSONL file for retrieval feedback: ratings and gap reports.
    #
    # Each line is a JSON object with a `type` field ("rating" or "gap")
    # plus type-specific fields. Lines that are blank, are not valid JSON,
    # or are valid JSON but not an object are ignored when reading.
    #
    # @example
    #   store = Store.new(path: '/tmp/feedback.jsonl')
    #   store.record_rating(query: "How does User work?", score: 4)
    #   store.record_gap(query: "payments", missing_unit: "PaymentService", unit_type: "service")
    #   store.average_score # => 4.0
    #
    class Store
      # Accepted rating scores.
      VALID_SCORES = (1..5).freeze

      # @param path [String] Path to the JSONL file
      def initialize(path:)
        @path = path
      end

      # Record a retrieval quality rating.
      #
      # @param query [String] The original query
      # @param score [Integer] Rating 1-5
      # @param comment [String, nil] Optional comment
      # @return [void]
      # @raise [ArgumentError] if score is not an Integer in 1..5
      def record_rating(query:, score:, comment: nil)
        unless score.is_a?(Integer) && VALID_SCORES.cover?(score)
          raise ArgumentError, "score must be an Integer between 1 and 5, got: #{score.inspect}"
        end

        append(
          {
            type: 'rating',
            query: query,
            score: score,
            comment: comment,
            timestamp: Time.now.iso8601
          }
        )
      end

      # Record a missing unit gap report.
      #
      # @param query [String] The query that had poor results
      # @param missing_unit [String] Identifier of the expected but missing unit
      # @param unit_type [String] Expected type of the missing unit
      # @return [void]
      def record_gap(query:, missing_unit:, unit_type:)
        append(
          {
            type: 'gap',
            query: query,
            missing_unit: missing_unit,
            unit_type: unit_type,
            timestamp: Time.now.iso8601
          }
        )
      end

      # Read all feedback entries, in file order.
      #
      # @param limit [Integer, nil] Maximum number of entries to return. Returns all if nil;
      #   returns an empty list for zero or a negative value.
      # @return [Array<Hash>] Entries with string keys; empty if the file does not exist
      def all_entries(limit: nil)
        return [] if limit && limit <= 0
        return [] unless File.exist?(@path)

        entries = []
        File.foreach(@path) do |line|
          entry = parse_entry(line)
          next unless entry

          entries << entry
          break if limit && entries.size >= limit
        end
        entries
      end

      # Filter to rating entries only.
      #
      # @return [Array<Hash>]
      def ratings
        all_entries.select { |e| e['type'] == 'rating' }
      end

      # Filter to gap report entries only.
      #
      # @return [Array<Hash>]
      def gaps
        all_entries.select { |e| e['type'] == 'gap' }
      end

      # Average score across all ratings that carry a numeric score.
      #
      # @return [Float, nil] Average score, or nil if no ratings
      def average_score
        scores = ratings.map { |r| r['score'] }.grep(Numeric)
        return nil if scores.empty?

        scores.sum.to_f / scores.size
      end

      private

      # Parse one JSONL line into an entry.
      #
      # @param line [String] Raw line from the file
      # @return [Hash, nil] The entry, or nil when the line is blank, is not
      #   valid JSON, or does not decode to a JSON object
      def parse_entry(line)
        text = line.strip
        return nil if text.empty?

        parsed = JSON.parse(text)
        # Scalars/arrays are valid JSON but would break `entry['type']` lookups.
        parsed.is_a?(Hash) ? parsed : nil
      rescue JSON::ParserError
        nil
      end

      # Append a JSON entry as a new line, creating the parent directory
      # if it does not exist yet.
      #
      # @param entry [Hash]
      # @return [void]
      def append(entry)
        FileUtils.mkdir_p(File.dirname(@path))
        File.open(@path, 'a') do |f|
          f.puts(JSON.generate(entry))
        end
      end
    end
  end
end
@@ -0,0 +1,209 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../ast/parser'
4
+ require_relative '../ast/node'
5
+ require_relative '../ast/call_site_extractor'
6
+ require_relative 'response_code_mapper'
7
+
8
module CodebaseIndex
  module FlowAnalysis
    # Extracts operations from a method body AST in source line order.
    #
    # Walks Ast::Node trees and classifies nodes into domain-meaningful
    # operation types: calls, transactions, async enqueues, responses,
    # conditionals, and dynamic dispatch.
    #
    # Each operation is a Hash with a :type key (:call, :async,
    # :dynamic_dispatch, :response, :transaction or :conditional) plus
    # type-specific keys.
    #
    # @example Extracting operations from a method body
    #   parser = Ast::Parser.new
    #   root = parser.parse(source)
    #   method_node = root.find_all(:def).find { |n| n.method_name == "create" }
    #   ops = OperationExtractor.new.extract(method_node)
    #   ops.first[:type] #=> :call
    #
    class OperationExtractor
      TRANSACTION_METHODS = %w[transaction with_lock].freeze
      ASYNC_METHODS = %w[perform_async perform_later perform_in perform_at].freeze
      RESPONSE_METHODS = %w[redirect_to head respond_with].freeze
      DYNAMIC_DISPATCH_METHODS = %w[send public_send __send__].freeze

      # Extract operations from a method definition node in source line order.
      #
      # @param method_node [Ast::Node] A :def or :defs node
      # @return [Array<Hash>] Operations in AST traversal order; empty when
      #   the argument is not an Ast::Node
      def extract(method_node)
        return [] unless method_node.is_a?(Ast::Node)

        operations = []
        walk(method_node, operations)
        operations
      end

      private

      # Recursively walk the AST and extract operations.
      #
      # @param node [Ast::Node, Object] Node to visit; non-nodes are ignored
      # @param operations [Array<Hash>] Accumulator that operations are appended to
      # @return [void]
      def walk(node, operations)
        return unless node.is_a?(Ast::Node)

        case node.type
        when :block then handle_block(node, operations)
        when :send  then handle_send(node, operations)
        when :if    then handle_conditional(node, operations)
        when :case  then handle_case(node, operations)
        else walk_children(node, operations)
        end
      end

      # Walk all children of a node.
      def walk_children(node, operations)
        node.children&.each { |child| walk(child, operations) }
      end

      # Handle :block nodes - check for transaction/with_lock wrappers.
      #
      # The first child is treated as the call the block is attached to; the
      # remaining children are the block body. A transaction block collects
      # its body's operations under :nested; any other block emits its call
      # (if classifiable) followed by the body's operations inline.
      def handle_block(node, operations)
        children = node.children || []
        call_node = children.first
        body = children.drop(1)

        if send_node?(call_node) && transaction_call?(call_node)
          nested = []
          body.each { |child| walk(child, nested) }

          operations << {
            type: :transaction,
            receiver: call_node.receiver,
            line: call_node.line,
            nested: nested
          }
        else
          # handle_send ignores anything that is not a :send node.
          handle_send(call_node, operations)
          body.each { |child| walk(child, operations) }
        end
      end

      # Handle :send nodes - classify into operation types.
      #
      # Classification precedence: async, dynamic dispatch, response, then
      # plain significant call. Insignificant calls emit nothing.
      def handle_send(node, operations)
        return unless send_node?(node)

        if async_call?(node)
          operations << dispatch_operation(:async, node)
        elsif dynamic_dispatch?(node)
          operations << dispatch_operation(:dynamic_dispatch, node)
        elsif response_call?(node)
          operations << {
            type: :response,
            status_code: ResponseCodeMapper.resolve_method(node.method_name, arguments: node.arguments || []),
            render_method: node.method_name,
            line: node.line
          }
        elsif significant_call?(node)
          operations << {
            type: :call,
            target: node.receiver,
            method: node.method_name,
            line: node.line
          }
        end

        # Do NOT recurse into send node children — the walker handles
        # children at the statement level. Recursing here would double-count
        # chained calls and pick up receiver lvars as spurious method calls.
      end

      # Build the operation Hash shared by :async and :dynamic_dispatch.
      def dispatch_operation(type, node)
        {
          type: type,
          target: node.receiver,
          method: node.method_name,
          args_hint: node.arguments || [],
          line: node.line
        }
      end

      # Handle :if nodes - extract conditional with then/else branches.
      #
      # Emits nothing when neither branch contains an operation. The
      # condition itself is recorded as text only; it is not walked.
      def handle_conditional(node, operations)
        # children[0] = condition, children[1] = then, children[2] = else
        condition, then_branch, else_branch = node.children || []

        then_ops = []
        else_ops = []
        walk(then_branch, then_ops)
        walk(else_branch, else_ops)

        return if then_ops.empty? && else_ops.empty?

        condition_text = case condition
                         when Ast::Node then condition.to_source
                         when String then condition
                         end

        operations << {
          type: :conditional,
          kind: 'if',
          condition: condition_text || node.source,
          line: node.line,
          then_ops: then_ops,
          else_ops: else_ops
        }
      end

      # Handle :case nodes as a conditional variant.
      #
      # Operations from all children of the case node are merged into
      # :then_ops; :else_ops is always empty.
      def handle_case(node, operations)
        branch_ops = []
        walk_children(node, branch_ops)

        return if branch_ops.empty?

        operations << {
          type: :conditional,
          kind: 'case',
          condition: node.source,
          line: node.line,
          then_ops: branch_ops,
          else_ops: []
        }
      end

      # True when the argument is an Ast::Node of type :send.
      def send_node?(node)
        node.is_a?(Ast::Node) && node.type == :send
      end

      # Detect transaction/with_lock calls.
      def transaction_call?(node)
        TRANSACTION_METHODS.include?(node.method_name)
      end

      # Detect async enqueue calls.
      def async_call?(node)
        ASYNC_METHODS.include?(node.method_name)
      end

      # Detect response calls (render, redirect_to, head, render_*).
      #
      # Only `render` itself and names with the `render_` prefix qualify;
      # unrelated names that merely begin with "render" (e.g. `renderer`)
      # are left to be classified as ordinary calls.
      def response_call?(node)
        return true if RESPONSE_METHODS.include?(node.method_name)

        name = node.method_name.to_s
        name == 'render' || name.start_with?('render_')
      end

      # Detect dynamic dispatch (send, public_send, __send__).
      def dynamic_dispatch?(node)
        DYNAMIC_DISPATCH_METHODS.include?(node.method_name)
      end

      # Determine if a call is significant enough to include.
      def significant_call?(node)
        return false if node.method_name.nil?
        return false if Ast::INSIGNIFICANT_METHODS.include?(node.method_name)
        return false if transaction_call?(node) # Handled by block wrapper

        true
      end
    end
  end
end