codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../ast/parser'
4
+ require_relative '../ast/call_site_extractor'
5
+
6
+ module CodebaseIndex
7
+ module RubyAnalyzer
8
# Tags ExtractedUnit objects with the data-transformation calls found in
# their source code.
#
# Conservative v1: a call counts as a transformation only when its method
# name appears in one of the three constant lists below (construction,
# serialization, deserialization). Nothing else about the call is inspected.
#
# @example
#   analyzer = RubyAnalyzer::DataFlowAnalyzer.new
#   analyzer.annotate(units)
#   units.first.metadata[:data_transformations]
#   #=> [{ method: "to_json", category: :serialization, receiver: "user", line: 5 }]
#
class DataFlowAnalyzer
  CONSTRUCTION_METHODS = %w[new].freeze
  SERIALIZATION_METHODS = %w[to_h to_json to_a serialize as_json].freeze
  DESERIALIZATION_METHODS = %w[from_json parse].freeze

  # @param parser [Ast::Parser, nil] Parser to use; a default Ast::Parser is
  #   built when nil
  def initialize(parser: nil)
    @parser = parser || Ast::Parser.new
    @call_site_extractor = Ast::CallSiteExtractor.new
  end

  # Write a :data_transformations entry into the metadata of every unit that
  # has source code. Units without source code are left untouched.
  #
  # @param units [Array<ExtractedUnit>] Units to annotate (mutated in place)
  # @return [Array<ExtractedUnit>] The same collection that was passed in
  def annotate(units)
    units.each do |unit|
      code = unit.source_code
      next unless code

      unit.metadata[:data_transformations] = detect_transformations(code)
    end
  end

  private

  # Parse +source+, collect its call sites, and keep the ones whose method
  # name falls into a known category.
  #
  # @param source [String] Ruby source code
  # @return [Array<Hash>] One hash per matching call with :method, :category,
  #   :receiver and :line; empty when CodebaseIndex::ExtractionError is raised
  def detect_transformations(source)
    call_sites = @call_site_extractor.extract(@parser.parse(source))

    call_sites.each_with_object([]) do |site, found|
      kind = categorize(site[:method_name])
      next unless kind

      found << {
        method: site[:method_name],
        category: kind,
        receiver: site[:receiver],
        line: site[:line]
      }
    end
  rescue CodebaseIndex::ExtractionError
    []
  end

  # Map a method name to its transformation category.
  #
  # @param method_name [String] Name of the called method
  # @return [Symbol, nil] :construction, :serialization, :deserialization,
  #   or nil when the name is in none of the lists
  def categorize(method_name)
    return :construction if CONSTRUCTION_METHODS.include?(method_name)
    return :serialization if SERIALIZATION_METHODS.include?(method_name)

    :deserialization if DESERIALIZATION_METHODS.include?(method_name)
  end
end
77
+ end
78
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CodebaseIndex
4
+ module RubyAnalyzer
5
# Mixin that builds a fully qualified constant name from a bare name and the
# stack of namespaces enclosing it.
module FqnBuilder
  private

  # @param name [String] Unqualified class or module name
  # @param namespace_stack [Array<String>] Enclosing namespaces, outermost first
  # @return [String] +name+ unchanged when the stack is empty, otherwise the
  #   namespaces and the name joined with "::"
  def build_fqn(name, namespace_stack)
    return name if namespace_stack.empty?

    prefix = namespace_stack.join('::')
    "#{prefix}::#{name}"
  end
end
17
+ end
18
+ end
@@ -0,0 +1,275 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CodebaseIndex
4
+ module RubyAnalyzer
5
# Renders Mermaid diagrams (as plain strings) from extracted units, serialized
# dependency-graph data, and graph-analysis reports.
#
# Hash inputs may use either symbol or string keys: every lookup tries the
# symbol key first and falls back to its string form.
#
# @example Rendering a call graph
#   renderer = MermaidRenderer.new
#   units = RubyAnalyzer.analyze(paths: ["lib/"])
#   puts renderer.render_call_graph(units)
#
class MermaidRenderer
  # Render a call graph: one node per unit and per dependency target, one
  # edge per distinct (unit, target) pair. The edge is labelled with the
  # dependency's :via value when it has one.
  #
  # @param units [Array<ExtractedUnit>, nil] Units to render
  # @return [String] Mermaid "graph TD" source (header only for nil/empty input)
  def render_call_graph(units)
    lines = ['graph TD']
    return lines.join("\n") if units.nil? || units.empty?

    seen_nodes = {}
    seen_edges = {}

    units.each do |unit|
      node_id = sanitize_id(unit.identifier)
      declare_node(lines, seen_nodes, node_id, unit.identifier)

      (unit.dependencies || []).each do |dep|
        target = lookup(dep, :target)
        next unless target

        target_id = sanitize_id(target)
        declare_node(lines, seen_nodes, target_id, target)

        # Only the first dependency between two nodes produces an edge.
        edge_key = "#{node_id}->#{target_id}"
        next if seen_edges[edge_key]

        seen_edges[edge_key] = true
        via = lookup(dep, :via)
        lines << if via
                   " #{node_id} -->|#{via}| #{target_id}"
                 else
                   " #{node_id} --> #{target_id}"
                 end
      end
    end

    lines.join("\n")
  end

  # Render a dependency map from graph data (as returned by DependencyGraph#to_h).
  #
  # Nodes are grouped into one subgraph per node type (:unknown when a node
  # has no type or no metadata at all). Edges whose target is not a known
  # node are skipped; duplicate edges are emitted once.
  #
  # @param graph_data [Hash, nil] Serialized graph data with :nodes and :edges keys
  # @return [String] Mermaid "graph TD" source
  def render_dependency_map(graph_data)
    lines = ['graph TD']
    return lines.join("\n") unless graph_data

    nodes = lookup(graph_data, :nodes) || {}
    edges = lookup(graph_data, :edges) || {}

    return lines.join("\n") if nodes.empty?

    # Bucket identifiers by type, keeping first-seen order of types.
    by_type = Hash.new { |hash, key| hash[key] = [] }
    nodes.each do |identifier, meta|
      by_type[node_type(meta)] << identifier
    end

    by_type.each do |type, identifiers|
      lines << " subgraph #{type}"
      identifiers.each do |id|
        lines << " #{sanitize_id(id)}[\"#{escape_label(id)}\"]"
      end
      lines << ' end'
    end

    seen_edges = {}
    edges.each do |source, targets|
      source_id = sanitize_id(source)

      Array(targets).each do |target|
        next unless nodes.key?(target)

        target_id = sanitize_id(target)
        edge_key = "#{source_id}->#{target_id}"
        next if seen_edges[edge_key]

        seen_edges[edge_key] = true
        lines << " #{source_id} --> #{target_id}"
      end
    end

    lines.join("\n")
  end

  # Render a dataflow diagram from units that carry data_transformations
  # metadata.
  #
  # Each such unit becomes a node whose shape reflects its transformation
  # categories (see #dataflow_shape); each transformation with a receiver
  # becomes an edge from the unit to that receiver, labelled
  # "category: method". Units with nil metadata or no transformations are
  # skipped.
  #
  # @param units [Array<ExtractedUnit>, nil] Units with :data_transformations metadata
  # @return [String] Mermaid "flowchart TD" source
  def render_dataflow(units)
    lines = ['flowchart TD']
    return lines.join("\n") if units.nil? || units.empty?

    seen_nodes = {}

    units.each do |unit|
      transformations = lookup(unit.metadata || {}, :data_transformations)
      next unless transformations.is_a?(Array) && transformations.any?

      node_id = sanitize_id(unit.identifier)
      unless seen_nodes[node_id]
        seen_nodes[node_id] = true
        lines << " #{node_id}#{dataflow_shape(transformations)}"
      end

      transformations.each do |transformation|
        receiver = lookup(transformation, :receiver)
        next unless receiver

        receiver_id = sanitize_id(receiver)
        declare_node(lines, seen_nodes, receiver_id, receiver)

        category = lookup(transformation, :category)&.to_s
        label = [category, lookup(transformation, :method)].compact.join(': ')
        # An empty label would produce "-->||", so fall back to a plain arrow.
        lines << if label.empty?
                   " #{node_id} --> #{receiver_id}"
                 else
                   " #{node_id} -->|#{label}| #{receiver_id}"
                 end
      end
    end

    lines.join("\n")
  end

  # Render a combined architecture document.
  #
  # The markdown contains a fenced Mermaid block for the call graph, the
  # dependency map and the dataflow diagram, followed by an "Analysis
  # Summary" section that is filled in only when +analysis+ is given.
  #
  # @param units [Array<ExtractedUnit>] Extracted units
  # @param graph_data [Hash] Serialized dependency graph data
  # @param analysis [Hash, nil] Graph analysis report from GraphAnalyzer#analyze
  # @return [String] Combined markdown document
  def render_architecture(units, graph_data, analysis)
    sections = ['# Architecture Overview', '']

    sections.concat(diagram_section('Call Graph', render_call_graph(units)))
    sections.concat(diagram_section('Dependency Map', render_dependency_map(graph_data)))
    sections.concat(diagram_section('Data Flow', render_dataflow(units)))

    sections << '## Analysis Summary'
    sections << ''
    sections.concat(analysis_summary(analysis)) if analysis

    sections.join("\n")
  end

  private

  # Read +key+ from a hash that may be keyed by symbols or by strings.
  #
  # @param hash [Hash] Hash to read from
  # @param key [Symbol] Key to look up
  # @return [Object, nil] Value under the symbol key, else under the string key
  def lookup(hash, key)
    hash[key] || hash[key.to_s]
  end

  # Append a rectangular node declaration unless the id was already declared.
  #
  # @param lines [Array<String>] Diagram lines being built (mutated)
  # @param seen_nodes [Hash] Ids declared so far (mutated)
  # @param node_id [String] Sanitized node id
  # @param label [Object] Raw label text
  # @return [void]
  def declare_node(lines, seen_nodes, node_id, label)
    return if seen_nodes[node_id]

    seen_nodes[node_id] = true
    lines << " #{node_id}[\"#{escape_label(label)}\"]"
  end

  # Resolve the subgraph a node belongs to.
  #
  # @param meta [Hash, nil] Node metadata
  # @return [Symbol] The node's type, or :unknown when missing
  def node_type(meta)
    lookup(meta || {}, :type)&.to_sym || :unknown
  end

  # Build the markdown lines for one titled, fenced Mermaid diagram.
  #
  # @param title [String] Section heading text
  # @param diagram [String] Mermaid source
  # @return [Array<String>] Lines including the trailing blank line
  def diagram_section(title, diagram)
    ["## #{title}", '', '```mermaid', diagram, '```', '']
  end

  # Build the bullet list (and optional "Top Hubs" / "Cycles" subsections)
  # for the analysis summary. At most five hubs are listed.
  #
  # @param analysis [Hash] Graph analysis report
  # @return [Array<String>] Markdown lines
  def analysis_summary(analysis)
    stats = lookup(analysis, :stats) || {}
    summary = [
      "- **Orphans:** #{lookup(stats, :orphan_count) || 0}",
      "- **Dead ends:** #{lookup(stats, :dead_end_count) || 0}",
      "- **Hubs:** #{lookup(stats, :hub_count) || 0}",
      "- **Cycles:** #{lookup(stats, :cycle_count) || 0}"
    ]

    hubs = lookup(analysis, :hubs) || []
    if hubs.any?
      summary.push('', '### Top Hubs', '')
      hubs.first(5).each do |hub|
        summary << "- #{lookup(hub, :identifier)} (#{lookup(hub, :dependent_count)} dependents)"
      end
    end

    cycles = lookup(analysis, :cycles) || []
    if cycles.any?
      summary.push('', '### Cycles', '')
      cycles.each { |cycle| summary << "- #{cycle.join(' -> ')}" }
    end

    summary
  end

  # Sanitize an identifier for use as a Mermaid node ID.
  #
  # Every character outside [a-zA-Z0-9_] becomes an underscore, so distinct
  # identifiers can collapse to the same id (e.g. "A::B" and "A__B").
  #
  # @param identifier [String] Raw identifier
  # @return [String] Node id containing only letters, digits and underscores
  def sanitize_id(identifier)
    identifier.to_s.gsub(/[^a-zA-Z0-9_]/, '_')
  end

  # Escape a label string for use inside Mermaid quoted labels.
  #
  # Only double quotes are replaced (with &quot;).
  #
  # @param label [String] Raw label text
  # @return [String] Escaped label
  def escape_label(label)
    label.to_s.gsub('"', '&quot;')
  end

  # Pick the node shape for a unit from its transformation categories.
  #
  # Precedence is construction, then serialization, then deserialization.
  # A construction node is labelled with the method of the first
  # construction transformation (falling back to "new").
  #
  # @param transformations [Array<Hash>] Transformation metadata
  # @return [String] Mermaid shape syntax to append to the node id
  def dataflow_shape(transformations)
    categories = transformations.map { |t| lookup(t, :category)&.to_sym }

    if categories.include?(:construction)
      constructor = transformations[categories.index(:construction)]
      "([\"#{escape_label(lookup(constructor, :method) || 'new')}\"])"
    elsif categories.include?(:serialization)
      '[/"serialization"/]'
    elsif categories.include?(:deserialization)
      '[\"deserialization"\\]'
    else
      '["data"]'
    end
  end
end
274
+ end
275
+ end
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../ast/parser'
4
+ require_relative '../ast/method_extractor'
5
+ require_relative '../ast/call_site_extractor'
6
+ require_relative '../extracted_unit'
7
+ require_relative 'fqn_builder'
8
+
9
+ module CodebaseIndex
10
+ module RubyAnalyzer
11
# Extracts method-level units from Ruby source code.
#
# Every `def` / `def self.` found directly in a class or module body becomes
# an ExtractedUnit of type :ruby_method carrying its visibility, call graph
# and derived dependencies.
#
# @example
#   analyzer = RubyAnalyzer::MethodAnalyzer.new
#   units = analyzer.analyze(source: File.read(path), file_path: path)
#   units.first.identifier #=> "MyClass#my_method"
#
class MethodAnalyzer
  include FqnBuilder

  # @param parser [Ast::Parser, nil] Parser to use; a default Ast::Parser is
  #   built when nil
  def initialize(parser: nil)
    @parser = parser || Ast::Parser.new
    @call_site_extractor = Ast::CallSiteExtractor.new
  end

  # Parse +source+ and collect one unit per method definition.
  #
  # @param source [String] Ruby source code
  # @param file_path [String] Path recorded on each produced unit
  # @return [Array<ExtractedUnit>] Extracted method units
  def analyze(source:, file_path:)
    tree = @parser.parse(source)

    [].tap do |collected|
      extract_methods_from_tree(tree, source, file_path, [], collected)
    end
  end

  private

  # Walk the tree: class/module nodes are handed to
  # #process_container_methods, any other node is descended into.
  def extract_methods_from_tree(node, source, file_path, namespace_stack, units)
    return unless node.is_a?(Ast::Node)

    kind = node.type
    if %i[class module].include?(kind)
      process_container_methods(node, kind, source, file_path, namespace_stack, units)
    else
      (node.children || []).each do |child|
        extract_methods_from_tree(child, source, file_path, namespace_stack, units)
      end
    end
  end

  # Turn the direct children of a class/module body into method units.
  #
  # The body is taken to start at child index 2 for classes and 1 for
  # modules. :send children are fed to the visibility tracker, :def children
  # become instance-method units with the tracker's current visibility,
  # :defs children become public singleton-method units, and nested
  # classes/modules are recursed into with this container pushed onto the
  # namespace stack.
  #
  # NOTE(review): a `private def foo` form presumably arrives as a :send
  # node and is therefore not extracted as a method here - confirm against
  # the parser's node layout.
  def process_container_methods(node, type, source, file_path, namespace_stack, units)
    container_name = node.method_name
    fqn = build_fqn(container_name, namespace_stack)
    first_body_index = type == :class ? 2 : 1
    body = (node.children || [])[first_body_index..] || []

    tracker = VisibilityTracker.new
    nested_namespace = namespace_stack + [container_name]

    body.each do |child|
      next unless child.is_a?(Ast::Node)

      case child.type
      when :send then tracker.process_send(child)
      when :def then units << build_method_unit(child, fqn, '#', tracker.current, file_path)
      when :defs then units << build_method_unit(child, fqn, '.', :public, file_path)
      when :class, :module
        extract_methods_from_tree(child, source, file_path, nested_namespace, units)
      end
    end
  end

  # Build the ExtractedUnit for one method node.
  #
  # @param separator [String] '#' for instance methods, '.' for singleton methods
  # @return [ExtractedUnit]
  def build_method_unit(method_node, class_fqn, separator, visibility, file_path)
    method_identifier = "#{class_fqn}#{separator}#{method_node.method_name}"
    calls = extract_call_graph(method_node)
    deps = build_dependencies(calls)

    ExtractedUnit.new(type: :ruby_method, identifier: method_identifier, file_path: file_path).tap do |unit|
      unit.namespace = class_fqn
      unit.source_code = method_node.source
      unit.metadata = { visibility: visibility, call_graph: calls }
      unit.dependencies = deps
    end
  end

  # Collect the calls in a method whose receiver starts with an uppercase
  # letter (likely a class or constant).
  #
  # @return [Array<Hash>] Hashes with :target, :method and :line
  def extract_call_graph(method_node)
    call_sites = @call_site_extractor.extract(method_node)

    constant_calls = call_sites.select do |site|
      site[:receiver] && site[:receiver].match?(/\A[A-Z]/)
    end

    constant_calls.map do |site|
      { target: site[:receiver], method: site[:method_name], line: site[:line] }
    end
  end

  # Reduce a call graph to one dependency per distinct target.
  #
  # @return [Array<Hash>] Dependency hashes of type :ruby_class via :method_call
  def build_dependencies(call_graph)
    call_graph.uniq { |call| call[:target] }.map do |call|
      { type: :ruby_class, target: call[:target], via: :method_call }
    end
  end

  # Tracks visibility state as we walk through class body statements.
  class VisibilityTracker
    VISIBILITY_METHODS = %w[private protected public].freeze

    attr_reader :current

    def initialize
      @current = :public
    end

    # Update the current visibility when +send_node+ is a bare
    # `private` / `protected` / `public` call. Calls with a receiver or
    # with arguments are ignored.
    #
    # @param send_node [Ast::Node] A :send node from a class body
    # @return [Symbol, nil] The new visibility, or nil when nothing changed
    def process_send(send_node)
      modifier = send_node.method_name
      return unless modifier && VISIBILITY_METHODS.include?(modifier)
      return if send_node.receiver

      args = send_node.arguments
      return if args && !args.empty?

      @current = modifier.to_sym
    end
  end
end
142
+ end
143
+ end
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../extracted_unit'
4
+
5
+ module CodebaseIndex
6
+ module RubyAnalyzer
7
# Enriches ExtractedUnit objects with runtime trace data.
#
# Two modes:
# - Recording: wraps a block with TracePoint to capture method calls
# - Merging: enriches existing units with previously collected trace data
#
# Trace events are plain hashes; merging accepts both symbol keys (as
# produced by .record) and string keys (e.g. loaded from JSON).
#
# @example Recording
#   trace_data = TraceEnricher.record { MyApp.run }
#
# @example Merging
#   TraceEnricher.merge(units: units, trace_data: trace_data)
#
class TraceEnricher
  # Record :call and :return events raised while the block runs.
  #
  # Without a block nothing is traced and an empty array is returned;
  # enabling the TracePoint with no block would leave it switched on after
  # this method returns.
  #
  # @yield Block to trace
  # @return [Array<Hash>] Collected trace events with :class_name,
  #   :method_name, :event, :path, :line, :caller_class, :caller_method and
  #   :return_class (nil for :call events)
  def self.record(&block)
    traces = []
    return traces unless block

    trace = TracePoint.new(:call, :return) do |tp|
      # The helpers are called directly from this block on purpose:
      # extract_caller_method relies on a fixed stack depth.
      traces << {
        class_name: tp.defined_class&.name || tp.defined_class.to_s,
        method_name: tp.method_id.to_s,
        event: tp.event.to_s,
        path: tp.path,
        line: tp.lineno,
        caller_class: extract_caller_class(tp),
        caller_method: extract_caller_method(tp),
        return_class: tp.event == :return ? safe_return_class(tp) : nil
      }
    end

    trace.enable(&block)
    traces
  end

  # Merge trace data into existing units.
  #
  # A unit matches the traces whose class_name/method_name equal the two
  # halves of its identifier ("Class#method" or "Class.method"). Each
  # matching unit gets a :trace entry in its metadata with the call count,
  # the callers and the distinct return types.
  #
  # @param units [Array<ExtractedUnit>] Units to enrich (mutated in place)
  # @param trace_data [Array<Hash>, nil] Trace events (from recording or JSON fixture)
  # @return [Array<ExtractedUnit>] The same units, now enriched
  def self.merge(units:, trace_data:)
    return units if trace_data.nil? || trace_data.empty?

    grouped = group_traces(trace_data)

    units.each do |unit|
      class_name, method_name = parse_identifier(unit.identifier)
      next unless class_name && method_name

      # fetch with an explicit default avoids creating empty buckets
      # through the hash's default block.
      traces = grouped.fetch("#{class_name}##{method_name}", nil)
      next unless traces

      unit.metadata[:trace] = summarize_traces(traces)
    end
  end

  class << self
    private

    # Index trace events by "class_name#method_name"; events missing either
    # field are dropped.
    #
    # NOTE(review): .record stores "#<Class:Foo>" as class_name for
    # singleton methods, which will not equal the "Foo" parsed from a
    # "Foo.bar" identifier - confirm whether class methods are expected to
    # match recorded traces.
    def group_traces(trace_data)
      grouped = Hash.new { |h, k| h[k] = [] }
      trace_data.each do |trace|
        class_name = trace['class_name'] || trace[:class_name]
        method_name = trace['method_name'] || trace[:method_name]
        next unless class_name && method_name

        grouped["#{class_name}##{method_name}"] << trace
      end
      grouped
    end

    # Reduce the events recorded for one method to the :trace summary hash.
    #
    # @param traces [Array<Hash>] Events for a single class/method pair
    # @return [Hash] :call_count, :callers and :return_types
    def summarize_traces(traces)
      calls = traces.select { |t| t['event'] == 'call' || t[:event] == 'call' }
      returns = traces.select { |t| t['event'] == 'return' || t[:event] == 'return' }

      callers = calls.filter_map do |t|
        caller_class = t['caller_class'] || t[:caller_class]
        next unless caller_class

        { 'caller_class' => caller_class, 'caller_method' => t['caller_method'] || t[:caller_method] }
      end

      return_types = returns.filter_map { |t| t['return_class'] || t[:return_class] }.uniq

      { call_count: calls.size, callers: callers, return_types: return_types }
    end

    # Split an identifier into [class_name, method_name].
    #
    # Handles both "Class#method" and "Class.method"; '#' takes precedence.
    # Returns nil for a nil identifier or one containing neither separator.
    def parse_identifier(identifier)
      text = identifier.to_s
      if text.include?('#')
        text.split('#', 2)
      elsif text.include?('.')
        text.split('.', 2)
      end
    end

    # Name of the class of the TracePoint binding's receiver (or the
    # receiver's own name when it is a Class/Module); nil on any error.
    #
    # NOTE(review): the binding belongs to the traced method, so this looks
    # like the callee's receiver rather than the calling frame - confirm
    # that is what "caller_class" is meant to hold.
    def extract_caller_class(tp)
      receiver = tp.binding.receiver
      receiver.is_a?(Class) || receiver.is_a?(Module) ? receiver.name : receiver.class.name
    rescue StandardError
      nil
    end

    # Label of the frame three levels above this helper; nil on any error.
    #
    # TracePoint doesn't directly expose the caller method, so it is read
    # from the call stack. The fixed offset presumably assumes this helper
    # is invoked straight from the TracePoint block in .record - keep that
    # call direct.
    def extract_caller_method(_tp)
      caller_locations(3, 1)&.first&.label
    rescue StandardError
      nil
    end

    # Class name of the value returned by the traced method; nil on any error.
    def safe_return_class(tp)
      tp.return_value.class.name
    rescue StandardError
      nil
    end
  end
end
138
+ end
139
+ end