codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../ast/parser'
|
|
4
|
+
require_relative '../ast/call_site_extractor'
|
|
5
|
+
|
|
6
|
+
module CodebaseIndex
|
|
7
|
+
module RubyAnalyzer
|
|
8
|
+
# Annotates existing ExtractedUnit objects with data transformation metadata.
|
|
9
|
+
#
|
|
10
|
+
# Conservative v1: detects common data transformation patterns by scanning
|
|
11
|
+
# for specific method calls that indicate construction, serialization, or
|
|
12
|
+
# deserialization.
|
|
13
|
+
#
|
|
14
|
+
# @example
|
|
15
|
+
# analyzer = RubyAnalyzer::DataFlowAnalyzer.new
|
|
16
|
+
# analyzer.annotate(units)
|
|
17
|
+
# units.first.metadata[:data_transformations]
|
|
18
|
+
# #=> [{ method: "to_json", category: :serialization, receiver: "user", line: 5 }]
|
|
19
|
+
#
|
|
20
|
+
class DataFlowAnalyzer
  # Method names treated as object construction.
  CONSTRUCTION_METHODS = %w[new].freeze
  # Method names treated as turning an object into a plain/serialized form.
  SERIALIZATION_METHODS = %w[to_h to_json to_a serialize as_json].freeze
  # Method names treated as rebuilding data from a serialized form.
  DESERIALIZATION_METHODS = %w[from_json parse].freeze

  # @param parser [Ast::Parser, nil] Parser instance (a default one is built when nil)
  def initialize(parser: nil)
    @parser = parser || Ast::Parser.new
    @call_site_extractor = Ast::CallSiteExtractor.new
  end

  # Annotate units with data transformation metadata.
  #
  # Every unit that has source code gets a :data_transformations entry written
  # into its metadata hash; units without source code are left untouched.
  #
  # @param units [Array<ExtractedUnit>] Units to annotate
  # @return [Array<ExtractedUnit>] The same units, now annotated
  def annotate(units)
    units.each do |unit|
      code = unit.source_code
      unit.metadata[:data_transformations] = detect_transformations(code) if code
    end
  end

  private

  # Parse the source, collect its call sites, and keep only the calls whose
  # method name belongs to one of the known transformation categories.
  #
  # @param source [String] Ruby source code
  # @return [Array<Hash>] One hash per matching call (:method, :category, :receiver, :line);
  #   empty when the parser raises CodebaseIndex::ExtractionError
  def detect_transformations(source)
    tree = @parser.parse(source)

    @call_site_extractor.extract(tree).each_with_object([]) do |site, found|
      kind = categorize(site[:method_name])
      next unless kind

      found << {
        method: site[:method_name],
        category: kind,
        receiver: site[:receiver],
        line: site[:line]
      }
    end
  rescue CodebaseIndex::ExtractionError
    []
  end

  # Map a method name onto its transformation category.
  #
  # @param method_name [String] Name of the called method
  # @return [Symbol, nil] :construction, :serialization, :deserialization, or nil when unknown
  def categorize(method_name)
    return :construction if CONSTRUCTION_METHODS.include?(method_name)
    return :serialization if SERIALIZATION_METHODS.include?(method_name)

    :deserialization if DESERIALIZATION_METHODS.include?(method_name)
  end
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
|
|
4
|
+
module RubyAnalyzer
|
|
5
|
+
# Shared helper for building fully qualified names from a name and namespace stack.
|
|
6
|
+
module FqnBuilder
  private

  # Build a fully qualified name by prefixing +name+ with its enclosing namespaces.
  #
  # @param name [String] Unqualified constant name
  # @param namespace_stack [Array<String>] Enclosing namespaces, outermost first
  # @return [String] +name+ itself when the stack is empty, otherwise "Outer::Inner::name"
  def build_fqn(name, namespace_stack)
    return name if namespace_stack.empty?

    prefix = namespace_stack.join('::')
    "#{prefix}::#{name}"
  end
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
|
|
4
|
+
module RubyAnalyzer
|
|
5
|
+
# Renders Mermaid-format diagrams from extracted units, dependency graphs,
|
|
6
|
+
# and graph analysis data.
|
|
7
|
+
#
|
|
8
|
+
# Produces valid Mermaid markdown strings for call graphs, dependency maps,
|
|
9
|
+
# dataflow charts, and combined architecture documents.
|
|
10
|
+
#
|
|
11
|
+
# @example Rendering a call graph
|
|
12
|
+
# renderer = MermaidRenderer.new
|
|
13
|
+
# units = RubyAnalyzer.analyze(paths: ["lib/"])
|
|
14
|
+
# puts renderer.render_call_graph(units)
|
|
15
|
+
#
|
|
16
|
+
class MermaidRenderer
  # Render a call graph from extracted units showing method call relationships.
  #
  # Every unit becomes a node; every dependency with a target becomes a node
  # plus one edge (labelled with the dependency's :via value when present).
  # Nodes and edges are emitted once each, in first-seen order.
  #
  # @param units [Array<ExtractedUnit>, nil] Units to render
  # @return [String] Mermaid graph TD markdown
  def render_call_graph(units)
    lines = ['graph TD']
    return lines.join("\n") if units.nil? || units.empty?

    seen_nodes = {}
    seen_edges = {} # hash lookup keeps de-duplication O(1) per edge

    units.each do |unit|
      node_id = sanitize_id(unit.identifier)
      declare_node(lines, seen_nodes, node_id, unit.identifier)

      (unit.dependencies || []).each do |dep|
        target = dep[:target] || dep['target']
        next unless target

        target_id = sanitize_id(target)
        declare_node(lines, seen_nodes, target_id, target)

        edge_key = "#{node_id}->#{target_id}"
        next if seen_edges[edge_key]

        seen_edges[edge_key] = true
        via = dep[:via] || dep['via']
        lines << (via ? " #{node_id} -->|#{via}| #{target_id}" : " #{node_id} --> #{target_id}")
      end
    end

    lines.join("\n")
  end

  # Render a dependency map from graph data (as returned by DependencyGraph#to_h).
  #
  # Nodes are grouped into one subgraph per type (:unknown when a node has no
  # type). Edges are emitted once each, and only when their target is a known node.
  #
  # @param graph_data [Hash, nil] Serialized graph data with :nodes and :edges keys
  #   (symbol or string keys are both accepted)
  # @return [String] Mermaid graph TD markdown
  def render_dependency_map(graph_data)
    lines = ['graph TD']
    return lines.join("\n") unless graph_data

    nodes = graph_data[:nodes] || graph_data['nodes'] || {}
    edges = graph_data[:edges] || graph_data['edges'] || {}
    return lines.join("\n") if nodes.empty?

    # Group identifiers by type, preserving first-seen order of both types and nodes.
    by_type = Hash.new { |hash, type| hash[type] = [] }
    nodes.each { |identifier, meta| by_type[node_type(meta)] << identifier }

    by_type.each do |type, identifiers|
      lines << " subgraph #{type}"
      identifiers.each do |id|
        lines << " #{sanitize_id(id)}[\"#{escape_label(id)}\"]"
      end
      lines << ' end'
    end

    seen_edges = {}
    edges.each do |source, targets|
      source_id = sanitize_id(source)

      Array(targets).each do |target|
        next unless nodes.key?(target)

        target_id = sanitize_id(target)
        edge_key = "#{source_id}->#{target_id}"
        next if seen_edges[edge_key]

        seen_edges[edge_key] = true
        lines << " #{source_id} --> #{target_id}"
      end
    end

    lines.join("\n")
  end

  # Render a dataflow diagram from units that have data_transformations metadata.
  #
  # Each unit with at least one transformation becomes a shaped node (see
  # #dataflow_shape); each transformation with a receiver adds the receiver as
  # a node and an edge labelled "category: method". Units without metadata or
  # without transformations are skipped.
  #
  # @param units [Array<ExtractedUnit>, nil] Units with :data_transformations metadata
  # @return [String] Mermaid flowchart TD markdown
  def render_dataflow(units)
    lines = ['flowchart TD']
    return lines.join("\n") if units.nil? || units.empty?

    seen_nodes = {}

    units.each do |unit|
      metadata = unit.metadata || {} # tolerate units that carry no metadata at all
      transformations = metadata[:data_transformations] || metadata['data_transformations']
      next unless transformations.is_a?(Array) && transformations.any?

      node_id = sanitize_id(unit.identifier)
      unless seen_nodes[node_id]
        seen_nodes[node_id] = true
        lines << " #{node_id}#{dataflow_shape(transformations)}"
      end

      transformations.each do |t|
        receiver = t[:receiver] || t['receiver']
        next unless receiver

        receiver_id = sanitize_id(receiver)
        declare_node(lines, seen_nodes, receiver_id, receiver)

        label = [transformation_category(t)&.to_s, t[:method] || t['method']].compact.join(': ')
        # An empty |...| label is not valid Mermaid, so fall back to a plain arrow.
        lines << if label.empty?
                   " #{node_id} --> #{receiver_id}"
                 else
                   " #{node_id} -->|#{label}| #{receiver_id}"
                 end
      end
    end

    lines.join("\n")
  end

  # Render a combined architecture document with all three diagram types.
  #
  # Returns a markdown document with headers and fenced Mermaid code blocks
  # for call graph, dependency map, and dataflow diagrams, followed by an
  # "Analysis Summary" section (left empty when no analysis is given).
  #
  # @param units [Array<ExtractedUnit>, nil] Extracted units
  # @param graph_data [Hash, nil] Serialized dependency graph data
  # @param analysis [Hash, nil] Graph analysis report with :stats, :hubs and :cycles
  # @return [String] Combined markdown document
  def render_architecture(units, graph_data, analysis)
    sections = ['# Architecture Overview', '']

    sections.concat(diagram_section('Call Graph', render_call_graph(units)))
    sections.concat(diagram_section('Dependency Map', render_dependency_map(graph_data)))
    sections.concat(diagram_section('Data Flow', render_dataflow(units)))

    sections << '## Analysis Summary'
    sections << ''
    sections.concat(analysis_summary(analysis)) if analysis

    sections.join("\n")
  end

  private

  # Append a plain rectangular node declaration unless that ID was already declared.
  #
  # @param lines [Array<String>] Output buffer (mutated)
  # @param seen_nodes [Hash] Registry of already declared node IDs (mutated)
  # @param node_id [String] Sanitized Mermaid node ID
  # @param label [Object] Raw label text
  # @return [void]
  def declare_node(lines, seen_nodes, node_id, label)
    return if seen_nodes[node_id]

    seen_nodes[node_id] = true
    lines << " #{node_id}[\"#{escape_label(label)}\"]"
  end

  # Read a node's type from its metadata, accepting symbol or string keys.
  #
  # @param meta [Hash, nil] Node metadata
  # @return [Symbol] The node type, or :unknown when absent
  def node_type(meta)
    raw = meta.nil? ? nil : (meta[:type] || meta['type'])
    raw&.to_sym || :unknown
  end

  # Read a transformation's category, accepting symbol or string keys.
  #
  # @param transformation [Hash] Transformation metadata
  # @return [Symbol, nil] Category as a symbol, or nil when absent
  def transformation_category(transformation)
    (transformation[:category] || transformation['category'])&.to_sym
  end

  # Build the markdown lines for one titled, fenced Mermaid diagram.
  #
  # @param title [String] Section heading text
  # @param diagram [String] Mermaid source
  # @return [Array<String>] Heading, fence, diagram and trailing blank line
  def diagram_section(title, diagram)
    ["## #{title}", '', '```mermaid', diagram, '```', '']
  end

  # Build the bullet lines of the analysis summary: the four counters, then
  # up to five hubs and every cycle when those lists are non-empty.
  #
  # @param analysis [Hash] Graph analysis report (symbol or string keys)
  # @return [Array<String>] Markdown lines
  def analysis_summary(analysis)
    stats = analysis[:stats] || analysis['stats'] || {}
    out = [
      "- **Orphans:** #{stats[:orphan_count] || stats['orphan_count'] || 0}",
      "- **Dead ends:** #{stats[:dead_end_count] || stats['dead_end_count'] || 0}",
      "- **Hubs:** #{stats[:hub_count] || stats['hub_count'] || 0}",
      "- **Cycles:** #{stats[:cycle_count] || stats['cycle_count'] || 0}"
    ]

    hubs = analysis[:hubs] || analysis['hubs'] || []
    if hubs.any?
      out.push('', '### Top Hubs', '')
      hubs.first(5).each do |hub|
        id = hub[:identifier] || hub['identifier']
        count = hub[:dependent_count] || hub['dependent_count']
        out << "- #{id} (#{count} dependents)"
      end
    end

    cycles = analysis[:cycles] || analysis['cycles'] || []
    if cycles.any?
      out.push('', '### Cycles', '')
      cycles.each { |cycle| out << "- #{cycle.join(' -> ')}" }
    end

    out
  end

  # Sanitize an identifier for use as a Mermaid node ID.
  #
  # Every character other than ASCII letters, digits and underscore is
  # replaced with an underscore.
  #
  # @param identifier [Object] Raw identifier (converted with #to_s)
  # @return [String] Safe Mermaid node ID
  def sanitize_id(identifier)
    identifier.to_s.gsub(/[^a-zA-Z0-9_]/, '_')
  end

  # Escape a label string for use inside Mermaid quoted labels.
  #
  # A literal double quote would terminate the quoted label, so it is replaced
  # with Mermaid's entity code for a quote.
  #
  # @param label [Object] Raw label text (converted with #to_s)
  # @return [String] Escaped label
  def escape_label(label)
    label.to_s.gsub('"', '#quot;')
  end

  # Determine Mermaid node shape based on dominant transformation category.
  #
  # Construction wins over serialization, which wins over deserialization.
  # A construction node is labelled with the constructing call's method name.
  #
  # @param transformations [Array<Hash>] Transformation metadata
  # @return [String] Mermaid shape syntax (including the quoted label)
  def dataflow_shape(transformations)
    categories = transformations.map { |t| transformation_category(t) }

    if categories.include?(:construction)
      # Label with the construction call itself, not whichever call came first.
      constructor = transformations.find { |t| transformation_category(t) == :construction }
      method_name = constructor[:method] || constructor['method'] || 'new'
      "([\"#{escape_label(method_name)}\"])"
    elsif categories.include?(:serialization)
      '[/"serialization"/]'
    elsif categories.include?(:deserialization)
      '[\"deserialization"\\]'
    else
      '["data"]'
    end
  end
end
|
|
274
|
+
end
|
|
275
|
+
end
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../ast/parser'
|
|
4
|
+
require_relative '../ast/method_extractor'
|
|
5
|
+
require_relative '../ast/call_site_extractor'
|
|
6
|
+
require_relative '../extracted_unit'
|
|
7
|
+
require_relative 'fqn_builder'
|
|
8
|
+
|
|
9
|
+
module CodebaseIndex
|
|
10
|
+
module RubyAnalyzer
|
|
11
|
+
# Extracts method-level units from Ruby source code.
|
|
12
|
+
#
|
|
13
|
+
# For each class/module, extracts methods as ExtractedUnit objects with type
|
|
14
|
+
# :ruby_method. Includes visibility, parameters, call graph, and dependencies.
|
|
15
|
+
#
|
|
16
|
+
# @example
|
|
17
|
+
# analyzer = RubyAnalyzer::MethodAnalyzer.new
|
|
18
|
+
# units = analyzer.analyze(source: File.read(path), file_path: path)
|
|
19
|
+
# units.first.identifier #=> "MyClass#my_method"
|
|
20
|
+
#
|
|
21
|
+
class MethodAnalyzer
  include FqnBuilder

  # @param parser [Ast::Parser, nil] Parser instance (a default one is built when nil)
  def initialize(parser: nil)
    @parser = parser || Ast::Parser.new
    @call_site_extractor = Ast::CallSiteExtractor.new
  end

  # Analyze source code and extract method units.
  #
  # @param source [String] Ruby source code
  # @param file_path [String] Path recorded on every extracted unit
  # @return [Array<ExtractedUnit>] Extracted method units
  def analyze(source:, file_path:)
    collected = []
    extract_methods_from_tree(@parser.parse(source), source, file_path, [], collected)
    collected
  end

  private

  # Walk the tree: class and module nodes are handed to
  # #process_container_methods, any other node is descended into.
  def extract_methods_from_tree(node, source, file_path, namespace_stack, units)
    return unless node.is_a?(Ast::Node)

    kind = node.type
    if %i[class module].include?(kind)
      process_container_methods(node, kind, source, file_path, namespace_stack, units)
    else
      (node.children || []).each do |child|
        extract_methods_from_tree(child, source, file_path, namespace_stack, units)
      end
    end
  end

  # Emit one unit per method defined directly in a class/module body and
  # recurse into nested classes/modules with the extended namespace.
  def process_container_methods(node, type, source, file_path, namespace_stack, units)
    container_name = node.method_name
    container_fqn = build_fqn(container_name, namespace_stack)
    # Body statements start after the first two children of a class node and
    # after the first child of a module node.
    leading = type == :class ? 2 : 1
    statements = (node.children || []).drop(leading)

    tracker = VisibilityTracker.new
    nested_namespace = namespace_stack + [container_name]

    statements.each do |statement|
      next unless statement.is_a?(Ast::Node)

      case statement.type
      when :send
        tracker.process_send(statement)
      when :def
        units << build_method_unit(statement, container_fqn, '#', tracker.current, file_path)
      when :defs
        # Singleton method definitions are always recorded as public.
        units << build_method_unit(statement, container_fqn, '.', :public, file_path)
      when :class, :module
        extract_methods_from_tree(statement, source, file_path, nested_namespace, units)
      end
    end
  end

  # Build the :ruby_method unit for a single method definition node.
  def build_method_unit(method_node, class_fqn, separator, visibility, file_path)
    identifier = "#{class_fqn}#{separator}#{method_node.method_name}"
    graph = extract_call_graph(method_node)
    deps = build_dependencies(graph)

    ExtractedUnit.new(type: :ruby_method, identifier: identifier, file_path: file_path).tap do |unit|
      unit.namespace = class_fqn
      unit.source_code = method_node.source
      unit.metadata = { visibility: visibility, call_graph: graph }
      unit.dependencies = deps
    end
  end

  # Collect calls whose receiver starts with a capital letter (likely a class/constant).
  def extract_call_graph(method_node)
    constant_calls = @call_site_extractor.extract(method_node).select do |site|
      site[:receiver]&.match?(/\A[A-Z]/)
    end

    constant_calls.map do |site|
      { target: site[:receiver], method: site[:method_name], line: site[:line] }
    end
  end

  # Collapse the call graph into one :ruby_class dependency per distinct target.
  def build_dependencies(call_graph)
    targets = call_graph.map { |entry| entry[:target] }
    targets.uniq.map { |target| { type: :ruby_class, target: target, via: :method_call } }
  end

  # Tracks visibility state as we walk through class body statements.
  class VisibilityTracker
    VISIBILITY_METHODS = %w[private protected public].freeze

    attr_reader :current

    def initialize
      @current = :public
    end

    # Process a send node that might be a visibility modifier.
    #
    # Only a bare `private` / `protected` / `public` (no receiver, no
    # arguments) switches the current visibility; everything else is ignored.
    def process_send(send_node)
      name = send_node.method_name
      return unless name && VISIBILITY_METHODS.include?(name)
      return if send_node.receiver

      args = send_node.arguments
      return if args && !args.empty?

      @current = name.to_sym
    end
  end
end
|
|
142
|
+
end
|
|
143
|
+
end
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../extracted_unit'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
|
|
6
|
+
module RubyAnalyzer
|
|
7
|
+
# Enriches ExtractedUnit objects with runtime trace data.
|
|
8
|
+
#
|
|
9
|
+
# Two modes:
|
|
10
|
+
# - Recording: wraps a block with TracePoint to capture method calls
|
|
11
|
+
# - Merging: enriches existing units with previously collected trace data
|
|
12
|
+
#
|
|
13
|
+
# @example Recording
|
|
14
|
+
# trace_data = TraceEnricher.record { MyApp.run }
|
|
15
|
+
#
|
|
16
|
+
# @example Merging
|
|
17
|
+
# TraceEnricher.merge(units: units, trace_data: trace_data)
|
|
18
|
+
#
|
|
19
|
+
class TraceEnricher
  # Record method calls during block execution using TracePoint.
  #
  # Each :call / :return event of a Ruby-defined method produces one hash with
  # the keys :class_name, :method_name, :singleton, :event, :path, :line,
  # :caller_class, :caller_method and :return_class.
  #
  # The caller is taken from a shadow call stack of the traced methods, so it
  # names the enclosing traced method (nil for calls made directly from the
  # block). Singleton (class-level) methods are reported under the name of the
  # module they are attached to, with :singleton set to true.
  #
  # @yield Block to trace
  # @return [Array<Hash>] Collected trace events (empty when no block is given)
  def self.record(&block)
    traces = []
    # Without a block, TracePoint#enable would stay switched on indefinitely.
    return traces unless block

    # One shadow stack per thread so concurrent threads do not interleave frames.
    stacks = Hash.new { |hash, thread| hash[thread] = [] }

    trace = TracePoint.new(:call, :return) do |tp|
      stack = stacks[Thread.current]
      frame = { class_name: callee_class_name(tp), method_name: tp.method_id.to_s }

      if tp.event == :call
        caller_frame = stack.last
        stack.push(frame)
      else
        stack.pop
        caller_frame = stack.last
      end

      traces << {
        class_name: frame[:class_name],
        method_name: frame[:method_name],
        singleton: singleton_owner?(tp.defined_class),
        event: tp.event.to_s,
        path: tp.path,
        line: tp.lineno,
        caller_class: caller_frame && caller_frame[:class_name],
        caller_method: caller_frame && caller_frame[:method_name],
        return_class: tp.event == :return ? safe_return_class(tp) : nil
      }
    end

    trace.enable(&block)
    traces
  end

  # Merge trace data into existing units.
  #
  # Mutates each matching unit's metadata by adding a :trace key with
  # call count, callers, and return types. Units are matched on
  # "Class#method" / "Class.method" identifiers; traces that carry a
  # singleton flag are only merged into units of the matching kind, while
  # traces without the flag (e.g. JSON fixtures) match either.
  #
  # @param units [Array<ExtractedUnit>] Units to enrich
  # @param trace_data [Array<Hash>, nil] Trace events (from recording or JSON fixture;
  #   symbol or string keys)
  # @return [Array<ExtractedUnit>] The same units, now enriched
  def self.merge(units:, trace_data:)
    return units if trace_data.nil? || trace_data.empty?

    # Index traces by class_name + method_name
    grouped = group_traces(trace_data)

    units.each do |unit|
      class_name, method_name = parse_identifier(unit.identifier)
      next unless class_name && method_name

      key = "#{class_name}##{method_name}"
      next unless grouped.key?(key)

      traces = traces_for_unit(grouped[key], unit.identifier)
      next if traces.empty?

      calls = traces.select { |t| trace_field(t, :event).to_s == 'call' }
      returns = traces.select { |t| trace_field(t, :event).to_s == 'return' }

      callers = calls.filter_map do |t|
        caller_class = trace_field(t, :caller_class)
        next unless caller_class

        { 'caller_class' => caller_class, 'caller_method' => trace_field(t, :caller_method) }
      end

      unit.metadata[:trace] = {
        call_count: calls.size,
        callers: callers,
        return_types: returns.filter_map { |t| trace_field(t, :return_class) }.uniq
      }
    end
  end

  class << self
    private

    # Group trace events under a "Class#method" key; events missing either part are dropped.
    def group_traces(trace_data)
      grouped = Hash.new { |h, k| h[k] = [] }
      trace_data.each do |trace|
        class_name = trace_field(trace, :class_name)
        method_name = trace_field(trace, :method_name)
        next unless class_name && method_name

        grouped["#{class_name}##{method_name}"] << trace
      end
      grouped
    end

    # Split "Class#method" or "Class.method" into [class, method]; nil for anything else.
    def parse_identifier(identifier)
      if identifier.include?('#')
        identifier.split('#', 2)
      elsif identifier.include?('.')
        identifier.split('.', 2)
      end
    end

    # Read a trace attribute, preferring the string key and falling back to the symbol key.
    def trace_field(trace, name)
      trace[name.to_s] || trace[name]
    end

    # Keep the traces whose singleton flag agrees with the unit's identifier
    # style ('.' means class method). Traces without a flag are always kept.
    def traces_for_unit(traces, identifier)
      class_method_unit = !identifier.include?('#')

      traces.reject do |trace|
        flag = trace.key?('singleton') ? trace['singleton'] : trace[:singleton]
        !flag.nil? && flag != class_method_unit
      end
    end

    # True when the method's owner is a singleton class (i.e. a class-level method).
    def singleton_owner?(owner)
      owner.is_a?(Module) && owner.singleton_class?
    end

    # Name of the class/module that owns the traced method. A singleton class
    # has no name of its own, so the module it is attached to is used instead.
    def callee_class_name(tp)
      owner = tp.defined_class

      if singleton_owner?(owner)
        # attached_object is only available on newer Rubies; the receiver is the fallback.
        attached = owner.respond_to?(:attached_object) ? owner.attached_object : tp.self
        name = attached.name if attached.is_a?(Module)
        return name if name
      end

      owner&.name || owner.to_s
    end

    # Class name of the returned value, or nil when it cannot be determined.
    def safe_return_class(tp)
      tp.return_value.class.name
    rescue StandardError
      nil
    end
  end
end
|
|
138
|
+
end
|
|
139
|
+
end
|