codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
|
|
4
|
+
module FlowAnalysis
|
|
5
|
+
# Maps render/redirect AST nodes to HTTP status codes.
|
|
6
|
+
#
|
|
7
|
+
# Uses a built-in STATUS_CODES hash rather than depending on Rack at runtime.
|
|
8
|
+
# Handles explicit status kwargs, render_<status> conventions, head calls,
|
|
9
|
+
# and redirect_to defaults.
|
|
10
|
+
#
|
|
11
|
+
# @example Resolving a render call
|
|
12
|
+
# ResponseCodeMapper.resolve_method("render_created", arguments: []) #=> 201
|
|
13
|
+
# ResponseCodeMapper.resolve_method("redirect_to", arguments: ["/home"]) #=> 302
|
|
14
|
+
# ResponseCodeMapper.resolve_method("head", arguments: [":no_content"]) #=> 204
|
|
15
|
+
#
|
|
16
|
+
class ResponseCodeMapper
  # Subset of Rack::Utils::SYMBOL_TO_STATUS_CODE, inlined to avoid runtime Rack dependency.
  #
  # Both the Rack 2 names (:payload_too_large, :unprocessable_entity) and the
  # Rack 3.1 names (:content_too_large, :unprocessable_content) are listed so
  # that controllers written against either Rack version resolve.
  STATUS_CODES = {
    continue: 100,
    switching_protocols: 101,
    processing: 102,
    early_hints: 103,
    ok: 200,
    created: 201,
    accepted: 202,
    non_authoritative_information: 203,
    no_content: 204,
    reset_content: 205,
    partial_content: 206,
    multi_status: 207,
    already_reported: 208,
    im_used: 226,
    multiple_choices: 300,
    moved_permanently: 301,
    found: 302,
    see_other: 303,
    not_modified: 304,
    use_proxy: 305,
    temporary_redirect: 307,
    permanent_redirect: 308,
    bad_request: 400,
    unauthorized: 401,
    payment_required: 402,
    forbidden: 403,
    not_found: 404,
    method_not_allowed: 405,
    not_acceptable: 406,
    proxy_authentication_required: 407,
    request_timeout: 408,
    conflict: 409,
    gone: 410,
    length_required: 411,
    precondition_failed: 412,
    payload_too_large: 413,
    content_too_large: 413,
    uri_too_long: 414,
    unsupported_media_type: 415,
    range_not_satisfiable: 416,
    expectation_failed: 417,
    misdirected_request: 421,
    unprocessable_entity: 422,
    unprocessable_content: 422,
    locked: 423,
    failed_dependency: 424,
    too_early: 425,
    upgrade_required: 426,
    precondition_required: 428,
    too_many_requests: 429,
    request_header_fields_too_large: 431,
    unavailable_for_legal_reasons: 451,
    internal_server_error: 500,
    not_implemented: 501,
    bad_gateway: 502,
    service_unavailable: 503,
    gateway_timeout: 504,
    http_version_not_supported: 505,
    variant_also_negotiates: 506,
    insufficient_storage: 507,
    loop_detected: 508,
    not_extended: 510,
    network_authentication_required: 511
  }.freeze

  # Matches an explicit status option in either `status: value` or
  # `:status => value` form and captures only the value token. The capture
  # stops at whitespace, a comma or a closing brace/paren so that trailing
  # options in the same argument string are not swallowed. The word boundary
  # keeps keys such as `http_status:` from matching.
  STATUS_KWARG_PATTERN = /(?:\bstatus:|:status\s*=>)\s*([^\s,})]+)/.freeze
  private_constant :STATUS_KWARG_PATTERN

  # Resolve a render/redirect/head method call to an HTTP status code.
  #
  # Strategies tried in order:
  # 1. Explicit status kwarg: `render json: x, status: :created` -> 201
  # 2. render_<status> convention: `render_created` -> 201
  # 3. head with status arg: `head :no_content` -> 204
  # 4. redirect_to default: 302
  #
  # An explicit status kwarg that cannot be resolved (e.g. a variable name)
  # yields nil; later strategies are not consulted in that case.
  #
  # @param method_name [String, Symbol] The method name (render, redirect_to, head, render_created, etc.)
  # @param arguments [Array<String>, nil] Argument representations from AST
  # @return [Integer, nil] HTTP status code or nil if unresolvable
  def self.resolve_method(method_name, arguments: [])
    name = method_name.to_s
    args = Array(arguments)

    # Case 1: an explicit status option wins over every naming convention
    status_from_kwarg = extract_status_from_args(args)
    return resolve_status(status_from_kwarg) if status_from_kwarg

    # Case 2: render_<status> convention
    if name.start_with?('render_')
      code = STATUS_CODES[name.delete_prefix('render_').to_sym]
      return code if code
    end

    # Case 3: head :status
    return resolve_status(args.first) if name == 'head' && args.first

    # Case 4: redirect_to defaults to 302
    return 302 if name == 'redirect_to'

    nil
  end

  # Resolve a status value (symbol name, integer, or string) to an integer code.
  #
  # Strings may carry the leading colon of an AST symbol representation
  # (":created") or be a run of digits ("201"); surrounding whitespace is ignored.
  #
  # @param value [String, Integer, Symbol] Status representation
  # @return [Integer, nil] HTTP status code or nil
  def self.resolve_status(value)
    case value
    when Integer
      value
    when Symbol
      STATUS_CODES[value]
    when String
      # Strip leading colon from AST symbol representation (":created" -> "created")
      cleaned = value.strip.delete_prefix(':')

      # Symbol name first, then a plain integer string
      STATUS_CODES[cleaned.to_sym] || (cleaned.to_i if cleaned.match?(/\A\d+\z/))
    end
  end

  # Extract a status value from argument strings.
  #
  # Looks for patterns like "status: :created", "status: 201" or
  # ":status => 201" in the argument list and returns the first value token found.
  # Non-String arguments are skipped.
  #
  # @param arguments [Array<String>] Argument representations
  # @return [String, nil] The status value if found
  def self.extract_status_from_args(arguments)
    arguments.each do |arg|
      next unless arg.is_a?(String)

      match = STATUS_KWARG_PATTERN.match(arg)
      return match[1] if match
    end

    nil
  end
end
|
|
153
|
+
end
|
|
154
|
+
end
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
require 'json'
|
|
5
|
+
require 'set'
|
|
6
|
+
require_relative 'ast/parser'
|
|
7
|
+
require_relative 'ast/method_extractor'
|
|
8
|
+
require_relative 'flow_analysis/operation_extractor'
|
|
9
|
+
require_relative 'flow_document'
|
|
10
|
+
|
|
11
|
+
module CodebaseIndex
|
|
12
|
+
# Orchestrates execution flow tracing from an entry point through the dependency graph.
|
|
13
|
+
#
|
|
14
|
+
# Given an entry point (e.g., "PostsController#create"), FlowAssembler:
|
|
15
|
+
# 1. Loads the ExtractedUnit JSON from disk
|
|
16
|
+
# 2. Parses its source_code with the AST layer
|
|
17
|
+
# 3. Extracts operations in source line order
|
|
18
|
+
# 4. Recursively expands targets that resolve to other units
|
|
19
|
+
# 5. Detects cycles and respects max_depth
|
|
20
|
+
# 6. Assembles a FlowDocument
|
|
21
|
+
#
|
|
22
|
+
# @example Assembling a flow
|
|
23
|
+
# assembler = FlowAssembler.new(graph: graph, extracted_dir: "/tmp/codebase_index")
|
|
24
|
+
# flow = assembler.assemble("PostsController#create", max_depth: 5)
|
|
25
|
+
# puts flow.to_markdown
|
|
26
|
+
#
|
|
27
|
+
class FlowAssembler
  # @param graph [DependencyGraph] The dependency graph for resolving targets
  # @param extracted_dir [String] Directory containing extracted unit JSON files
  def initialize(graph:, extracted_dir:)
    @graph = graph
    @extracted_dir = extracted_dir
    @parser = Ast::Parser.new
    @method_extractor = Ast::MethodExtractor.new(parser: @parser)
    @operation_extractor = FlowAnalysis::OperationExtractor.new
  end

  # Assemble an execution flow from the given entry point.
  #
  # @param entry_point [String] Unit identifier, optionally with #method_name
  # @param max_depth [Integer] Maximum recursion depth
  # @return [FlowDocument] The assembled flow document
  def assemble(entry_point, max_depth: 5)
    visited = Set.new
    steps = []

    expand(entry_point, steps, visited, depth: 0, max_depth: max_depth)

    FlowDocument.new(
      entry_point: entry_point,
      route: extract_route(entry_point),
      max_depth: max_depth,
      steps: steps
    )
  end

  private

  # Recursively expand a unit into flow steps.
  #
  # A unit is expanded at most once per assembly: reaching an already visited
  # unit again appends a 'cycle' marker step instead of recursing.
  #
  # @param identifier [String] Unit identifier (may include #method)
  # @param steps [Array<Hash>] Accumulator for step hashes
  # @param visited [Set<String>] Visited unit identifiers for cycle detection
  # @param depth [Integer] Current recursion depth
  # @param max_depth [Integer] Maximum recursion depth
  # @return [void]
  def expand(identifier, steps, visited, depth:, max_depth:)
    return if depth > max_depth

    unit_id, method_name = parse_identifier(identifier)

    if visited.include?(unit_id)
      # Cycle detected - emit a marker step
      steps << {
        unit: unit_id,
        type: 'cycle',
        operations: [{ type: :cycle, target: unit_id, line: nil }]
      }
      return
    end

    visited.add(unit_id)

    unit_data = load_unit(unit_id)
    return unless unit_data

    source_code = unit_data[:source_code]
    return unless source_code && !source_code.empty?

    metadata = unit_data[:metadata] || {}
    unit_type = unit_data[:type]&.to_s

    operations = extract_operations(source_code, method_name, metadata, unit_type)

    # The step is recorded before recursing so steps stay in call order
    steps << {
      unit: identifier,
      type: unit_type,
      file_path: unit_data[:file_path],
      operations: operations
    }

    operations.each do |op|
      expand_operation(op, identifier, steps, visited, depth: depth, max_depth: max_depth)
    end
  end

  # Extract operations from source code for a specific method.
  #
  # For controller units the applicable before-callbacks come first. When no
  # method name is given the whole source is parsed; when a named method cannot
  # be found only the callback operations (if any) are returned.
  #
  # @param source_code [String] Source of the unit
  # @param method_name [String, nil] Method to restrict extraction to
  # @param metadata [Hash] Unit metadata (callbacks/filters are read from it)
  # @param unit_type [String, nil] Unit type, e.g. "controller"
  # @return [Array<Hash>] Operation hashes
  def extract_operations(source_code, method_name, metadata, unit_type)
    operations = []

    prepend_callbacks(operations, metadata, method_name) if unit_type == 'controller'

    node = if method_name
             @method_extractor.extract_method(source_code, method_name)
           else
             @parser.parse(source_code)
           end
    # A named method that is not found yields no node; a whole-source parse
    # result is always handed to the extractor, as before.
    operations.concat(@operation_extractor.extract(node)) if node || method_name.nil?

    operations
  end

  # Prepend before_action callbacks from controller metadata.
  #
  # Handles two metadata formats:
  # - metadata[:callbacks] with :name key (legacy/test format)
  # - metadata[:filters] with :filter key (ControllerExtractor format)
  #
  # Entries that are not Hashes are ignored.
  #
  # @param operations [Array<Hash>] Accumulator the callback operations are appended to
  # @param metadata [Hash] Unit metadata
  # @param method_name [String, nil] Action the flow is traced for
  # @return [void]
  def prepend_callbacks(operations, metadata, method_name)
    callbacks = metadata[:callbacks] || metadata[:filters]
    return unless callbacks.is_a?(Array)

    callbacks.each do |cb|
      next unless cb.is_a?(Hash)
      next unless cb[:kind].to_s == 'before'

      # Handle both :name (callbacks format) and :filter (controller filters format)
      cb_name = cb[:name] || cb[:filter]
      next unless cb_name
      next unless callback_applies?(cb, method_name)

      operations << {
        type: :call,
        target: nil,
        method: cb_name.to_s,
        line: nil
      }
    end
  end

  # Decide whether a callback runs for the given action based on :only/:except.
  #
  # Without a method name every callback is considered applicable.
  #
  # @param callback [Hash] Callback/filter description
  # @param method_name [String, nil] Action name
  # @return [Boolean]
  def callback_applies?(callback, method_name)
    return true unless method_name

    action = method_name.to_s
    only = action_names(callback[:only])
    except = action_names(callback[:except])

    return false if only && !only.include?(action)
    return false if except&.include?(action)

    true
  end

  # Normalize an :only/:except value to a list of action name strings.
  #
  # Accepts an Array or a single String/Symbol (`only: :create`); any other
  # value means "no restriction" and returns nil.
  #
  # @param value [Array, String, Symbol, nil]
  # @return [Array<String>, nil]
  def action_names(value)
    case value
    when Array then value.map(&:to_s)
    when String, Symbol then [value.to_s]
    end
  end

  # Recursively expand an operation's target if it resolves to a known unit.
  #
  # Transactions and conditionals are descended into at the same depth;
  # only following a call/async target increases the depth.
  #
  # @param op [Hash] The operation to potentially expand
  # @param current_unit [String] The identifier of the unit containing this operation
  # @param steps [Array<Hash>] Accumulator for step hashes
  # @param visited [Set<String>] Visited unit identifiers for cycle detection
  # @param depth [Integer] Current recursion depth
  # @param max_depth [Integer] Maximum recursion depth
  # @return [void]
  def expand_operation(op, current_unit, steps, visited, depth:, max_depth:)
    case op[:type]
    when :call, :async
      target = op[:target]
      return unless target

      candidate = resolve_target(target)
      return unless candidate

      expand(candidate, steps, visited, depth: depth + 1, max_depth: max_depth)
    when :transaction
      (op[:nested] || []).each do |nested_op|
        expand_operation(nested_op, current_unit, steps, visited, depth: depth, max_depth: max_depth)
      end
    when :conditional
      ((op[:then_ops] || []) + (op[:else_ops] || [])).each do |branch_op|
        expand_operation(branch_op, current_unit, steps, visited, depth: depth, max_depth: max_depth)
      end
    end
  end

  # Resolve a call target to a unit identifier using graph-wide lookup.
  #
  # Uses node existence checks rather than dependency edges, because
  # dependency edges are structural (associations, includes) and don't
  # represent actual call relationships in execution flows.
  #
  # Tier 1: Graph-wide lookup — checks if the node exists anywhere in the graph,
  #         including suffix matching for unqualified class names.
  # Tier 2: Disk fallback — attempts to load the unit JSON from disk, covering
  #         units that exist in the index but were not loaded into the graph.
  #
  # @param target [String] The call target name to resolve
  # @return [String, nil] The resolved unit identifier, or nil if not found
  def resolve_target(target)
    # Tier 1: Graph-wide lookup
    return target if @graph.node_exists?(target)

    graph_match = @graph.find_node_by_suffix(target)
    return graph_match if graph_match

    # Tier 2: Disk fallback (unit JSON exists but isn't in the graph)
    load_unit(target) ? target : nil
  end

  # Parse an identifier into [unit_id, method_name].
  #   "PostsController#create" => ["PostsController", "create"]
  #   "PostService"            => ["PostService", nil]
  #   "PostService#"           => ["PostService", nil]
  #
  # Only the first '#' separates unit and method.
  #
  # @param identifier [String]
  # @return [Array(String, String), Array(String, nil)]
  def parse_identifier(identifier)
    unit_id, _separator, method_name = identifier.partition('#')
    [unit_id, method_name.empty? ? nil : method_name]
  end

  # Load an ExtractedUnit's data from its JSON file on disk.
  #
  # Uses {Extractor#collision_safe_filename} convention (with SHA256 digest suffix).
  # Falls back to legacy {Extractor#safe_filename} for older indexes.
  # Searches across type subdirectories since the extractor writes to
  # `<output_dir>/<type>/<filename>.json`.
  #
  # The directory is passed to the glob as `base:` rather than being embedded
  # in the pattern, so index directories whose names contain glob
  # metacharacters (`[`, `{`, `*`, ...) are still searched correctly.
  #
  # @param unit_id [String] Unit identifier without a method suffix
  # @return [Hash, nil] Parsed unit data with symbol keys, or nil when no usable file exists
  def load_unit(unit_id)
    base = unit_id.gsub('::', '__').gsub(/[^a-zA-Z0-9_-]/, '_')
    digest = Digest::SHA256.hexdigest(unit_id)[0, 8]

    ["#{base}_#{digest}.json", "#{base}.json"].each do |filename|
      Dir.glob(File.join('*', filename), base: @extracted_dir).each do |relative_path|
        unit_data = parse_unit_file(File.join(@extracted_dir, relative_path))
        return unit_data if unit_data
      end
    end

    nil
  end

  # Read and parse one unit JSON file.
  #
  # @param path [String] Path of the JSON file
  # @return [Hash, nil] Parsed data, or nil for malformed JSON or a non-object payload
  def parse_unit_file(path)
    unit_data = JSON.parse(File.read(path), symbolize_names: true)
    # Callers index the result with symbol keys, so only a JSON object is usable
    unit_data if unit_data.is_a?(Hash)
  rescue JSON::ParserError
    nil
  end

  # Extract route information from controller metadata.
  #
  # With a method name the route whose :action matches is used; otherwise the
  # first route of the unit.
  #
  # @param entry_point [String] Unit identifier, optionally with #method_name
  # @return [Hash, nil] Hash with :verb and :path, or nil when no route is known
  def extract_route(entry_point)
    unit_id, method_name = parse_identifier(entry_point)
    unit_data = load_unit(unit_id)
    return nil unless unit_data

    routes = (unit_data[:metadata] || {})[:routes]
    return nil unless routes.is_a?(Array)

    route = if method_name
              routes.find { |r| r.is_a?(Hash) && r[:action]&.to_s == method_name }
            else
              routes.first
            end
    return nil unless route

    {
      verb: route[:verb],
      path: route[:path]
    }
  end
end
|
|
290
|
+
end
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
|
|
6
|
+
# Value object representing an assembled execution flow trace.
|
|
7
|
+
#
|
|
8
|
+
# Contains an ordered list of steps from an entry point through the dependency graph,
|
|
9
|
+
# with each step holding operations extracted from source code in line order.
|
|
10
|
+
#
|
|
11
|
+
# @example Creating and serializing a flow document
|
|
12
|
+
# doc = FlowDocument.new(
|
|
13
|
+
# entry_point: "PostsController#create",
|
|
14
|
+
# route: { verb: "POST", path: "/posts" },
|
|
15
|
+
# max_depth: 5,
|
|
16
|
+
# steps: [{ unit: "PostsController#create", type: "controller", operations: [...] }]
|
|
17
|
+
# )
|
|
18
|
+
# doc.to_h # => JSON-serializable Hash
|
|
19
|
+
# doc.to_markdown # => human-readable table
|
|
20
|
+
#
|
|
21
|
+
class FlowDocument
  attr_reader :entry_point, :route, :max_depth, :steps, :generated_at

  # @param entry_point [String] The entry point identifier (e.g., "PostsController#create")
  # @param route [Hash, nil] Route info with :verb and :path keys
  # @param max_depth [Integer] Maximum recursion depth used during assembly
  # @param steps [Array<Hash>] Ordered list of step hashes
  # @param generated_at [String, nil] ISO8601 timestamp (defaults to now)
  def initialize(entry_point:, route: nil, max_depth: 5, steps: [], generated_at: nil)
    @entry_point = entry_point
    @route = route
    @max_depth = max_depth
    @steps = steps
    # Formatted with strftime instead of Time#iso8601: before Ruby 3.4 that
    # method only exists after `require 'time'`, which this file does not do.
    # For a local Time the output is the same ISO8601 string.
    @generated_at = generated_at || Time.now.strftime('%Y-%m-%dT%H:%M:%S%:z')
  end

  # Serialize to a JSON-compatible Hash.
  #
  # @return [Hash] Complete flow document data
  def to_h
    {
      entry_point: @entry_point,
      route: @route,
      max_depth: @max_depth,
      generated_at: @generated_at,
      steps: @steps
    }
  end

  # Reconstruct a FlowDocument from a serialized Hash.
  #
  # Handles both symbol and string keys for JSON round-trip compatibility.
  #
  # @param data [Hash] Previously serialized flow document data
  # @return [FlowDocument]
  def self.from_h(data)
    data = deep_symbolize_keys(data)

    new(
      entry_point: data[:entry_point],
      route: data[:route],
      max_depth: data[:max_depth] || 5,
      steps: data[:steps] || [],
      generated_at: data[:generated_at]
    )
  end

  # Recursively convert Hash keys to symbols inside nested Hashes and Arrays.
  # Keys that cannot be converted (no #to_sym) are kept unchanged.
  #
  # @param obj [Object] Arbitrary JSON-like structure
  # @return [Object] Copy of the structure with symbolized keys
  def self.deep_symbolize_keys(obj)
    case obj
    when Hash
      obj.each_with_object({}) do |(key, value), result|
        symbolized = key.respond_to?(:to_sym) ? key.to_sym : key
        result[symbolized] = deep_symbolize_keys(value)
      end
    when Array
      obj.map { |item| deep_symbolize_keys(item) }
    else
      obj
    end
  end
  private_class_method :deep_symbolize_keys

  # Render as human-readable Markdown.
  #
  # Produces a document with a header showing the route and entry point,
  # followed by one section per step with an operations table.
  #
  # @return [String] Markdown-formatted flow document
  def to_markdown
    lines = [format_header, '']

    @steps.each_with_index do |step, idx|
      lines << format_step(step, idx + 1)
      lines << ''
    end

    lines.join("\n")
  end

  private

  # Format the document header with route and entry point info.
  #
  # @return [String] Level-2 Markdown heading
  def format_header
    return "## #{@entry_point}" unless @route

    verb = @route[:verb] || '?'
    path = @route[:path] || '?'
    "## #{verb} #{path} → #{@entry_point}"
  end

  # Format a single step as a Markdown section with operations table.
  #
  # @param step [Hash] Step hash with :unit, :file_path and :operations
  # @param number [Integer] 1-based position of the step
  # @return [String] Markdown section
  def format_step(step, number)
    file_path = step[:file_path]
    operations = step[:operations] || []

    lines = ["### #{number}. #{step[:unit]}"]
    lines << "_#{file_path}_" if file_path
    lines << ''

    if operations.any?
      lines << '| # | Operation | Target | Line |'
      lines << '|---|-----------|--------|------|'
      format_operations(operations, lines)
    else
      lines << '_No significant operations_'
    end

    lines.join("\n")
  end

  # Format operations into table rows, handling nesting for transactions and conditionals.
  #
  # Nested rows are numbered hierarchically: "2.1" inside a transaction,
  # "2a.1" / "2b.1" for the then / else branch of a conditional.
  #
  # @param operations [Array<Hash>] Operation hashes
  # @param lines [Array<String>] Accumulator the rows are appended to
  # @param prefix [String] Numbering prefix for nested rows
  # @return [void]
  def format_operations(operations, lines, prefix: '')
    operations.each_with_index do |op, idx|
      num = "#{prefix}#{idx + 1}"
      op_type_str = op[:type].to_s
      line = cell(op[:line])
      target = cell(op[:target])
      method_name = cell(op[:method])

      case op_type_str
      when 'transaction'
        lines << "| #{num} | transaction | #{cell(op[:receiver])}.transaction | #{line} |"
        format_operations(op[:nested] || [], lines, prefix: "#{num}.")
      when 'conditional'
        kind = cell(op[:kind] || 'if')
        lines << "| #{num} | #{kind} #{cell(op[:condition])} | | #{line} |"
        format_operations(op[:then_ops] || [], lines, prefix: "#{num}a.")
        format_operations(op[:else_ops] || [], lines, prefix: "#{num}b.")
      when 'response'
        status = op[:status_code]
        status_text = status ? cell(status) : '?'
        lines << "| #{num} | response | #{status_text} (via #{cell(op[:render_method])}) | #{line} |"
      when 'async'
        # Array() tolerates an args hint given as a single String
        args = Array(op[:args_hint])
        args_text = args.any? ? "(#{cell(args.join(', '))})" : ''
        lines << "| #{num} | async | #{target}.#{method_name}#{args_text} | #{line} |"
      when 'cycle'
        lines << "| #{num} | cycle | #{target} (revisit) | #{line} |"
      when 'dynamic_dispatch'
        lines << "| #{num} | dynamic_dispatch | #{target}.#{method_name} | #{line} |"
      else
        target_text = [op[:target], op[:method]].compact.map { |part| cell(part) }.join('.')
        lines << "| #{num} | #{cell(op_type_str)} | #{target_text} | #{line} |"
      end
    end
  end

  # Make a value safe for use inside a Markdown table cell.
  #
  # Line breaks are collapsed to a single space and pipe characters are
  # backslash-escaped; otherwise source fragments such as `a || b` would be
  # read as column separators and break the table.
  #
  # @param value [Object] Any value; nil renders as an empty string
  # @return [String]
  def cell(value)
    value.to_s.gsub(/\s*[\r\n]+\s*/, ' ').gsub('|') { '\\|' }
  end
end
|
|
191
|
+
end
|