codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CodebaseIndex
4
+ module FlowAnalysis
5
+ # Maps render/redirect AST nodes to HTTP status codes.
6
+ #
7
+ # Uses a built-in STATUS_CODES hash rather than depending on Rack at runtime.
8
+ # Handles explicit status kwargs, render_<status> conventions, head calls,
9
+ # and redirect_to defaults.
10
+ #
11
+ # @example Resolving a render call
12
+ # ResponseCodeMapper.resolve_method("render_created", arguments: []) #=> 201
13
+ # ResponseCodeMapper.resolve_method("redirect_to", arguments: ["/home"]) #=> 302
14
+ # ResponseCodeMapper.resolve_method("head", arguments: [":no_content"]) #=> 204
15
+ #
16
class ResponseCodeMapper
  # Symbol name => HTTP status code, inlined to avoid a runtime Rack dependency.
  # Mirrors a subset of Rack::Utils::SYMBOL_TO_STATUS_CODE.
  STATUS_CODES = {
    continue: 100,
    switching_protocols: 101,
    processing: 102,
    early_hints: 103,
    ok: 200,
    created: 201,
    accepted: 202,
    non_authoritative_information: 203,
    no_content: 204,
    reset_content: 205,
    partial_content: 206,
    multi_status: 207,
    already_reported: 208,
    im_used: 226,
    multiple_choices: 300,
    moved_permanently: 301,
    found: 302,
    see_other: 303,
    not_modified: 304,
    use_proxy: 305,
    temporary_redirect: 307,
    permanent_redirect: 308,
    bad_request: 400,
    unauthorized: 401,
    payment_required: 402,
    forbidden: 403,
    not_found: 404,
    method_not_allowed: 405,
    not_acceptable: 406,
    proxy_authentication_required: 407,
    request_timeout: 408,
    conflict: 409,
    gone: 410,
    length_required: 411,
    precondition_failed: 412,
    payload_too_large: 413,
    uri_too_long: 414,
    unsupported_media_type: 415,
    range_not_satisfiable: 416,
    expectation_failed: 417,
    misdirected_request: 421,
    unprocessable_entity: 422,
    locked: 423,
    failed_dependency: 424,
    too_early: 425,
    upgrade_required: 426,
    precondition_required: 428,
    too_many_requests: 429,
    request_header_fields_too_large: 431,
    unavailable_for_legal_reasons: 451,
    internal_server_error: 500,
    not_implemented: 501,
    bad_gateway: 502,
    service_unavailable: 503,
    gateway_timeout: 504,
    http_version_not_supported: 505,
    variant_also_negotiates: 506,
    insufficient_storage: 507,
    loop_detected: 508,
    not_extended: 510,
    network_authentication_required: 511,
    # Newer names for 413/422 (used by Rack 3.1+). Listed after the older
    # names so reverse lookups by code keep returning the original symbols.
    content_too_large: 413,
    unprocessable_content: 422
  }.freeze

  # Finds a `status:` keyword and captures the text that follows it.
  # The lookbehind keeps keys such as `http_status:` from matching.
  STATUS_KWARG_PATTERN = /(?<!\w)status:\s*(.+)/
  private_constant :STATUS_KWARG_PATTERN

  # Resolve a render/redirect/head method call to an HTTP status code.
  #
  # Strategies tried in order:
  # 1. Explicit status kwarg: `render json: x, status: :created` -> 201
  #    (an explicit but unresolvable status yields nil, not a default)
  # 2. render_<status> convention: `render_created` -> 201
  # 3. head with status arg: `head :no_content` -> 204
  # 4. redirect_to default: 302
  #
  # @param method_name [String, Symbol] The method name (render, redirect_to, head, render_created, etc.)
  # @param arguments [Array<String>, nil] Argument representations from AST
  # @return [Integer, nil] HTTP status code or nil if unresolvable
  def self.resolve_method(method_name, arguments: [])
    name = method_name.to_s
    args = Array(arguments)

    explicit = extract_status_from_args(args)
    return resolve_status(explicit) if explicit

    if name.start_with?('render_')
      code = STATUS_CODES[name.delete_prefix('render_').to_sym]
      return code if code
    end

    return resolve_status(args.first) if name == 'head' && args.first
    return 302 if name == 'redirect_to'

    nil
  end

  # Resolve a status value (symbol name, integer, or string) to an integer code.
  #
  # Strings may carry the leading colon of an AST symbol representation
  # (":created") or be a run of digits ("201"); surrounding whitespace is ignored.
  #
  # @param value [String, Integer, Symbol] Status representation
  # @return [Integer, nil] HTTP status code or nil
  def self.resolve_status(value)
    case value
    when Integer then value
    when Symbol then STATUS_CODES[value]
    when String
      cleaned = value.strip.delete_prefix(':')
      STATUS_CODES[cleaned.to_sym] || (cleaned.to_i if cleaned.match?(/\A\d+\z/))
    end
  end

  # Extract a status value from argument strings.
  #
  # Looks for patterns like "status: :created" or "status: 201". Only the
  # value itself is returned: anything after the first comma (further kwargs)
  # and trailing closing brackets are dropped, so
  # "json: x, status: :created, location: url" yields ":created".
  #
  # @param arguments [Array<String>] Argument representations
  # @return [String, nil] The status value if found
  def self.extract_status_from_args(arguments)
    Array(arguments).each do |arg|
      next unless arg.is_a?(String)

      match = STATUS_KWARG_PATTERN.match(arg)
      next unless match

      first_value = match[1][/\A[^,]*/]
      return first_value.sub(/[\s)}\]]+\z/, '').strip
    end
    nil
  end
end
153
+ end
154
+ end
@@ -0,0 +1,290 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+ require 'json'
5
+ require 'set'
6
+ require_relative 'ast/parser'
7
+ require_relative 'ast/method_extractor'
8
+ require_relative 'flow_analysis/operation_extractor'
9
+ require_relative 'flow_document'
10
+
11
+ module CodebaseIndex
12
+ # Orchestrates execution flow tracing from an entry point through the dependency graph.
13
+ #
14
+ # Given an entry point (e.g., "PostsController#create"), FlowAssembler:
15
+ # 1. Loads the ExtractedUnit JSON from disk
16
+ # 2. Parses its source_code with the AST layer
17
+ # 3. Extracts operations in source line order
18
+ # 4. Recursively expands targets that resolve to other units
19
+ # 5. Detects cycles and respects max_depth
20
+ # 6. Assembles a FlowDocument
21
+ #
22
+ # @example Assembling a flow
23
+ # assembler = FlowAssembler.new(graph: graph, extracted_dir: "/tmp/codebase_index")
24
+ # flow = assembler.assemble("PostsController#create", max_depth: 5)
25
+ # puts flow.to_markdown
26
+ #
27
class FlowAssembler
  # @param graph [DependencyGraph] The dependency graph for resolving targets
  # @param extracted_dir [String] Directory containing extracted unit JSON files
  def initialize(graph:, extracted_dir:)
    @graph = graph
    @extracted_dir = extracted_dir
    @parser = Ast::Parser.new
    @method_extractor = Ast::MethodExtractor.new(parser: @parser)
    @operation_extractor = FlowAnalysis::OperationExtractor.new
  end

  # Assemble an execution flow from the given entry point.
  #
  # @param entry_point [String] Unit identifier, optionally with #method_name
  # @param max_depth [Integer] Maximum recursion depth
  # @return [FlowDocument] The assembled flow document
  def assemble(entry_point, max_depth: 5)
    steps = []
    expand(entry_point, steps, Set.new, depth: 0, max_depth: max_depth)

    FlowDocument.new(
      entry_point: entry_point,
      route: extract_route(entry_point),
      max_depth: max_depth,
      steps: steps
    )
  end

  private

  # Recursively expand a unit into flow steps.
  #
  # A unit already present in +visited+ is not expanded again; a marker step
  # of type 'cycle' is appended instead. Units that cannot be loaded or have
  # no source code contribute no step.
  #
  # @param identifier [String] Unit identifier (may include #method)
  # @param steps [Array<Hash>] Accumulator for step hashes
  # @param visited [Set<String>] Visited unit identifiers for cycle detection
  # @param depth [Integer] Current recursion depth
  # @param max_depth [Integer] Maximum recursion depth
  def expand(identifier, steps, visited, depth:, max_depth:)
    return if depth > max_depth

    unit_id, method_name = parse_identifier(identifier)

    if visited.include?(unit_id)
      steps << {
        unit: unit_id,
        type: 'cycle',
        operations: [{ type: :cycle, target: unit_id, line: nil }]
      }
      return
    end

    visited.add(unit_id)

    unit_data = load_unit(unit_id)
    return unless unit_data

    source_code = unit_data[:source_code]
    return unless source_code && !source_code.empty?

    metadata = unit_data[:metadata] || {}
    unit_type = unit_data[:type]&.to_s
    operations = extract_operations(source_code, method_name, metadata, unit_type)

    steps << {
      unit: identifier,
      type: unit_type,
      file_path: unit_data[:file_path],
      operations: operations
    }

    # Follow call targets only after this unit's own step has been recorded,
    # so steps appear in discovery order.
    operations.each do |op|
      expand_operation(op, identifier, steps, visited, depth: depth, max_depth: max_depth)
    end
  end

  # Extract operations from source code for a specific method.
  #
  # For controllers, applicable before-callbacks are listed first. When no
  # method name is given, the whole source is parsed instead of one method.
  #
  # @param source_code [String] Source of the unit
  # @param method_name [String, nil] Method to extract, or nil for the whole source
  # @param metadata [Hash] Unit metadata (read for controller callbacks)
  # @param unit_type [String, nil] Unit type, e.g. 'controller'
  # @return [Array<Hash>] Operation hashes
  def extract_operations(source_code, method_name, metadata, unit_type)
    operations = []
    prepend_callbacks(operations, metadata, method_name) if unit_type == 'controller'

    node = if method_name
             @method_extractor.extract_method(source_code, method_name)
           else
             @parser.parse(source_code)
           end

    # A named method that cannot be found contributes no operations; the
    # whole-source parse result is always handed to the extractor.
    operations.concat(@operation_extractor.extract(node)) if node || method_name.nil?
    operations
  end

  # Prepend before_action callbacks from controller metadata.
  #
  # Handles two metadata formats:
  # - metadata[:callbacks] with :name key (legacy/test format)
  # - metadata[:filters] with :filter key (ControllerExtractor format)
  #
  # The first of the two that is a non-empty Array is used, so an empty
  # :callbacks list does not hide populated :filters.
  #
  # @param operations [Array<Hash>] Accumulator that receives :call operations
  # @param metadata [Hash] Controller unit metadata
  # @param method_name [String, nil] Action being traced; nil applies every before-callback
  def prepend_callbacks(operations, metadata, method_name)
    callbacks = [metadata[:callbacks], metadata[:filters]].find do |list|
      list.is_a?(Array) && !list.empty?
    end
    return unless callbacks

    action = method_name&.to_s

    callbacks.each do |cb|
      next unless cb.is_a?(Hash)
      next unless cb[:kind]&.to_s == 'before'

      cb_name = cb[:name] || cb[:filter]
      next unless cb_name
      next unless callback_applies?(cb, action)

      operations << {
        type: :call,
        target: nil,
        method: cb_name.to_s,
        line: nil
      }
    end
  end

  # Decide whether a callback runs for the given action based on :only/:except.
  #
  # @param callback [Hash] Callback metadata
  # @param action [String, nil] Action name; nil means "no specific action"
  # @return [Boolean]
  def callback_applies?(callback, action)
    return true unless action

    only = action_names(callback[:only])
    except = action_names(callback[:except])

    return false if only && !only.include?(action)
    return false if except&.include?(action)

    true
  end

  # Normalize an :only/:except value to a list of action name strings.
  # Accepts an Array or a single String/Symbol; anything else means "no restriction".
  #
  # @param value [Array, String, Symbol, nil]
  # @return [Array<String>, nil]
  def action_names(value)
    case value
    when Array then value.map(&:to_s)
    when String, Symbol then [value.to_s]
    end
  end

  # Recursively expand an operation's target if it resolves to a known unit.
  #
  # Transactions and conditionals are containers: their nested operations are
  # walked at the same depth. Only :call and :async targets descend a level.
  #
  # @param op [Hash] The operation to potentially expand
  # @param current_unit [String] The identifier of the unit containing this operation
  # @param steps [Array<Hash>] Accumulator for step hashes
  # @param visited [Set<String>] Visited unit identifiers for cycle detection
  # @param depth [Integer] Current recursion depth
  # @param max_depth [Integer] Maximum recursion depth
  def expand_operation(op, current_unit, steps, visited, depth:, max_depth:)
    case op[:type]
    when :call, :async
      target = op[:target]
      return unless target

      candidate = resolve_target(target)
      return unless candidate

      expand(candidate, steps, visited, depth: depth + 1, max_depth: max_depth)
    when :transaction
      (op[:nested] || []).each do |nested_op|
        expand_operation(nested_op, current_unit, steps, visited, depth: depth, max_depth: max_depth)
      end
    when :conditional
      ((op[:then_ops] || []) + (op[:else_ops] || [])).each do |branch_op|
        expand_operation(branch_op, current_unit, steps, visited, depth: depth, max_depth: max_depth)
      end
    end
  end

  # Resolve a call target to a unit identifier using graph-wide lookup.
  #
  # Tier 1: Graph lookup - exact node match, then suffix match.
  # Tier 2: Disk fallback - the unit JSON exists on disk even though the
  #         graph has no node for it.
  #
  # @param target [String] The call target name to resolve
  # @return [String, nil] The resolved unit identifier, or nil if not found
  def resolve_target(target)
    return target if @graph.node_exists?(target)

    graph_match = @graph.find_node_by_suffix(target)
    return graph_match if graph_match

    return target if load_unit(target)

    nil
  end

  # Parse an identifier into [unit_id, method_name].
  # "PostsController#create" => ["PostsController", "create"]
  # "PostService" => ["PostService", nil]
  # "PostService#" => ["PostService", nil] (empty method part is treated as absent)
  #
  # @param identifier [String]
  # @return [Array(String, String), Array(String, nil)]
  def parse_identifier(identifier)
    return [identifier, nil] unless identifier.include?('#')

    unit_id, method_name = identifier.split('#', 2)
    method_name = nil if method_name.empty?
    [unit_id, method_name]
  end

  # Load an ExtractedUnit's data from its JSON file on disk.
  #
  # Tries the digest-suffixed filename first, then the plain filename, in
  # every immediate subdirectory of the extracted directory. Files that are
  # not valid JSON are skipped.
  #
  # The extracted directory is passed as the glob base rather than embedded in
  # the pattern, so glob metacharacters in its path are taken literally.
  #
  # @param unit_id [String] Unit identifier without a #method part
  # @return [Hash, nil] Parsed unit data with symbolized keys, or nil if not found
  def load_unit(unit_id)
    base = unit_id.gsub('::', '__').gsub(/[^a-zA-Z0-9_-]/, '_')
    digest = Digest::SHA256.hexdigest(unit_id)[0, 8]

    ["#{base}_#{digest}.json", "#{base}.json"].each do |filename|
      Dir.glob(File.join('*', filename), base: @extracted_dir).each do |relative_path|
        return JSON.parse(File.read(File.join(@extracted_dir, relative_path)), symbolize_names: true)
      rescue JSON::ParserError
        next
      end
    end

    nil
  end

  # Extract route information from controller metadata.
  #
  # With a method name, picks the route whose :action matches it; otherwise
  # the first route.
  #
  # @param entry_point [String] Unit identifier, optionally with #method_name
  # @return [Hash, nil] Hash with :verb and :path, or nil when no route is found
  def extract_route(entry_point)
    unit_id, method_name = parse_identifier(entry_point)
    unit_data = load_unit(unit_id)
    return nil unless unit_data

    routes = (unit_data[:metadata] || {})[:routes]
    return nil unless routes.is_a?(Array)

    route = if method_name
              routes.find { |r| r[:action]&.to_s == method_name }
            else
              routes.first
            end
    return nil unless route

    { verb: route[:verb], path: route[:path] }
  end
end
290
+ end
@@ -0,0 +1,191 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ module CodebaseIndex
6
+ # Value object representing an assembled execution flow trace.
7
+ #
8
+ # Contains an ordered list of steps from an entry point through the dependency graph,
9
+ # with each step holding operations extracted from source code in line order.
10
+ #
11
+ # @example Creating and serializing a flow document
12
+ # doc = FlowDocument.new(
13
+ # entry_point: "PostsController#create",
14
+ # route: { verb: "POST", path: "/posts" },
15
+ # max_depth: 5,
16
+ # steps: [{ unit: "PostsController#create", type: "controller", operations: [...] }]
17
+ # )
18
+ # doc.to_h # => JSON-serializable Hash
19
+ # doc.to_markdown # => human-readable table
20
+ #
21
class FlowDocument
  attr_reader :entry_point, :route, :max_depth, :steps, :generated_at

  # @param entry_point [String] The entry point identifier (e.g., "PostsController#create")
  # @param route [Hash, nil] Route info with :verb and :path keys
  # @param max_depth [Integer] Maximum recursion depth used during assembly
  # @param steps [Array<Hash>] Ordered list of step hashes
  # @param generated_at [String, nil] ISO8601 timestamp (defaults to now)
  def initialize(entry_point:, route: nil, max_depth: 5, steps: [], generated_at: nil)
    @entry_point = entry_point
    @route = route
    @max_depth = max_depth
    @steps = steps
    @generated_at = generated_at || current_timestamp
  end

  # Serialize to a JSON-compatible Hash.
  #
  # @return [Hash] Complete flow document data
  def to_h
    {
      entry_point: @entry_point,
      route: @route,
      max_depth: @max_depth,
      generated_at: @generated_at,
      steps: @steps
    }
  end

  # Reconstruct a FlowDocument from a serialized Hash.
  #
  # Handles both symbol and string keys for JSON round-trip compatibility.
  #
  # @param data [Hash] Previously serialized flow document data
  # @return [FlowDocument]
  def self.from_h(data)
    data = deep_symbolize_keys(data)

    new(
      entry_point: data[:entry_point],
      route: data[:route],
      max_depth: data[:max_depth] || 5,
      steps: data[:steps] || [],
      generated_at: data[:generated_at]
    )
  end

  # Recursively convert Hash keys to symbols through nested Hashes and Arrays.
  # Keys that cannot be converted (no #to_sym) are kept as they are.
  #
  # @param obj [Object] Hash, Array, or leaf value
  # @return [Object] Structure of the same shape with symbolized keys
  def self.deep_symbolize_keys(obj)
    case obj
    when Hash
      obj.each_with_object({}) do |(key, value), result|
        sym_key = key.respond_to?(:to_sym) ? key.to_sym : key
        result[sym_key] = deep_symbolize_keys(value)
      end
    when Array
      obj.map { |item| deep_symbolize_keys(item) }
    else
      obj
    end
  end
  private_class_method :deep_symbolize_keys

  # Render as human-readable Markdown.
  #
  # Produces a document with a header showing the route and entry point,
  # followed by one section per step with an operations table.
  #
  # @return [String] Markdown-formatted flow document
  def to_markdown
    lines = [format_header, '']

    @steps.each_with_index do |step, idx|
      lines << format_step(step, idx + 1)
      lines << ''
    end

    lines.join("\n")
  end

  private

  # Current local time as an ISO8601 string with second precision.
  #
  # Built with strftime so the class does not rely on the `time` stdlib
  # extension having been required by someone else.
  #
  # @return [String] e.g. "2024-05-01T12:30:00+02:00" or "...Z" for UTC times
  def current_timestamp
    now = Time.now
    now.strftime('%Y-%m-%dT%H:%M:%S') + (now.utc? ? 'Z' : now.strftime('%:z'))
  end

  # Format the document header with route and entry point info.
  def format_header
    return "## #{@entry_point}" unless @route

    verb = @route[:verb] || '?'
    path = @route[:path] || '?'
    "## #{verb} #{path} → #{@entry_point}"
  end

  # Format a single step as a Markdown section with operations table.
  #
  # @param step [Hash] Step hash with :unit, :file_path and :operations
  # @param number [Integer] 1-based position of the step
  # @return [String]
  def format_step(step, number)
    file_path = step[:file_path]
    operations = step[:operations] || []

    lines = ["### #{number}. #{step[:unit]}"]
    lines << "_#{file_path}_" if file_path
    lines << ''

    if operations.any?
      lines << '| # | Operation | Target | Line |'
      lines << '|---|-----------|--------|------|'
      format_operations(operations, lines)
    else
      lines << '_No significant operations_'
    end

    lines.join("\n")
  end

  # Format operations into table rows, handling nesting for transactions and conditionals.
  #
  # Nested rows are numbered hierarchically: "2.1" inside a transaction,
  # "2a.1" / "2b.1" for the then / else branch of a conditional.
  #
  # @param operations [Array<Hash>] Operations to render
  # @param lines [Array<String>] Accumulator that receives the table rows
  # @param prefix [String] Numbering prefix for nested rows
  def format_operations(operations, lines, prefix: '')
    operations.each_with_index do |op, idx|
      num = "#{prefix}#{idx + 1}"
      line = op[:line]
      # Types are symbols when freshly assembled and strings after a JSON round trip.
      op_type = op[:type].to_s

      case op_type
      when 'transaction'
        lines << "| #{num} | transaction | #{cell(qualified_name(op[:receiver], 'transaction'))} | #{line} |"
        format_operations(op[:nested] || [], lines, prefix: "#{num}.")
      when 'conditional'
        lines << "| #{num} | #{cell("#{op[:kind] || 'if'} #{op[:condition]}")} | | #{line} |"
        format_operations(op[:then_ops] || [], lines, prefix: "#{num}a.")
        format_operations(op[:else_ops] || [], lines, prefix: "#{num}b.")
      when 'response'
        status = op[:status_code]
        status_text = status ? status.to_s : '?'
        lines << "| #{num} | response | #{cell("#{status_text} (via #{op[:render_method]})")} | #{line} |"
      when 'async'
        args = op[:args_hint]
        args_text = args&.any? ? "(#{args.join(', ')})" : ''
        lines << "| #{num} | async | #{cell("#{qualified_name(op[:target], op[:method])}#{args_text}")} | #{line} |"
      when 'cycle'
        lines << "| #{num} | cycle | #{cell("#{op[:target]} (revisit)")} | #{line} |"
      when 'dynamic_dispatch'
        lines << "| #{num} | dynamic_dispatch | #{cell(qualified_name(op[:target], op[:method]))} | #{line} |"
      else
        lines << "| #{num} | #{cell(op_type)} | #{cell(qualified_name(op[:target], op[:method]))} | #{line} |"
      end
    end
  end

  # Join a receiver and a method name with a dot, omitting whichever is nil.
  #
  # @return [String]
  def qualified_name(target, method)
    [target, method].compact.join('.')
  end

  # Make arbitrary text safe for a Markdown table cell: collapse line breaks
  # and escape pipes, which would otherwise start a new column
  # (e.g. a condition such as `a || b`).
  #
  # @param text [Object] Cell content; converted with #to_s
  # @return [String]
  def cell(text)
    text.to_s.gsub(/\s*\n\s*/, ' ').gsub('|') { '\\|' }
  end
end
191
+ end