codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171):
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,374 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+
5
module CodebaseIndex
  # GraphAnalyzer computes structural properties of the dependency graph.
  #
  # Given a {DependencyGraph}, it identifies architectural patterns like orphaned
  # units, circular dependencies, hub nodes, and bridge nodes. These metrics help
  # surface dead code, architectural bottlenecks, and high-risk change targets.
  #
  # Inspired by FlowMapper's Comparator pattern — takes a graph, produces a
  # structural report without mutating anything.
  #
  # The analyzer only relies on three messages of the wrapped graph:
  # +to_h+ (expected to expose +:nodes+ and +:edges+), +dependencies_of+ and
  # +dependents_of+.
  #
  # @example Basic usage
  #   graph = CodebaseIndex::DependencyGraph.new
  #   # ... register units ...
  #   analyzer = CodebaseIndex::GraphAnalyzer.new(graph)
  #   report = analyzer.analyze
  #   report[:cycles]  # => [["A", "B", "A"], ...]
  #   report[:hubs]    # => [{ identifier: "User", type: :model, ... }, ...]
  #
  class GraphAnalyzer
    # Types that are naturally root nodes and should not be flagged as orphans.
    # Framework and gem sources are consumed but never referenced by application code
    # in the dependency graph's reverse index.
    EXCLUDED_ORPHAN_TYPES = %i[rails_source gem_source].freeze

    # @param dependency_graph [DependencyGraph] The graph to analyze
    def initialize(dependency_graph)
      @graph = dependency_graph
    end

    # ══════════════════════════════════════════════════════════════════════
    # Public Analysis Methods
    # ══════════════════════════════════════════════════════════════════════

    # Units with no dependents (nothing references them).
    #
    # These are potential dead code or entry points. Units whose type is listed
    # in {EXCLUDED_ORPHAN_TYPES} are skipped. The result is memoized.
    #
    # @return [Array<String>] Identifiers of orphaned units
    def orphans
      @orphans ||= graph_nodes.each_with_object([]) do |(identifier, meta), result|
        next if EXCLUDED_ORPHAN_TYPES.include?(meta[:type])

        result << identifier if @graph.dependents_of(identifier).empty?
      end
    end

    # Units with no dependencies (leaf nodes).
    #
    # These are self-contained units that don't reference anything else.
    # The result is memoized.
    #
    # @return [Array<String>] Identifiers of dead-end units
    def dead_ends
      @dead_ends ||= graph_nodes.keys.select do |identifier|
        @graph.dependencies_of(identifier).empty?
      end
    end

    # Units with the highest number of dependents (architectural hotspots).
    #
    # A high dependent count means many other units reference this one. Changes
    # to hub nodes have the widest blast radius.
    #
    # @param limit [Integer] Maximum number of hubs to return
    # @return [Array<Hash>] Sorted by dependent_count descending; ties are
    #   ordered by identifier so the ranking is reproducible.
    #   Each hash contains :identifier, :type, :dependent_count, :dependents
    def hubs(limit: 20)
      ranked = graph_nodes.map do |identifier, meta|
        dependents = @graph.dependents_of(identifier)
        {
          identifier: identifier,
          type: meta[:type],
          dependent_count: dependents.size,
          dependents: dependents
        }
      end

      # sort_by is not stable, so break ties explicitly on the identifier.
      ranked.sort_by { |hub| [-hub[:dependent_count], hub[:identifier].to_s] }.first(limit)
    end

    # Detect circular dependency chains in the graph.
    #
    # Uses iterative DFS with a three-color marking scheme (white/gray/black).
    # When a gray (in-progress) node is revisited, a cycle has been found and
    # is read off the current DFS path. The result is memoized.
    #
    # @return [Array<Array<String>>] Each element is a cycle represented as
    #   an ordered array of identifiers, ending with the repeated node.
    #   For example: ["A", "B", "C", "A"]
    def cycles
      @cycles ||= detect_cycles
    end

    # Units that frequently sit in the middle of dependency paths.
    #
    # Computes a simplified betweenness-centrality estimate: for every sampled
    # ordered pair of nodes, the BFS shortest path along forward edges is
    # computed and each node strictly between source and target earns one point.
    #
    # When +sample_size+ covers every ordered pair of distinct nodes, all pairs
    # are evaluated; otherwise pairs are drawn with a random generator seeded
    # with the node count, so the same graph yields the same sample.
    #
    # @param limit [Integer] Maximum number of bridges to return
    # @param sample_size [Integer] Number of node pairs to sample for estimation
    # @return [Array<Hash>] Sorted by score descending; ties are ordered by
    #   identifier. Each hash contains :identifier, :type, :score.
    #   Empty when the graph has fewer than three nodes.
    def bridges(limit: 20, sample_size: 200)
      nodes = graph_nodes
      return [] if nodes.size < 3

      node_ids = nodes.keys
      # Seed with the node count so repeated runs over the same graph agree.
      rng = Random.new(node_ids.size)
      scores = Hash.new(0)

      generate_sample_pairs(node_ids, sample_size, rng).each do |source, target|
        path = bfs_shortest_path(source, target)
        next unless path && path.size > 2

        # Credit intermediate nodes only (exclude source and target).
        path[1..-2].each { |intermediate| scores[intermediate] += 1 }
      end

      scores
        .sort_by { |identifier, score| [-score, identifier.to_s] }
        .first(limit)
        .map do |identifier, score|
          # An intermediate may be a dependency that is not a registered node.
          meta = nodes[identifier] || {}
          { identifier: identifier, type: meta[:type], score: score }
        end
    end

    # Full analysis report combining all structural metrics.
    #
    # Bridges are limited to the top 10; hubs use the default limit.
    #
    # @return [Hash] Complete analysis with :orphans, :dead_ends, :hubs,
    #   :cycles, :bridges, and :stats
    def analyze
      computed_orphans = orphans
      computed_dead_ends = dead_ends
      computed_hubs = hubs
      computed_cycles = cycles
      computed_bridges = bridges(limit: 10)

      {
        orphans: computed_orphans,
        dead_ends: computed_dead_ends,
        hubs: computed_hubs,
        cycles: computed_cycles,
        bridges: computed_bridges,
        stats: {
          orphan_count: computed_orphans.size,
          dead_end_count: computed_dead_ends.size,
          hub_count: computed_hubs.size,
          cycle_count: computed_cycles.size
        }
      }
    end

    private

    # ──────────────────────────────────────────────────────────────────────
    # Graph Accessors
    # ──────────────────────────────────────────────────────────────────────

    # Cache the full graph serialization once, avoiding repeated to_h calls.
    #
    # @return [Hash] Full graph data
    def graph_data
      @graph_data ||= @graph.to_h
    end

    # Access graph nodes from cached graph data.
    #
    # Falls back to an empty hash when the serialization has no :nodes entry.
    #
    # @return [Hash] identifier => { type:, file_path:, namespace: }
    def graph_nodes
      @graph_nodes ||= graph_data[:nodes] || {}
    end

    # Access graph forward edges from cached graph data.
    #
    # Falls back to an empty hash when the serialization has no :edges entry.
    #
    # @return [Hash] identifier => [dependency identifiers]
    def graph_edges
      @graph_edges ||= graph_data[:edges] || {}
    end

    # ──────────────────────────────────────────────────────────────────────
    # Cycle Detection (Three-Color DFS)
    # ──────────────────────────────────────────────────────────────────────

    # Detects cycles using iterative DFS with white/gray/black coloring.
    #
    # - White (unvisited): node has not been seen
    # - Gray (in-progress): node is on the current DFS path
    # - Black (complete): node and all its descendants are fully explored
    #
    # Every edge that points at a gray node closes a cycle, which is read off
    # the current path. Rotations of an already reported cycle are skipped.
    # Only cycles closed by such back-edges are reported, so this is not an
    # exhaustive enumeration of every simple cycle in the graph.
    #
    # @return [Array<Array<String>>] Detected cycles
    def detect_cycles
      nodes = graph_nodes
      return [] if nodes.empty?

      color = Hash.new(:white)
      found_cycles = []
      seen_signatures = Set.new

      nodes.each_key do |start_node|
        next unless color[start_node] == :white

        # Explicit stack of [node, :enter] / [node, :exit] entries; the :exit
        # entry is processed once everything pushed after it has been handled.
        stack = [[start_node, :enter]]
        # Gray nodes in visiting order, used to extract cycles.
        path = []

        until stack.empty?
          node, action = stack.pop

          if action == :exit
            color[node] = :black
            path.pop
            next
          end

          # A node may be queued several times; only the first entry counts.
          next unless color[node] == :white

          color[node] = :gray
          path.push(node)
          stack.push([node, :exit])

          @graph.dependencies_of(node).each do |neighbor|
            case color[neighbor]
            when :white
              stack.push([neighbor, :enter])
            when :gray
              cycle = extract_cycle_from_path(path, neighbor)
              next unless cycle

              # Set#add? returns nil when the signature was already recorded.
              found_cycles << cycle if seen_signatures.add?(normalize_cycle_signature(cycle))
            end
            # Black nodes are fully explored; nothing to do.
          end
        end
      end

      found_cycles
    end

    # Extracts a cycle from the current DFS path when a back-edge to
    # +cycle_start+ is found.
    #
    # @param path [Array<String>] Current DFS path
    # @param cycle_start [String] The node that closes the cycle
    # @return [Array<String>, nil] The cycle path ending with cycle_start repeated,
    #   or nil if cycle_start is not in the path
    def extract_cycle_from_path(path, cycle_start)
      start_index = path.index(cycle_start)
      return nil unless start_index

      path[start_index..] + [cycle_start]
    end

    # Normalize a cycle so that duplicate rotations are treated as the same cycle.
    # For example, [A, B, C, A] and [B, C, A, B] are the same cycle.
    #
    # @param cycle [Array<String>] Cycle path with repeated last element
    # @return [String] Canonical string representation
    def normalize_cycle_signature(cycle)
      # Drop the trailing repeated element to get the raw loop.
      loop_nodes = cycle[0..-2]
      return '' if loop_nodes.empty?

      # Rotate so the lexicographically smallest element comes first.
      loop_nodes.rotate(loop_nodes.index(loop_nodes.min)).join('->')
    end

    # ──────────────────────────────────────────────────────────────────────
    # Bridge Detection (Sampled Betweenness Centrality)
    # ──────────────────────────────────────────────────────────────────────

    # Produce ordered pairs of distinct nodes for betweenness sampling.
    #
    # When +sample_size+ is at least the number of possible ordered pairs,
    # every pair is returned so small graphs are scored exactly. Otherwise
    # pairs are drawn at random, discarding self-pairs and duplicates, and
    # giving up after three attempts per requested pair; the result may
    # therefore hold fewer than +sample_size+ pairs.
    #
    # @param node_ids [Array<String>] All node identifiers
    # @param sample_size [Integer] Number of pairs to generate
    # @param rng [Random] Random number generator for reproducibility
    # @return [Array<Array<String>>] Pairs of [source, target]
    def generate_sample_pairs(node_ids, sample_size, rng)
      max_possible = node_ids.size * (node_ids.size - 1)
      return node_ids.permutation(2).to_a if sample_size >= max_possible

      pairs = Set.new
      attempts = 0
      max_attempts = sample_size * 3

      while pairs.size < sample_size && attempts < max_attempts
        a = node_ids[rng.rand(node_ids.size)]
        b = node_ids[rng.rand(node_ids.size)]
        pairs.add([a, b]) unless a == b
        attempts += 1
      end

      pairs.to_a
    end

    # BFS shortest path between two nodes, following forward edges.
    #
    # Records each node's predecessor on first discovery and rebuilds the
    # path once the target is reached, instead of copying a path per node.
    #
    # @param source [String] Starting node identifier
    # @param target [String] Target node identifier
    # @return [Array<String>, nil] Shortest path or nil if unreachable
    def bfs_shortest_path(source, target)
      return [source] if source == target

      # Doubles as the visited set: a key is present once a node is discovered.
      predecessor = { source => nil }
      queue = [source]

      until queue.empty?
        current = queue.shift

        @graph.dependencies_of(current).each do |neighbor|
          next if predecessor.key?(neighbor)

          predecessor[neighbor] = current
          return rebuild_path(predecessor, source, target) if neighbor == target

          queue.push(neighbor)
        end
      end

      nil
    end

    # Walk the predecessor map back from +target+ to +source+.
    #
    # @param predecessor [Hash] node => node it was discovered from
    # @param source [String] Starting node identifier
    # @param target [String] Target node identifier (distinct from source)
    # @return [Array<String>] Path from source to target, inclusive
    def rebuild_path(predecessor, source, target)
      path = [target]
      path << predecessor[path.last] until path.last == source
      path.reverse
    end
  end
end