codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'set'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
  # GraphAnalyzer computes structural properties of the dependency graph.
  #
  # Given a {DependencyGraph}, it identifies architectural patterns like orphaned
  # units, circular dependencies, hub nodes, and bridge nodes. These metrics help
  # surface dead code, architectural bottlenecks, and high-risk change targets.
  #
  # Inspired by FlowMapper's Comparator pattern — takes a graph, produces a
  # structural report without mutating anything.
  #
  # The analyzer talks to the graph through three methods only: +to_h+ (read for
  # its +:nodes+ and +:edges+ entries), +dependencies_of+ and +dependents_of+.
  # The serialized graph, {#orphans}, {#dead_ends} and {#cycles} are memoized on
  # first use, so an analyzer instance reflects the graph as it was at that time.
  #
  # @example Basic usage
  #   graph = CodebaseIndex::DependencyGraph.new
  #   # ... register units ...
  #   analyzer = CodebaseIndex::GraphAnalyzer.new(graph)
  #   report = analyzer.analyze
  #   report[:cycles]  # => [["A", "B", "A"], ...]
  #   report[:hubs]    # => [{ identifier: "User", type: :model, ... }, ...]
  #
  class GraphAnalyzer
    # Types that are naturally root nodes and should not be flagged as orphans.
    # Framework and gem sources are consumed but never referenced by application code
    # in the dependency graph's reverse index.
    EXCLUDED_ORPHAN_TYPES = %i[rails_source gem_source].freeze

    # @param dependency_graph [DependencyGraph] The graph to analyze
    def initialize(dependency_graph)
      @graph = dependency_graph
    end

    # ══════════════════════════════════════════════════════════════════════
    # Public Analysis Methods
    # ══════════════════════════════════════════════════════════════════════

    # Units with no dependents (nothing references them).
    #
    # These are potential dead code or entry points. Framework and gem sources
    # (see EXCLUDED_ORPHAN_TYPES) are excluded since they are naturally
    # unreferenced in the reverse index. The result is memoized.
    #
    # @return [Array<String>] Identifiers of orphaned units, in node order
    def orphans
      @orphans ||= graph_nodes.select do |identifier, meta|
        # Check the type first so excluded units never hit the reverse index.
        !EXCLUDED_ORPHAN_TYPES.include?(meta[:type]) && @graph.dependents_of(identifier).empty?
      end.keys
    end

    # Units with no dependencies (leaf nodes).
    #
    # These are self-contained units that don't reference anything else —
    # typically utility classes, value objects, or standalone services.
    # The result is memoized.
    #
    # @return [Array<String>] Identifiers of dead-end units, in node order
    def dead_ends
      @dead_ends ||= graph_nodes.keys.select do |identifier|
        @graph.dependencies_of(identifier).empty?
      end
    end

    # Units with the highest number of dependents (architectural hotspots).
    #
    # A high dependent count means many other units reference this one. Changes
    # to hub nodes have the widest blast radius.
    #
    # Every node is ranked, so when fewer than +limit+ nodes have dependents the
    # tail of the result contains entries with a +dependent_count+ of zero.
    #
    # @param limit [Integer] Maximum number of hubs to return
    # @return [Array<Hash>] Sorted by dependent_count descending.
    #   Each hash contains :identifier, :type, :dependent_count, :dependents
    def hubs(limit: 20)
      ranked = graph_nodes.map do |identifier, meta|
        dependents = @graph.dependents_of(identifier)
        {
          identifier: identifier,
          type: meta[:type],
          dependent_count: dependents.size,
          dependents: dependents
        }
      end

      ranked.sort_by { |hub| -hub[:dependent_count] }.first(limit)
    end

    # Detect circular dependency chains in the graph.
    #
    # Uses iterative DFS with a three-color marking scheme (white/gray/black).
    # When a gray (in-progress) node is revisited, a cycle has been found.
    # The cycle path is extracted from the current DFS path. The result is memoized.
    #
    # @return [Array<Array<String>>] Each element is a cycle represented as
    #   an ordered array of identifiers, ending with the repeated node.
    #   For example: ["A", "B", "C", "A"]
    def cycles
      @cycles ||= detect_cycles
    end

    # Units that sit on many shortest dependency paths (bridge nodes).
    #
    # Computes a simplified betweenness centrality metric — for each unit, we
    # count how many shortest paths between the examined node pairs pass through
    # it as an intermediate step. High-scoring nodes are architectural
    # bottlenecks whose failure or change would disrupt many dependency paths.
    #
    # For performance, large graphs are estimated from +sample_size+ randomly
    # chosen ordered pairs. When +sample_size+ is large enough to cover every
    # ordered pair, all pairs are examined and the scores are exact for the
    # single BFS shortest path found per pair.
    #
    # Graphs with fewer than three nodes cannot have an intermediate node and
    # yield an empty result.
    #
    # @param limit [Integer] Maximum number of bridges to return
    # @param sample_size [Integer] Number of node pairs to sample for estimation
    # @return [Array<Hash>] Sorted by score descending.
    #   Each hash contains :identifier, :type, :score. :type is nil for
    #   identifiers that appear only as a dependency and not as a node.
    def bridges(limit: 20, sample_size: 200)
      nodes = graph_nodes
      return [] if nodes.size < 3

      node_ids = nodes.keys

      # Seed with the node count so results are reproducible for the same graph.
      rng = Random.new(node_ids.size)
      scores = Hash.new(0)

      generate_sample_pairs(node_ids, sample_size, rng).each do |source, target|
        path = bfs_shortest_path(source, target)
        next unless path && path.size > 2

        # Credit intermediate nodes only (exclude source and target).
        path[1..-2].each { |intermediate| scores[intermediate] += 1 }
      end

      scores.sort_by { |_id, score| -score }.first(limit).map do |identifier, score|
        meta = nodes[identifier] || {}
        { identifier: identifier, type: meta[:type], score: score }
      end
    end

    # Full analysis report combining all structural metrics.
    #
    # Uses the default hub limit and caps bridges at 10 entries.
    #
    # @return [Hash] Complete analysis with :orphans, :dead_ends, :hubs,
    #   :cycles, :bridges, and :stats
    def analyze
      computed_orphans = orphans
      computed_dead_ends = dead_ends
      computed_hubs = hubs
      computed_cycles = cycles
      computed_bridges = bridges(limit: 10)

      {
        orphans: computed_orphans,
        dead_ends: computed_dead_ends,
        hubs: computed_hubs,
        cycles: computed_cycles,
        bridges: computed_bridges,
        stats: {
          orphan_count: computed_orphans.size,
          dead_end_count: computed_dead_ends.size,
          hub_count: computed_hubs.size,
          cycle_count: computed_cycles.size
        }
      }
    end

    private

    # ──────────────────────────────────────────────────────────────────────
    # Graph Accessors
    # ──────────────────────────────────────────────────────────────────────

    # Cache the full graph serialization once, avoiding repeated to_h calls.
    #
    # @return [Hash] Full graph data
    def graph_data
      @graph_data ||= @graph.to_h
    end

    # Access graph nodes from cached graph data.
    #
    # @return [Hash] identifier => { type:, file_path:, namespace: }
    def graph_nodes
      @graph_nodes ||= graph_data[:nodes]
    end

    # Access graph forward edges from cached graph data.
    #
    # @return [Hash] identifier => [dependency identifiers]
    def graph_edges
      @graph_edges ||= graph_data[:edges]
    end

    # ──────────────────────────────────────────────────────────────────────
    # Cycle Detection (Three-Color DFS)
    # ──────────────────────────────────────────────────────────────────────

    # Detects cycles using iterative DFS with white/gray/black coloring.
    #
    # - White (unvisited): node has not been seen
    # - Gray (in-progress): node is on the current DFS path
    # - Black (complete): node and all its descendants are fully explored
    #
    # When we encounter a gray node, we've found a back edge and therefore a
    # cycle, which is extracted from the path stack. One cycle is recorded per
    # back edge (deduplicated by rotation), so this reports a representative set
    # of cycles rather than every elementary cycle of the graph.
    #
    # @return [Array<Array<String>>] Detected cycles, in discovery order
    def detect_cycles
      nodes = graph_nodes
      return [] if nodes.empty?

      white = 0
      gray = 1
      black = 2

      color = Hash.new(white)
      found_cycles = []
      seen_cycle_signatures = Set.new

      nodes.each_key do |start_node|
        next unless color[start_node] == white

        # Iterative DFS using an explicit stack.
        # Each entry is [node, :enter] or [node, :exit].
        stack = [[start_node, :enter]]

        # Track the current DFS path for cycle extraction.
        path = []

        until stack.empty?
          node, action = stack.pop

          if action == :exit
            color[node] = black
            path.pop
            next
          end

          # A node may be queued by several parents; only the first entry wins.
          next unless color[node] == white

          color[node] = gray
          path.push(node)
          # The exit marker sits below the children so it runs after them.
          stack.push([node, :exit])

          @graph.dependencies_of(node).each do |neighbor|
            case color[neighbor]
            when white
              stack.push([neighbor, :enter])
            when gray
              # Back edge to a node on the current path — extract the cycle.
              cycle = extract_cycle_from_path(path, neighbor)
              next unless cycle

              # Set#add? returns nil when the signature was already recorded.
              found_cycles << cycle if seen_cycle_signatures.add?(normalize_cycle_signature(cycle))
            end
            # black nodes are fully explored, skip them
          end
        end
      end

      found_cycles
    end

    # Extracts a cycle from the current DFS path when a back-edge to
    # +cycle_start+ is found.
    #
    # @param path [Array<String>] Current DFS path
    # @param cycle_start [String] The node that closes the cycle
    # @return [Array<String>, nil] The cycle path ending with cycle_start repeated,
    #   or nil if cycle_start is not in the path
    def extract_cycle_from_path(path, cycle_start)
      start_index = path.index(cycle_start)
      return nil unless start_index

      path[start_index..] + [cycle_start]
    end

    # Normalize a cycle so that duplicate rotations are treated as the same cycle.
    # For example, [A, B, C, A] and [B, C, A, B] are the same cycle.
    #
    # @param cycle [Array<String>] Cycle path with repeated last element
    # @return [String] Canonical string representation
    def normalize_cycle_signature(cycle)
      # Remove the trailing repeated element to get the raw loop
      loop_nodes = cycle[0..-2]
      return loop_nodes.join('->') if loop_nodes.empty?

      # Rotate so the lexicographically smallest element is first
      loop_nodes.rotate(loop_nodes.index(loop_nodes.min)).join('->')
    end

    # ──────────────────────────────────────────────────────────────────────
    # Bridge Detection (Sampled Betweenness Centrality)
    # ──────────────────────────────────────────────────────────────────────

    # Generate ordered pairs of distinct nodes for betweenness sampling.
    #
    # When +sample_size+ covers every possible ordered pair, all pairs are
    # returned directly; random rejection sampling with a bounded number of
    # attempts could otherwise miss some of them on small graphs. For larger
    # graphs, up to +sample_size+ distinct pairs are drawn using +rng+, giving up
    # after three attempts per requested pair.
    #
    # @param node_ids [Array<String>] All node identifiers
    # @param sample_size [Integer] Number of pairs to generate
    # @param rng [Random] Random number generator for reproducibility
    # @return [Array<Array<String>>] Pairs of [source, target]
    def generate_sample_pairs(node_ids, sample_size, rng)
      max_possible = node_ids.size * (node_ids.size - 1)
      return node_ids.permutation(2).to_a if sample_size >= max_possible

      pairs = Set.new
      attempts = 0
      max_attempts = sample_size * 3

      while pairs.size < sample_size && attempts < max_attempts
        a = node_ids[rng.rand(node_ids.size)]
        b = node_ids[rng.rand(node_ids.size)]
        pairs.add([a, b]) unless a == b
        attempts += 1
      end

      pairs.to_a
    end

    # BFS shortest path between two nodes, following forward edges.
    #
    # Tracks one predecessor per discovered node and rebuilds the path once the
    # target is reached, instead of copying a path array for every queued node.
    #
    # @param source [String] Starting node identifier
    # @param target [String] Target node identifier
    # @return [Array<String>, nil] Shortest path or nil if unreachable
    def bfs_shortest_path(source, target)
      return [source] if source == target

      # Doubles as the visited set; the source has no predecessor.
      predecessor = { source => nil }
      queue = [source]

      until queue.empty?
        current = queue.shift

        @graph.dependencies_of(current).each do |neighbor|
          next if predecessor.key?(neighbor)

          predecessor[neighbor] = current
          return rebuild_path(predecessor, source, target) if neighbor == target

          queue.push(neighbor)
        end
      end

      nil
    end

    # Walks the predecessor links back from +target+ to +source+.
    #
    # @param predecessor [Hash] node => node it was discovered from
    # @param source [String] Starting node identifier
    # @param target [String] Target node identifier (must be reachable)
    # @return [Array<String>] Path from source to target, inclusive
    def rebuild_path(predecessor, source, target)
      path = [target]
      path << predecessor.fetch(path.last) until path.last == source
      path.reverse
    end
  end
end
|