woods 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +169 -0
  3. data/README.md +20 -8
  4. data/exe/woods-console +51 -6
  5. data/exe/woods-console-mcp +24 -4
  6. data/exe/woods-mcp +30 -7
  7. data/exe/woods-mcp-http +47 -6
  8. data/lib/generators/woods/install_generator.rb +13 -4
  9. data/lib/generators/woods/templates/woods.rb.tt +155 -0
  10. data/lib/tasks/woods.rake +15 -50
  11. data/lib/woods/builder.rb +174 -9
  12. data/lib/woods/cache/cache_middleware.rb +360 -31
  13. data/lib/woods/chunking/semantic_chunker.rb +334 -7
  14. data/lib/woods/console/adapters/job_adapter.rb +10 -4
  15. data/lib/woods/console/audit_logger.rb +76 -4
  16. data/lib/woods/console/bridge.rb +48 -15
  17. data/lib/woods/console/bridge_protocol.rb +44 -0
  18. data/lib/woods/console/confirmation.rb +3 -4
  19. data/lib/woods/console/console_response_renderer.rb +56 -18
  20. data/lib/woods/console/credential_index.rb +201 -0
  21. data/lib/woods/console/credential_scanner.rb +302 -0
  22. data/lib/woods/console/dispatch_pipeline.rb +138 -0
  23. data/lib/woods/console/embedded_executor.rb +682 -35
  24. data/lib/woods/console/eval_guard.rb +319 -0
  25. data/lib/woods/console/model_validator.rb +1 -3
  26. data/lib/woods/console/rack_middleware.rb +185 -29
  27. data/lib/woods/console/redactor.rb +161 -0
  28. data/lib/woods/console/response_context.rb +127 -0
  29. data/lib/woods/console/safe_context.rb +220 -23
  30. data/lib/woods/console/scope_predicate_parser.rb +131 -0
  31. data/lib/woods/console/server.rb +417 -486
  32. data/lib/woods/console/sql_noise_stripper.rb +87 -0
  33. data/lib/woods/console/sql_table_scanner.rb +213 -0
  34. data/lib/woods/console/sql_validator.rb +81 -31
  35. data/lib/woods/console/table_gate.rb +93 -0
  36. data/lib/woods/console/tool_specs.rb +552 -0
  37. data/lib/woods/console/tools/tier1.rb +3 -3
  38. data/lib/woods/console/tools/tier4.rb +7 -1
  39. data/lib/woods/dependency_graph.rb +66 -7
  40. data/lib/woods/embedding/indexer.rb +190 -6
  41. data/lib/woods/embedding/openai.rb +40 -4
  42. data/lib/woods/embedding/provider.rb +104 -8
  43. data/lib/woods/embedding/text_preparer.rb +23 -3
  44. data/lib/woods/embedding/token_counter.rb +133 -0
  45. data/lib/woods/evaluation/baseline_runner.rb +20 -2
  46. data/lib/woods/evaluation/metrics.rb +4 -1
  47. data/lib/woods/extracted_unit.rb +1 -0
  48. data/lib/woods/extractor.rb +7 -1
  49. data/lib/woods/extractors/controller_extractor.rb +6 -0
  50. data/lib/woods/extractors/mailer_extractor.rb +16 -2
  51. data/lib/woods/extractors/model_extractor.rb +6 -1
  52. data/lib/woods/extractors/phlex_extractor.rb +13 -4
  53. data/lib/woods/extractors/rails_source_extractor.rb +2 -0
  54. data/lib/woods/extractors/route_helper_resolver.rb +130 -0
  55. data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
  56. data/lib/woods/extractors/view_component_extractor.rb +12 -1
  57. data/lib/woods/extractors/view_engines/base.rb +141 -0
  58. data/lib/woods/extractors/view_engines/erb.rb +145 -0
  59. data/lib/woods/extractors/view_template_extractor.rb +92 -133
  60. data/lib/woods/flow_assembler.rb +23 -15
  61. data/lib/woods/flow_precomputer.rb +21 -2
  62. data/lib/woods/graph_analyzer.rb +3 -4
  63. data/lib/woods/index_artifact.rb +173 -0
  64. data/lib/woods/mcp/bearer_auth.rb +45 -0
  65. data/lib/woods/mcp/bootstrap_state.rb +94 -0
  66. data/lib/woods/mcp/bootstrapper.rb +337 -16
  67. data/lib/woods/mcp/config_resolver.rb +288 -0
  68. data/lib/woods/mcp/errors.rb +134 -0
  69. data/lib/woods/mcp/index_reader.rb +265 -30
  70. data/lib/woods/mcp/origin_guard.rb +132 -0
  71. data/lib/woods/mcp/provider_probe.rb +166 -0
  72. data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
  73. data/lib/woods/mcp/renderers/markdown_renderer.rb +39 -3
  74. data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
  75. data/lib/woods/mcp/server.rb +737 -137
  76. data/lib/woods/model_name_cache.rb +78 -2
  77. data/lib/woods/notion/client.rb +25 -2
  78. data/lib/woods/notion/mappers/model_mapper.rb +36 -2
  79. data/lib/woods/railtie.rb +55 -15
  80. data/lib/woods/resilience/circuit_breaker.rb +9 -2
  81. data/lib/woods/resilience/retryable_provider.rb +40 -3
  82. data/lib/woods/resolved_config.rb +299 -0
  83. data/lib/woods/retrieval/context_assembler.rb +112 -5
  84. data/lib/woods/retrieval/query_classifier.rb +1 -1
  85. data/lib/woods/retrieval/ranker.rb +55 -6
  86. data/lib/woods/retrieval/search_executor.rb +42 -13
  87. data/lib/woods/retriever.rb +330 -24
  88. data/lib/woods/session_tracer/middleware.rb +35 -1
  89. data/lib/woods/storage/graph_store.rb +39 -0
  90. data/lib/woods/storage/inapplicable_backend.rb +14 -0
  91. data/lib/woods/storage/metadata_store.rb +129 -1
  92. data/lib/woods/storage/pgvector.rb +70 -8
  93. data/lib/woods/storage/qdrant.rb +196 -5
  94. data/lib/woods/storage/snapshotter/metadata.rb +172 -0
  95. data/lib/woods/storage/snapshotter/vector.rb +238 -0
  96. data/lib/woods/storage/snapshotter.rb +24 -0
  97. data/lib/woods/storage/vector_store.rb +184 -35
  98. data/lib/woods/tasks.rb +85 -0
  99. data/lib/woods/temporal/snapshot_store.rb +49 -1
  100. data/lib/woods/token_utils.rb +44 -5
  101. data/lib/woods/unblocked/client.rb +1 -1
  102. data/lib/woods/unblocked/document_builder.rb +35 -10
  103. data/lib/woods/unblocked/exporter.rb +1 -1
  104. data/lib/woods/util/host_guard.rb +61 -0
  105. data/lib/woods/version.rb +1 -1
  106. data/lib/woods.rb +126 -6
  107. metadata +69 -4
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative '../token_utils'
4
+
3
5
  module Woods
4
6
  module Embedding
5
7
  # Prepares ExtractedUnit data for embedding by building context-prefixed text.
@@ -19,12 +21,25 @@ module Woods
19
21
  # chunks = preparer.prepare_chunks(unit)
20
22
  class TextPreparer
21
23
  DEFAULT_MAX_TOKENS = 8192
24
+ # Aliased to the single source of truth in {Woods::TokenUtils} so the
25
+ # OpenAI 4.0 / Ollama 1.5 ratios stay consistent across TextPreparer,
26
+ # ContextAssembler, Builder, and cost_model/. See
27
+ # docs/TOKEN_BENCHMARK.md and lib/woods/token_utils.rb.
28
+ DEFAULT_CHARS_PER_TOKEN = TokenUtils::DEFAULT_CHARS_PER_TOKEN
22
29
 
23
30
  # @param max_tokens [Integer] maximum token budget for prepared text
24
- def initialize(max_tokens: DEFAULT_MAX_TOKENS)
31
+ # @param chars_per_token [Float] tokenizer-calibrated char/token ratio
32
+ def initialize(max_tokens: DEFAULT_MAX_TOKENS, chars_per_token: DEFAULT_CHARS_PER_TOKEN)
25
33
  @max_tokens = max_tokens
34
+ @chars_per_token = chars_per_token
26
35
  end
27
36
 
37
+ # @return [Float] configured chars-per-token ratio
38
+ attr_reader :chars_per_token
39
+
40
+ # @return [Integer] configured token budget
41
+ attr_reader :max_tokens
42
+
28
43
  # Prepare text for embedding from an ExtractedUnit.
29
44
  #
30
45
  # Builds a context prefix and appends the unit's source code (or first
@@ -98,13 +113,18 @@ module Woods
98
113
 
99
114
  # Truncate text to fit within the token budget.
100
115
  #
116
+ # Uses the configured `chars_per_token` ratio to estimate both the
117
+ # token count and the safe character cap. Truncation is a last
118
+ # resort — by the time text reaches here the chunker should have
119
+ # already split oversize units into pieces that fit.
120
+ #
101
121
  # @param text [String] the text to truncate
102
122
  # @return [String] text within token limits
103
123
  def enforce_token_limit(text)
104
- estimated = (text.length / 4.0).ceil
124
+ estimated = (text.length / @chars_per_token).ceil
105
125
  return text if estimated <= @max_tokens
106
126
 
107
- max_chars = (@max_tokens * 4.0).floor
127
+ max_chars = (@max_tokens * @chars_per_token).floor
108
128
  text[0...max_chars]
109
129
  end
110
130
  end
@@ -0,0 +1,133 @@
# frozen_string_literal: true

require 'set'

module Woods
  module Embedding
    # Exact or estimated token counts for embedding inputs.
    #
    # When the optional `tokenizers` gem (ankane) is installed, loads the
    # `bert-base-uncased` WordPiece tokenizer that nomic-embed-text is
    # built on and returns exact token counts. Otherwise falls back to a
    # conservative chars/token ratio and warns once.
    #
    # Exact counting is strictly preferred for the Ollama path — Ollama
    # v0.13.5+ stopped honouring the `truncate: true` flag on
    # `/api/embed` (see ollama/ollama#14186), so chunks that exceed
    # `num_ctx` return a 400 instead of being truncated. Client-side
    # sizing is the only reliable option until the regression is fixed
    # upstream, and chars/token ratios vary too widely across Rails
    # internals to cover every case with a fixed number.
    #
    # @example
    #   counter = Woods::Embedding::TokenCounter.new
    #   counter.count("ActionController::Metal::ConditionalGet") # => 13
    class TokenCounter
      # HuggingFace tokenizer id shared by every nomic-embed-text variant.
      BERT_MODEL = 'bert-base-uncased'

      # Conservative floor for when the tokenizer gem isn't installed.
      # Lower than any ratio we've observed failing in the testbed
      # against dense Rails source. Still approximate — install
      # `tokenizers` for exact counts.
      CONSERVATIVE_CHARS_PER_TOKEN = 1.2

      # Per-process dedup state so multiple TokenCounter instances (one per
      # retriever build, plus one per chunker, plus tests) don't each spam
      # the same fallback warning. The mutex keeps the dedup set consistent
      # when several threads hit the fallback path at once.
      #
      # These are class-level instance variables of TokenCounter itself and
      # are NOT inherited by subclasses, so every access below goes through
      # `TokenCounter` explicitly rather than `self.class`.
      @warned_messages = Set.new
      @warned_mutex = Mutex.new

      class << self
        # @return [Set<String>, nil] messages already emitted (set on TokenCounter only)
        # @return [Mutex, nil] guard for the dedup set (set on TokenCounter only)
        attr_reader :warned_messages, :warned_mutex

        # Reset the per-process warning dedup. For tests only — production
        # callers should never need to clear it.
        #
        # @return [Set<String>] the now-empty dedup set
        def reset_warned!
          TokenCounter.warned_mutex.synchronize { TokenCounter.warned_messages.clear }
        end
      end

      # @param chars_per_token [Numeric] fallback ratio when the tokenizer
      #   is unavailable
      # @param tokenizer_id [String] HuggingFace model id passed to
      #   `Tokenizers.from_pretrained`
      def initialize(chars_per_token: CONSERVATIVE_CHARS_PER_TOKEN, tokenizer_id: BERT_MODEL)
        @chars_per_token = chars_per_token
        @tokenizer_id = tokenizer_id
        @load_attempted = false
        @load_mutex = Mutex.new
      end

      # @return [Numeric] fallback chars-per-token ratio
      attr_reader :chars_per_token

      # Exact token count when the tokenizer is loaded, chars/token
      # estimate otherwise.
      #
      # @param text [String, nil]
      # @return [Integer] 0 for nil or empty text
      # @raise [ArgumentError] on the estimate path when the configured
      #   ratio is not a positive real number
      def count(text)
        return 0 if text.nil? || text.empty?

        tok = tokenizer
        tok ? tok.encode(text).ids.length : estimate(text)
      end

      # True when the real tokenizer is loaded and in use. Triggers the
      # lazy tokenizer load on first call.
      #
      # @return [Boolean]
      def exact?
        !tokenizer.nil?
      end

      private

      # Estimate the token count from the character length.
      #
      # Divides through a Rational so an Integer ratio (e.g. `4`) is not
      # silently truncated by integer division — 5 chars at 4 chars/token
      # must round up to 2 tokens, not down to 1. Float ratios still yield
      # ordinary Float division.
      #
      # @param text [String] non-empty text
      # @return [Integer]
      # @raise [ArgumentError] when the ratio is not a positive real number
      def estimate(text)
        ratio = @chars_per_token
        unless ratio.is_a?(Numeric) && ratio.real? && ratio.positive?
          raise ArgumentError, "chars_per_token must be a positive number, got #{ratio.inspect}"
        end

        (text.length.to_r / ratio).ceil
      end

      # Lazy-load the tokenizer under a mutex so concurrent first-calls
      # don't each trigger a separate download. After the first attempt
      # (successful or not) we memoize the result and skip the load path.
      #
      # @return [Object, nil] the loaded tokenizer, or nil when unavailable
      def tokenizer
        @load_mutex.synchronize do
          return @tokenizer if @load_attempted

          @load_attempted = true
          @tokenizer = try_load
        end
      end

      # Attempt to require the `tokenizers` gem and load the configured
      # model. Every failure is downgraded to a one-time warning and a nil
      # return so callers fall back to the chars/token estimate.
      #
      # @return [Object, nil]
      def try_load
        require 'tokenizers'
        Tokenizers.from_pretrained(@tokenizer_id)
      rescue LoadError
        warn_once(
          'Exact token counting disabled: `tokenizers` gem not installed. ' \
          "Falling back to #{@chars_per_token} chars/token estimation. " \
          "Add `gem 'tokenizers', '~> 0.5'` to your Gemfile for exact sizing on Ollama."
        )
        nil
      rescue StandardError => e
        warn_once(
          "Could not load tokenizer #{@tokenizer_id.inspect} " \
          "(#{e.class}: #{e.message}). Falling back to chars/token estimate."
        )
        nil
      end

      # Emit a `[woods]`-prefixed warning at most once per process for a
      # given message text.
      #
      # @param message [String]
      # @return [nil]
      def warn_once(message)
        full = "[woods] #{message}"
        TokenCounter.warned_mutex.synchronize do
          return if TokenCounter.warned_messages.include?(full)

          TokenCounter.warned_messages << full
        end
        # Emit outside the lock so a slow stderr never blocks other threads.
        Kernel.warn(full)
      end
    end
  end
end
@@ -19,8 +19,19 @@ module Woods
19
19
  VALID_STRATEGIES = %i[grep random file_level].freeze
20
20
 
21
21
  # @param metadata_store [Object] Store that responds to #all_identifiers and #find_by_type
22
- def initialize(metadata_store:)
22
+ # @param seed [Integer, nil] Optional RNG seed for the `:random`
23
+ # baseline. Seeding makes evaluation runs reproducible — essential
24
+ # for comparing the real retriever against the baseline on the same
25
+ # query set across two invocations. `nil` (default) keeps the
26
+ # historical behavior of drawing from system entropy.
27
+ def initialize(metadata_store:, seed: nil)
23
28
  @metadata_store = metadata_store
29
+ @random = seed.nil? ? Random.new : Random.new(seed)
30
+ # Ruby's `Random` instance isn't documented thread-safe; multiple
31
+ # evaluator threads sharing one runner would otherwise interleave
32
+ # `sample` calls and drift from the seeded sequence. A plain Mutex
33
+ # around the read path is enough — `sample` is the only caller.
34
+ @random_mutex = Mutex.new
24
35
  end
25
36
 
26
37
  # Run a baseline strategy for a query.
@@ -64,11 +75,18 @@ module Woods
64
75
 
65
76
  # Random strategy: random selection from all available units.
66
77
  #
78
+ # Uses the instance's injected {Random} generator so results are
79
+ # reproducible when {#initialize} was called with a seed. Guarded by
80
+ # a mutex so concurrent evaluator threads don't interleave calls on
81
+ # the shared `Random` and drift from the seeded sequence.
82
+ #
67
83
  # @param _query [String] Query string (unused)
68
84
  # @param limit [Integer] Max results
69
85
  # @return [Array<String>]
70
86
  def run_random(_query, limit)
71
- @metadata_store.all_identifiers.sample(limit)
87
+ @random_mutex.synchronize do
88
+ @metadata_store.all_identifiers.sample(limit, random: @random)
89
+ end
72
90
  end
73
91
 
74
92
  # File-level strategy: matches identifiers that look like file paths
@@ -22,7 +22,10 @@ module Woods
22
22
  top_k = retrieved.first(cutoff)
23
23
  relevant_set = relevant.to_set
24
24
  hits = top_k.count { |id| relevant_set.include?(id) }
25
- hits.to_f / cutoff
25
+ # Divide by actual slice size, not the cutoff — when fewer than
26
+ # `cutoff` items are retrieved, dividing by `cutoff` understates
27
+ # precision (returns 0.2 for 1-of-1 at cutoff=5 instead of 1.0).
28
+ hits.to_f / top_k.size
26
29
  end
27
30
 
28
31
  # Fraction of relevant items that were retrieved.
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'digest'
4
4
  require 'json'
5
+ require 'time' # Time#iso8601
5
6
 
6
7
  module Woods
7
8
  # ExtractedUnit represents a single meaningful unit of code from the codebase.
@@ -403,9 +403,15 @@ module Woods
403
403
  def extract_all_concurrent
404
404
  # Pre-compute ModelNameCache to avoid race on lazy memoization.
405
405
  # Multiple threads calling model_names concurrently could trigger
406
- # duplicate compute_model_names calls without this warm-up.
406
+ # duplicate compute_model_names calls without this warm-up. All four
407
+ # derived caches must be warmed — `short_name_map` and
408
+ # `short_names_regex` were added for the three-pass dependency
409
+ # scanner and are reached from every extractor that calls
410
+ # `scan_model_dependencies`.
407
411
  ModelNameCache.model_names
408
412
  ModelNameCache.model_names_regex
413
+ ModelNameCache.short_name_map if ModelNameCache.respond_to?(:short_name_map)
414
+ ModelNameCache.short_names_regex if ModelNameCache.respond_to?(:short_names_regex)
409
415
 
410
416
  results_mutex = Mutex.new
411
417
  threads = EXTRACTORS.map do |type, extractor_class|
@@ -4,6 +4,7 @@ require 'digest'
4
4
  require_relative 'ast_source_extraction'
5
5
  require_relative 'shared_utility_methods'
6
6
  require_relative 'shared_dependency_scanner'
7
+ require_relative 'route_helper_resolver'
7
8
 
8
9
  module Woods
9
10
  module Extractors
@@ -25,9 +26,11 @@ module Woods
25
26
  include AstSourceExtraction
26
27
  include SharedUtilityMethods
27
28
  include SharedDependencyScanner
29
+ include RouteHelperResolver
28
30
 
29
31
  def initialize
30
32
  @routes_map = build_routes_map
33
+ build_route_helper_map
31
34
  end
32
35
 
33
36
  # Extract all controllers in the application
@@ -312,6 +315,9 @@ module Woods
312
315
  source.scan(%r{render\s+["'](\w+/\w+)["']}).flatten.uniq.each do |template|
313
316
  deps << { type: :view, target: template, via: :render }
314
317
  end
318
+
319
+ # redirect_to with named route helpers
320
+ deps.concat(scan_navigation_dependencies(source, via_type: :redirect_to))
315
321
  end
316
322
 
317
323
  deps.uniq { |d| [d[:type], d[:target]] }
@@ -4,6 +4,7 @@ require 'digest'
4
4
  require_relative 'ast_source_extraction'
5
5
  require_relative 'shared_utility_methods'
6
6
  require_relative 'shared_dependency_scanner'
7
+ require_relative 'route_helper_resolver'
7
8
 
8
9
  module Woods
9
10
  module Extractors
@@ -23,9 +24,11 @@ module Woods
23
24
  include AstSourceExtraction
24
25
  include SharedUtilityMethods
25
26
  include SharedDependencyScanner
27
+ include RouteHelperResolver
26
28
 
27
29
  def initialize
28
30
  @mailer_base = defined?(ApplicationMailer) ? ApplicationMailer : ActionMailer::Base
31
+ build_route_helper_map
29
32
  end
30
33
 
31
34
  # Extract all mailers in the application
@@ -223,8 +226,19 @@ module Woods
223
226
  deps = []
224
227
  deps.concat(scan_model_dependencies(source))
225
228
  deps.concat(scan_service_dependencies(source))
226
-
227
- # URL helpers (indicates what resources emails link to)
229
+ # Navigation edges — resolve `_path`/`_url` helpers to real
230
+ # controllers via RouteHelperResolver (wired through the include +
231
+ # build_route_helper_map call in #initialize). This adds resolved
232
+ # {type: :controller, via: :link_to} edges on top of the raw
233
+ # helper scan below.
234
+ deps.concat(scan_navigation_dependencies(source))
235
+ deps.concat(scan_form_dependencies(source))
236
+
237
+ # Raw-helper fallback — emits {type: :route, target: 'confirmation'}
238
+ # for every `_path`/`_url` helper referenced, regardless of whether
239
+ # RouteHelperResolver could resolve it. Kept so mailers that link to
240
+ # engine-mounted routes (not in the main routes table) still produce
241
+ # a dependency edge.
228
242
  source.scan(/(\w+)_(?:url|path)/).flatten.uniq.each do |route|
229
243
  deps << { type: :route, target: route, via: :url_helper }
230
244
  end
@@ -530,7 +530,12 @@ module Woods
530
530
  conditions: format_callback_conditions(cb)
531
531
  }
532
532
  end
533
- rescue NoMethodError
533
+ rescue StandardError
534
+ # Widen beyond NoMethodError per CLAUDE.md — callback-chain
535
+ # introspection can raise a variety of errors across Rails
536
+ # versions (NameError, TypeError, LoadError for missing
537
+ # concerns), and silently swallowing only NoMethodError left
538
+ # the rest to crash extraction.
534
539
  []
535
540
  end.compact
536
541
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require_relative 'shared_utility_methods'
4
4
  require_relative 'shared_dependency_scanner'
5
+ require_relative 'route_helper_resolver'
5
6
 
6
7
  module Woods
7
8
  module Extractors
@@ -23,6 +24,7 @@ module Woods
23
24
  class PhlexExtractor
24
25
  include SharedUtilityMethods
25
26
  include SharedDependencyScanner
27
+ include RouteHelperResolver
26
28
 
27
29
  # Common Phlex base classes to look for
28
30
  PHLEX_BASES = %w[
@@ -33,6 +35,10 @@ module Woods
33
35
 
34
36
  def initialize
35
37
  @component_base = find_component_base
38
+ # Precompute the _path/_url → controller#action map once per
39
+ # extraction run so navigation edges resolve to real targets
40
+ # instead of the unresolved helper literal.
41
+ build_route_helper_map
36
42
  end
37
43
 
38
44
  # Extract all Phlex/ViewComponent components
@@ -240,10 +246,13 @@ module Woods
240
246
  deps << { type: :stimulus_controller, target: controller, via: :html_attribute }
241
247
  end
242
248
 
243
- # URL helpers
244
- source.scan(/(\w+)_(?:path|url)/).flatten.uniq.each do |route|
245
- deps << { type: :route, target: route, via: :url_helper }
246
- end
249
+ # Navigation edges — resolve _path / _url helpers to real controllers
250
+ # via RouteHelperResolver (wired through the include + build_route_helper_map
251
+ # call in #initialize). Replaces an earlier manual regex that emitted
252
+ # unresolved {type: :route, target: 'users'} edges; the graph now
253
+ # gets {type: :controller, target: 'UsersController', via: :link_to}.
254
+ deps.concat(scan_navigation_dependencies(source))
255
+ deps.concat(scan_form_dependencies(source))
247
256
 
248
257
  deps.uniq { |d| [d[:type], d[:target]] }
249
258
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative '../extracted_unit'
4
+
3
5
  module Woods
4
6
  module Extractors
5
7
  # RailsSourceExtractor indexes selected parts of the Rails framework
@@ -0,0 +1,130 @@
# frozen_string_literal: true

module Woods
  module Extractors
    # Shared module for resolving named route helpers to controller#action targets.
    #
    # Builds an inverse lookup from `Rails.application.routes.named_routes`,
    # mapping route helper names (e.g., "new_post") to their controller and action.
    # Include this module and call {#build_route_helper_map} in your initializer.
    #
    # @example
    #   class MyExtractor
    #     include RouteHelperResolver
    #
    #     def initialize
    #       build_route_helper_map
    #     end
    #
    #     def find_target(source)
    #       resolve_route_helper("posts_path")
    #       #=> { controller: "PostsController", action: "index", path: "/posts", verb: "GET" }
    #     end
    #   end
    #
    module RouteHelperResolver
      # Route helper prefixes that produce non-navigation dependencies.
      # These generate asset URLs or are common false positives from
      # non-route uses of _path/_url suffixes in Ruby code.
      #
      # NOTE: `root` is intentionally on this ignore list — root_path is the
      # most common Rails route helper, but it appears so frequently in
      # non-navigation contexts (path construction, config, tests) that it
      # generates excessive noise. The tradeoff: "what links to the home
      # page?" won't appear in graph queries.
      # Add new prefixes here when false positives are discovered in host apps.
      IGNORED_HELPER_PREFIXES = %w[
        asset
        image
        stylesheet
        javascript
        font
        audio
        video
        turbo_stream
        file
        tmp
        base
        root
        log
        socket
        download
      ].freeze

      # Precompiled form of {IGNORED_HELPER_PREFIXES}: matches a helper base
      # name that is exactly an ignored prefix or starts with "<prefix>_".
      IGNORED_HELPER_PATTERN =
        /\A(?:#{Regexp.union(IGNORED_HELPER_PREFIXES).source})(?:_|\z)/.freeze

      # Build the route helper lookup map from Rails named routes.
      # Call this once in your extractor's initialize method.
      #
      # Resilient to partial test doubles: any StandardError raised while
      # traversing Rails routes (unstubbed `application` on a double,
      # missing `named_routes`, etc.) is swallowed and leaves the map
      # empty — extractors fall back to returning the helper name
      # literal as the dependency target.
      #
      # Routes without both a `:controller` and an `:action` default are
      # skipped.
      def build_route_helper_map
        @route_helper_map = {}
        return unless defined?(Rails)

        routes = safe_rails_application_routes
        return unless routes

        routes.named_routes.each do |name, route|
          controller = route.defaults[:controller]
          action = route.defaults[:action]
          next unless controller && action

          @route_helper_map[name.to_s] = {
            controller: "#{controller.camelize}Controller",
            action: action,
            # Drop the optional format suffix so the stored path reads "/posts".
            path: route.path.spec.to_s.gsub('(.:format)', ''),
            verb: extract_route_verb(route)
          }
        end
      rescue StandardError
        # Leave @route_helper_map empty — navigation-edge extractors will
        # fall back to the helper-name literal.
        @route_helper_map = {}
      end

      # Returns Rails.application.routes when it is reachable, nil otherwise.
      # Probing via `respond_to?` first so partial RSpec doubles that haven't
      # stubbed `.application` don't raise MockExpectationError (which
      # descends from Exception, not StandardError — `rescue StandardError`
      # would not catch it).
      #
      # @return [Object, nil] the application's route set, or nil
      def safe_rails_application_routes
        return nil unless Rails.respond_to?(:application)

        app = Rails.application
        return nil unless app.respond_to?(:routes)

        app.routes
      rescue StandardError
        nil
      end

      # Resolve a _path/_url helper to its controller#action target.
      #
      # Accepts Strings and Symbols; nil resolves to nil rather than raising.
      #
      # @param helper_name [String, Symbol, nil] e.g., "new_post_path", :users_url
      # @return [Hash, nil] { controller:, action:, path:, verb: } or nil if
      #   the helper is ignored, unknown, or the map has not been built
      def resolve_route_helper(helper_name)
        base = helper_name.to_s.sub(/_(path|url)\z/, '')
        return nil if IGNORED_HELPER_PATTERN.match?(base)

        @route_helper_map&.fetch(base, nil)
      end

      private

      # Extract the HTTP verb from a route.
      #
      # A String verb is returned as-is. Any other verb object is
      # stringified and reduced to uppercase letters and `|` (so a Regexp
      # such as /^GET$/ becomes "GET"). Routes without a `verb` method
      # default to "GET".
      #
      # @param route [ActionDispatch::Journey::Route] The route object
      # @return [String] HTTP verb (GET, POST, etc.)
      def extract_route_verb(route)
        return 'GET' unless route.respond_to?(:verb)

        verb = route.verb
        verb.is_a?(String) ? verb : verb.to_s.gsub(/[^A-Z|]/, '')
      end
    end
  end
end