woods 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +169 -0
- data/README.md +20 -8
- data/exe/woods-console +51 -6
- data/exe/woods-console-mcp +24 -4
- data/exe/woods-mcp +30 -7
- data/exe/woods-mcp-http +47 -6
- data/lib/generators/woods/install_generator.rb +13 -4
- data/lib/generators/woods/templates/woods.rb.tt +155 -0
- data/lib/tasks/woods.rake +15 -50
- data/lib/woods/builder.rb +174 -9
- data/lib/woods/cache/cache_middleware.rb +360 -31
- data/lib/woods/chunking/semantic_chunker.rb +334 -7
- data/lib/woods/console/adapters/job_adapter.rb +10 -4
- data/lib/woods/console/audit_logger.rb +76 -4
- data/lib/woods/console/bridge.rb +48 -15
- data/lib/woods/console/bridge_protocol.rb +44 -0
- data/lib/woods/console/confirmation.rb +3 -4
- data/lib/woods/console/console_response_renderer.rb +56 -18
- data/lib/woods/console/credential_index.rb +201 -0
- data/lib/woods/console/credential_scanner.rb +302 -0
- data/lib/woods/console/dispatch_pipeline.rb +138 -0
- data/lib/woods/console/embedded_executor.rb +682 -35
- data/lib/woods/console/eval_guard.rb +319 -0
- data/lib/woods/console/model_validator.rb +1 -3
- data/lib/woods/console/rack_middleware.rb +185 -29
- data/lib/woods/console/redactor.rb +161 -0
- data/lib/woods/console/response_context.rb +127 -0
- data/lib/woods/console/safe_context.rb +220 -23
- data/lib/woods/console/scope_predicate_parser.rb +131 -0
- data/lib/woods/console/server.rb +417 -486
- data/lib/woods/console/sql_noise_stripper.rb +87 -0
- data/lib/woods/console/sql_table_scanner.rb +213 -0
- data/lib/woods/console/sql_validator.rb +81 -31
- data/lib/woods/console/table_gate.rb +93 -0
- data/lib/woods/console/tool_specs.rb +552 -0
- data/lib/woods/console/tools/tier1.rb +3 -3
- data/lib/woods/console/tools/tier4.rb +7 -1
- data/lib/woods/dependency_graph.rb +66 -7
- data/lib/woods/embedding/indexer.rb +190 -6
- data/lib/woods/embedding/openai.rb +40 -4
- data/lib/woods/embedding/provider.rb +104 -8
- data/lib/woods/embedding/text_preparer.rb +23 -3
- data/lib/woods/embedding/token_counter.rb +133 -0
- data/lib/woods/evaluation/baseline_runner.rb +20 -2
- data/lib/woods/evaluation/metrics.rb +4 -1
- data/lib/woods/extracted_unit.rb +1 -0
- data/lib/woods/extractor.rb +7 -1
- data/lib/woods/extractors/controller_extractor.rb +6 -0
- data/lib/woods/extractors/mailer_extractor.rb +16 -2
- data/lib/woods/extractors/model_extractor.rb +6 -1
- data/lib/woods/extractors/phlex_extractor.rb +13 -4
- data/lib/woods/extractors/rails_source_extractor.rb +2 -0
- data/lib/woods/extractors/route_helper_resolver.rb +130 -0
- data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
- data/lib/woods/extractors/view_component_extractor.rb +12 -1
- data/lib/woods/extractors/view_engines/base.rb +141 -0
- data/lib/woods/extractors/view_engines/erb.rb +145 -0
- data/lib/woods/extractors/view_template_extractor.rb +92 -133
- data/lib/woods/flow_assembler.rb +23 -15
- data/lib/woods/flow_precomputer.rb +21 -2
- data/lib/woods/graph_analyzer.rb +3 -4
- data/lib/woods/index_artifact.rb +173 -0
- data/lib/woods/mcp/bearer_auth.rb +45 -0
- data/lib/woods/mcp/bootstrap_state.rb +94 -0
- data/lib/woods/mcp/bootstrapper.rb +337 -16
- data/lib/woods/mcp/config_resolver.rb +288 -0
- data/lib/woods/mcp/errors.rb +134 -0
- data/lib/woods/mcp/index_reader.rb +265 -30
- data/lib/woods/mcp/origin_guard.rb +132 -0
- data/lib/woods/mcp/provider_probe.rb +166 -0
- data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
- data/lib/woods/mcp/renderers/markdown_renderer.rb +39 -3
- data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
- data/lib/woods/mcp/server.rb +737 -137
- data/lib/woods/model_name_cache.rb +78 -2
- data/lib/woods/notion/client.rb +25 -2
- data/lib/woods/notion/mappers/model_mapper.rb +36 -2
- data/lib/woods/railtie.rb +55 -15
- data/lib/woods/resilience/circuit_breaker.rb +9 -2
- data/lib/woods/resilience/retryable_provider.rb +40 -3
- data/lib/woods/resolved_config.rb +299 -0
- data/lib/woods/retrieval/context_assembler.rb +112 -5
- data/lib/woods/retrieval/query_classifier.rb +1 -1
- data/lib/woods/retrieval/ranker.rb +55 -6
- data/lib/woods/retrieval/search_executor.rb +42 -13
- data/lib/woods/retriever.rb +330 -24
- data/lib/woods/session_tracer/middleware.rb +35 -1
- data/lib/woods/storage/graph_store.rb +39 -0
- data/lib/woods/storage/inapplicable_backend.rb +14 -0
- data/lib/woods/storage/metadata_store.rb +129 -1
- data/lib/woods/storage/pgvector.rb +70 -8
- data/lib/woods/storage/qdrant.rb +196 -5
- data/lib/woods/storage/snapshotter/metadata.rb +172 -0
- data/lib/woods/storage/snapshotter/vector.rb +238 -0
- data/lib/woods/storage/snapshotter.rb +24 -0
- data/lib/woods/storage/vector_store.rb +184 -35
- data/lib/woods/tasks.rb +85 -0
- data/lib/woods/temporal/snapshot_store.rb +49 -1
- data/lib/woods/token_utils.rb +44 -5
- data/lib/woods/unblocked/client.rb +1 -1
- data/lib/woods/unblocked/document_builder.rb +35 -10
- data/lib/woods/unblocked/exporter.rb +1 -1
- data/lib/woods/util/host_guard.rb +61 -0
- data/lib/woods/version.rb +1 -1
- data/lib/woods.rb +126 -6
- metadata +69 -4
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative '../token_utils'
|
|
4
|
+
|
|
3
5
|
module Woods
|
|
4
6
|
module Embedding
|
|
5
7
|
# Prepares ExtractedUnit data for embedding by building context-prefixed text.
|
|
@@ -19,12 +21,25 @@ module Woods
|
|
|
19
21
|
# chunks = preparer.prepare_chunks(unit)
|
|
20
22
|
class TextPreparer
|
|
21
23
|
DEFAULT_MAX_TOKENS = 8192
|
|
24
|
+
# Aliased to the single source of truth in {Woods::TokenUtils} so the
|
|
25
|
+
# OpenAI 4.0 / Ollama 1.5 ratios stay consistent across TextPreparer,
|
|
26
|
+
# ContextAssembler, Builder, and cost_model/. See
|
|
27
|
+
# docs/TOKEN_BENCHMARK.md and lib/woods/token_utils.rb.
|
|
28
|
+
DEFAULT_CHARS_PER_TOKEN = TokenUtils::DEFAULT_CHARS_PER_TOKEN
|
|
22
29
|
|
|
23
30
|
# @param max_tokens [Integer] maximum token budget for prepared text
|
|
24
|
-
|
|
31
|
+
# @param chars_per_token [Float] tokenizer-calibrated char/token ratio
|
|
32
|
+
def initialize(max_tokens: DEFAULT_MAX_TOKENS, chars_per_token: DEFAULT_CHARS_PER_TOKEN)
|
|
25
33
|
@max_tokens = max_tokens
|
|
34
|
+
@chars_per_token = chars_per_token
|
|
26
35
|
end
|
|
27
36
|
|
|
37
|
+
# @return [Float] configured chars-per-token ratio
|
|
38
|
+
attr_reader :chars_per_token
|
|
39
|
+
|
|
40
|
+
# @return [Integer] configured token budget
|
|
41
|
+
attr_reader :max_tokens
|
|
42
|
+
|
|
28
43
|
# Prepare text for embedding from an ExtractedUnit.
|
|
29
44
|
#
|
|
30
45
|
# Builds a context prefix and appends the unit's source code (or first
|
|
@@ -98,13 +113,18 @@ module Woods
|
|
|
98
113
|
|
|
99
114
|
# Truncate text to fit within the token budget.
|
|
100
115
|
#
|
|
116
|
+
# Uses the configured `chars_per_token` ratio to estimate both the
|
|
117
|
+
# token count and the safe character cap. Truncation is a last
|
|
118
|
+
# resort — by the time text reaches here the chunker should have
|
|
119
|
+
# already split oversize units into pieces that fit.
|
|
120
|
+
#
|
|
101
121
|
# @param text [String] the text to truncate
|
|
102
122
|
# @return [String] text within token limits
|
|
103
123
|
def enforce_token_limit(text)
|
|
104
|
-
estimated = (text.length /
|
|
124
|
+
estimated = (text.length / @chars_per_token).ceil
|
|
105
125
|
return text if estimated <= @max_tokens
|
|
106
126
|
|
|
107
|
-
max_chars = (@max_tokens *
|
|
127
|
+
max_chars = (@max_tokens * @chars_per_token).floor
|
|
108
128
|
text[0...max_chars]
|
|
109
129
|
end
|
|
110
130
|
end
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'set'
|
|
4
|
+
|
|
5
|
+
module Woods
|
|
6
|
+
module Embedding
|
|
7
|
+
# Exact or estimated token counts for embedding inputs.
|
|
8
|
+
#
|
|
9
|
+
# When the optional `tokenizers` gem (ankane) is installed, loads the
|
|
10
|
+
# `bert-base-uncased` WordPiece tokenizer that nomic-embed-text is
|
|
11
|
+
# built on and returns exact token counts. Otherwise falls back to a
|
|
12
|
+
# conservative chars/token ratio and warns once.
|
|
13
|
+
#
|
|
14
|
+
# Exact counting is strictly preferred for the Ollama path — Ollama
|
|
15
|
+
# v0.13.5+ stopped honouring the `truncate: true` flag on
|
|
16
|
+
# `/api/embed` (see ollama/ollama#14186), so chunks that exceed
|
|
17
|
+
# `num_ctx` return a 400 instead of being truncated. Client-side
|
|
18
|
+
# sizing is the only reliable option until the regression is fixed
|
|
19
|
+
# upstream, and chars/token ratios vary too widely across Rails
|
|
20
|
+
# internals to cover every case with a fixed number.
|
|
21
|
+
#
|
|
22
|
+
# @example
|
|
23
|
+
# counter = Woods::Embedding::TokenCounter.new
|
|
24
|
+
# counter.count("ActionController::Metal::ConditionalGet") # => 13
|
|
25
|
+
class TokenCounter
|
|
26
|
+
# HuggingFace tokenizer id shared by every nomic-embed-text variant.
|
|
27
|
+
BERT_MODEL = 'bert-base-uncased'
|
|
28
|
+
|
|
29
|
+
# Conservative floor for when the tokenizer gem isn't installed.
|
|
30
|
+
# Lower than any ratio we've observed failing in the testbed
|
|
31
|
+
# against dense Rails source. Still approximate — install
|
|
32
|
+
# `tokenizers` for exact counts.
|
|
33
|
+
CONSERVATIVE_CHARS_PER_TOKEN = 1.2
|
|
34
|
+
|
|
35
|
+
# @param chars_per_token [Float] fallback ratio when the tokenizer
|
|
36
|
+
# is unavailable
|
|
37
|
+
# @param tokenizer_id [String] HuggingFace model id passed to
|
|
38
|
+
# `Tokenizers.from_pretrained`
|
|
39
|
+
def initialize(chars_per_token: CONSERVATIVE_CHARS_PER_TOKEN, tokenizer_id: BERT_MODEL)
|
|
40
|
+
@chars_per_token = chars_per_token
|
|
41
|
+
@tokenizer_id = tokenizer_id
|
|
42
|
+
@load_attempted = false
|
|
43
|
+
@load_mutex = Mutex.new
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# @return [Float] fallback chars-per-token ratio
|
|
47
|
+
attr_reader :chars_per_token
|
|
48
|
+
|
|
49
|
+
# Exact token count when the tokenizer is loaded, chars/token
|
|
50
|
+
# estimate otherwise.
|
|
51
|
+
#
|
|
52
|
+
# @param text [String, nil]
|
|
53
|
+
# @return [Integer]
|
|
54
|
+
def count(text)
|
|
55
|
+
return 0 if text.nil? || text.empty?
|
|
56
|
+
|
|
57
|
+
tok = tokenizer
|
|
58
|
+
tok ? tok.encode(text).ids.length : estimate(text)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# True when the real tokenizer is loaded and in use.
|
|
62
|
+
#
|
|
63
|
+
# @return [Boolean]
|
|
64
|
+
def exact?
|
|
65
|
+
!tokenizer.nil?
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
private
|
|
69
|
+
|
|
70
|
+
def estimate(text)
|
|
71
|
+
(text.length / @chars_per_token).ceil
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# Lazy-load the tokenizer under a mutex so concurrent first-calls
|
|
75
|
+
# don't each trigger a separate download. After the first attempt
|
|
76
|
+
# (successful or not) we memoize the result and skip the load path.
|
|
77
|
+
def tokenizer
|
|
78
|
+
@load_mutex.synchronize do
|
|
79
|
+
return @tokenizer if @load_attempted
|
|
80
|
+
|
|
81
|
+
@load_attempted = true
|
|
82
|
+
@tokenizer = try_load
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def try_load
|
|
87
|
+
require 'tokenizers'
|
|
88
|
+
Tokenizers.from_pretrained(@tokenizer_id)
|
|
89
|
+
rescue LoadError
|
|
90
|
+
warn_once(
|
|
91
|
+
'Exact token counting disabled: `tokenizers` gem not installed. ' \
|
|
92
|
+
"Falling back to #{@chars_per_token} chars/token estimation. " \
|
|
93
|
+
"Add `gem 'tokenizers', '~> 0.5'` to your Gemfile for exact sizing on Ollama."
|
|
94
|
+
)
|
|
95
|
+
nil
|
|
96
|
+
rescue StandardError => e
|
|
97
|
+
warn_once(
|
|
98
|
+
"Could not load tokenizer #{@tokenizer_id.inspect} " \
|
|
99
|
+
"(#{e.class}: #{e.message}). Falling back to chars/token estimate."
|
|
100
|
+
)
|
|
101
|
+
nil
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Per-process dedup so multiple TokenCounter instances (one per
|
|
105
|
+
# retriever build, plus one per chunker, plus tests) don't each
|
|
106
|
+
# spam the same fallback warning. The mutex keeps the dedup set
|
|
107
|
+
# consistent under the same concurrent-first-call pattern that
|
|
108
|
+
# the per-instance load mutex protects against.
|
|
109
|
+
@warned_messages = Set.new
|
|
110
|
+
@warned_mutex = Mutex.new
|
|
111
|
+
|
|
112
|
+
class << self
|
|
113
|
+
attr_reader :warned_messages, :warned_mutex
|
|
114
|
+
|
|
115
|
+
# Reset the per-process warning dedup. For tests only — production
|
|
116
|
+
# callers should never need to clear it.
|
|
117
|
+
def reset_warned!
|
|
118
|
+
@warned_mutex.synchronize { @warned_messages.clear }
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
def warn_once(message)
|
|
123
|
+
full = "[woods] #{message}"
|
|
124
|
+
self.class.warned_mutex.synchronize do
|
|
125
|
+
return if self.class.warned_messages.include?(full)
|
|
126
|
+
|
|
127
|
+
self.class.warned_messages << full
|
|
128
|
+
end
|
|
129
|
+
Kernel.warn(full)
|
|
130
|
+
end
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
end
|
|
@@ -19,8 +19,19 @@ module Woods
|
|
|
19
19
|
VALID_STRATEGIES = %i[grep random file_level].freeze
|
|
20
20
|
|
|
21
21
|
# @param metadata_store [Object] Store that responds to #all_identifiers and #find_by_type
|
|
22
|
-
|
|
22
|
+
# @param seed [Integer, nil] Optional RNG seed for the `:random`
|
|
23
|
+
# baseline. Seeding makes evaluation runs reproducible — essential
|
|
24
|
+
# for comparing the real retriever against the baseline on the same
|
|
25
|
+
# query set across two invocations. `nil` (default) keeps the
|
|
26
|
+
# historical behavior of drawing from system entropy.
|
|
27
|
+
def initialize(metadata_store:, seed: nil)
|
|
23
28
|
@metadata_store = metadata_store
|
|
29
|
+
@random = seed.nil? ? Random.new : Random.new(seed)
|
|
30
|
+
# Ruby's `Random` instances aren't documented as thread-safe; multiple
|
|
31
|
+
# evaluator threads sharing one runner would otherwise interleave
|
|
32
|
+
# `sample` calls and drift from the seeded sequence. A plain Mutex
|
|
33
|
+
# around the read path is enough — `sample` is the only caller.
|
|
34
|
+
@random_mutex = Mutex.new
|
|
24
35
|
end
|
|
25
36
|
|
|
26
37
|
# Run a baseline strategy for a query.
|
|
@@ -64,11 +75,18 @@ module Woods
|
|
|
64
75
|
|
|
65
76
|
# Random strategy: random selection from all available units.
|
|
66
77
|
#
|
|
78
|
+
# Uses the instance's injected {Random} generator so results are
|
|
79
|
+
# reproducible when {#initialize} was called with a seed. Guarded by
|
|
80
|
+
# a mutex so concurrent evaluator threads don't interleave calls on
|
|
81
|
+
# the shared `Random` and drift from the seeded sequence.
|
|
82
|
+
#
|
|
67
83
|
# @param _query [String] Query string (unused)
|
|
68
84
|
# @param limit [Integer] Max results
|
|
69
85
|
# @return [Array<String>]
|
|
70
86
|
def run_random(_query, limit)
|
|
71
|
-
@
|
|
87
|
+
@random_mutex.synchronize do
|
|
88
|
+
@metadata_store.all_identifiers.sample(limit, random: @random)
|
|
89
|
+
end
|
|
72
90
|
end
|
|
73
91
|
|
|
74
92
|
# File-level strategy: matches identifiers that look like file paths
|
|
@@ -22,7 +22,10 @@ module Woods
|
|
|
22
22
|
top_k = retrieved.first(cutoff)
|
|
23
23
|
relevant_set = relevant.to_set
|
|
24
24
|
hits = top_k.count { |id| relevant_set.include?(id) }
|
|
25
|
-
|
|
25
|
+
# Divide by actual slice size, not the cutoff — when fewer than
|
|
26
|
+
# `cutoff` items are retrieved, dividing by `cutoff` understates
|
|
27
|
+
# precision (returns 0.2 for 1-of-1 at cutoff=5 instead of 1.0).
|
|
28
|
+
hits.to_f / top_k.size
|
|
26
29
|
end
|
|
27
30
|
|
|
28
31
|
# Fraction of relevant items that were retrieved.
|
data/lib/woods/extracted_unit.rb
CHANGED
data/lib/woods/extractor.rb
CHANGED
|
@@ -403,9 +403,15 @@ module Woods
|
|
|
403
403
|
def extract_all_concurrent
|
|
404
404
|
# Pre-compute ModelNameCache to avoid race on lazy memoization.
|
|
405
405
|
# Multiple threads calling model_names concurrently could trigger
|
|
406
|
-
# duplicate compute_model_names calls without this warm-up.
|
|
406
|
+
# duplicate compute_model_names calls without this warm-up. All four
|
|
407
|
+
# derived caches must be warmed — `short_name_map` and
|
|
408
|
+
# `short_names_regex` were added for the three-pass dependency
|
|
409
|
+
# scanner and are reached from every extractor that calls
|
|
410
|
+
# `scan_model_dependencies`.
|
|
407
411
|
ModelNameCache.model_names
|
|
408
412
|
ModelNameCache.model_names_regex
|
|
413
|
+
ModelNameCache.short_name_map if ModelNameCache.respond_to?(:short_name_map)
|
|
414
|
+
ModelNameCache.short_names_regex if ModelNameCache.respond_to?(:short_names_regex)
|
|
409
415
|
|
|
410
416
|
results_mutex = Mutex.new
|
|
411
417
|
threads = EXTRACTORS.map do |type, extractor_class|
|
|
@@ -4,6 +4,7 @@ require 'digest'
|
|
|
4
4
|
require_relative 'ast_source_extraction'
|
|
5
5
|
require_relative 'shared_utility_methods'
|
|
6
6
|
require_relative 'shared_dependency_scanner'
|
|
7
|
+
require_relative 'route_helper_resolver'
|
|
7
8
|
|
|
8
9
|
module Woods
|
|
9
10
|
module Extractors
|
|
@@ -25,9 +26,11 @@ module Woods
|
|
|
25
26
|
include AstSourceExtraction
|
|
26
27
|
include SharedUtilityMethods
|
|
27
28
|
include SharedDependencyScanner
|
|
29
|
+
include RouteHelperResolver
|
|
28
30
|
|
|
29
31
|
def initialize
|
|
30
32
|
@routes_map = build_routes_map
|
|
33
|
+
build_route_helper_map
|
|
31
34
|
end
|
|
32
35
|
|
|
33
36
|
# Extract all controllers in the application
|
|
@@ -312,6 +315,9 @@ module Woods
|
|
|
312
315
|
source.scan(%r{render\s+["'](\w+/\w+)["']}).flatten.uniq.each do |template|
|
|
313
316
|
deps << { type: :view, target: template, via: :render }
|
|
314
317
|
end
|
|
318
|
+
|
|
319
|
+
# redirect_to with named route helpers
|
|
320
|
+
deps.concat(scan_navigation_dependencies(source, via_type: :redirect_to))
|
|
315
321
|
end
|
|
316
322
|
|
|
317
323
|
deps.uniq { |d| [d[:type], d[:target]] }
|
|
@@ -4,6 +4,7 @@ require 'digest'
|
|
|
4
4
|
require_relative 'ast_source_extraction'
|
|
5
5
|
require_relative 'shared_utility_methods'
|
|
6
6
|
require_relative 'shared_dependency_scanner'
|
|
7
|
+
require_relative 'route_helper_resolver'
|
|
7
8
|
|
|
8
9
|
module Woods
|
|
9
10
|
module Extractors
|
|
@@ -23,9 +24,11 @@ module Woods
|
|
|
23
24
|
include AstSourceExtraction
|
|
24
25
|
include SharedUtilityMethods
|
|
25
26
|
include SharedDependencyScanner
|
|
27
|
+
include RouteHelperResolver
|
|
26
28
|
|
|
27
29
|
def initialize
|
|
28
30
|
@mailer_base = defined?(ApplicationMailer) ? ApplicationMailer : ActionMailer::Base
|
|
31
|
+
build_route_helper_map
|
|
29
32
|
end
|
|
30
33
|
|
|
31
34
|
# Extract all mailers in the application
|
|
@@ -223,8 +226,19 @@ module Woods
|
|
|
223
226
|
deps = []
|
|
224
227
|
deps.concat(scan_model_dependencies(source))
|
|
225
228
|
deps.concat(scan_service_dependencies(source))
|
|
226
|
-
|
|
227
|
-
#
|
|
229
|
+
# Navigation edges — resolve `_path`/`_url` helpers to real
|
|
230
|
+
# controllers via RouteHelperResolver (wired through the include +
|
|
231
|
+
# build_route_helper_map call in #initialize). This adds resolved
|
|
232
|
+
# {type: :controller, via: :link_to} edges on top of the raw
|
|
233
|
+
# helper scan below.
|
|
234
|
+
deps.concat(scan_navigation_dependencies(source))
|
|
235
|
+
deps.concat(scan_form_dependencies(source))
|
|
236
|
+
|
|
237
|
+
# Raw-helper fallback — emits {type: :route, target: 'confirmation'}
|
|
238
|
+
# for every `_path`/`_url` helper referenced, regardless of whether
|
|
239
|
+
# RouteHelperResolver could resolve it. Kept so mailers that link to
|
|
240
|
+
# engine-mounted routes (not in the main routes table) still produce
|
|
241
|
+
# a dependency edge.
|
|
228
242
|
source.scan(/(\w+)_(?:url|path)/).flatten.uniq.each do |route|
|
|
229
243
|
deps << { type: :route, target: route, via: :url_helper }
|
|
230
244
|
end
|
|
@@ -530,7 +530,12 @@ module Woods
|
|
|
530
530
|
conditions: format_callback_conditions(cb)
|
|
531
531
|
}
|
|
532
532
|
end
|
|
533
|
-
rescue
|
|
533
|
+
rescue StandardError
|
|
534
|
+
# Widen beyond NoMethodError per CLAUDE.md — callback-chain
|
|
535
|
+
# introspection can raise a variety of errors across Rails
|
|
536
|
+
# versions (NameError, TypeError, LoadError for missing
|
|
537
|
+
# concerns), and silently swallowing only NoMethodError left
|
|
538
|
+
# the rest to crash extraction.
|
|
534
539
|
[]
|
|
535
540
|
end.compact
|
|
536
541
|
end
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative 'shared_utility_methods'
|
|
4
4
|
require_relative 'shared_dependency_scanner'
|
|
5
|
+
require_relative 'route_helper_resolver'
|
|
5
6
|
|
|
6
7
|
module Woods
|
|
7
8
|
module Extractors
|
|
@@ -23,6 +24,7 @@ module Woods
|
|
|
23
24
|
class PhlexExtractor
|
|
24
25
|
include SharedUtilityMethods
|
|
25
26
|
include SharedDependencyScanner
|
|
27
|
+
include RouteHelperResolver
|
|
26
28
|
|
|
27
29
|
# Common Phlex base classes to look for
|
|
28
30
|
PHLEX_BASES = %w[
|
|
@@ -33,6 +35,10 @@ module Woods
|
|
|
33
35
|
|
|
34
36
|
def initialize
|
|
35
37
|
@component_base = find_component_base
|
|
38
|
+
# Precompute the _path/_url → controller#action map once per
|
|
39
|
+
# extraction run so navigation edges resolve to real targets
|
|
40
|
+
# instead of the unresolved helper literal.
|
|
41
|
+
build_route_helper_map
|
|
36
42
|
end
|
|
37
43
|
|
|
38
44
|
# Extract all Phlex/ViewComponent components
|
|
@@ -240,10 +246,13 @@ module Woods
|
|
|
240
246
|
deps << { type: :stimulus_controller, target: controller, via: :html_attribute }
|
|
241
247
|
end
|
|
242
248
|
|
|
243
|
-
#
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
249
|
+
# Navigation edges — resolve _path / _url helpers to real controllers
|
|
250
|
+
# via RouteHelperResolver (wired through the include + build_route_helper_map
|
|
251
|
+
# call in #initialize). Replaces an earlier manual regex that emitted
|
|
252
|
+
# unresolved {type: :route, target: 'users'} edges; the graph now
|
|
253
|
+
# gets {type: :controller, target: 'UsersController', via: :link_to}.
|
|
254
|
+
deps.concat(scan_navigation_dependencies(source))
|
|
255
|
+
deps.concat(scan_form_dependencies(source))
|
|
247
256
|
|
|
248
257
|
deps.uniq { |d| [d[:type], d[:target]] }
|
|
249
258
|
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Woods
|
|
4
|
+
module Extractors
|
|
5
|
+
# Shared module for resolving named route helpers to controller#action targets.
|
|
6
|
+
#
|
|
7
|
+
# Builds an inverse lookup from `Rails.application.routes.named_routes`,
|
|
8
|
+
# mapping route helper names (e.g., "new_post") to their controller and action.
|
|
9
|
+
# Include this module and call {#build_route_helper_map} in your initializer.
|
|
10
|
+
#
|
|
11
|
+
# @example
|
|
12
|
+
# class MyExtractor
|
|
13
|
+
# include RouteHelperResolver
|
|
14
|
+
#
|
|
15
|
+
# def initialize
|
|
16
|
+
# build_route_helper_map
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# def find_target(source)
|
|
20
|
+
# resolve_route_helper("posts_path")
|
|
21
|
+
# #=> { controller: "PostsController", action: "index", path: "/posts", verb: "GET" }
|
|
22
|
+
# end
|
|
23
|
+
# end
|
|
24
|
+
#
|
|
25
|
+
module RouteHelperResolver
|
|
26
|
+
# Route helper prefixes that produce non-navigation dependencies.
|
|
27
|
+
# These generate asset URLs or are common false positives from
|
|
28
|
+
# non-route uses of _path/_url suffixes in Ruby code.
|
|
29
|
+
#
|
|
30
|
+
# NOTE: `root` is intentionally ignored (listed below) — root_path is the most common
|
|
31
|
+
# Rails route helper, but it appears so frequently in non-navigation contexts
|
|
32
|
+
# (path construction, config, tests) that it generates excessive noise.
|
|
33
|
+
# The tradeoff: "what links to the home page?" won't appear in graph queries.
|
|
34
|
+
# Add new prefixes here when false positives are discovered in host apps.
|
|
35
|
+
IGNORED_HELPER_PREFIXES = %w[
|
|
36
|
+
asset
|
|
37
|
+
image
|
|
38
|
+
stylesheet
|
|
39
|
+
javascript
|
|
40
|
+
font
|
|
41
|
+
audio
|
|
42
|
+
video
|
|
43
|
+
turbo_stream
|
|
44
|
+
file
|
|
45
|
+
tmp
|
|
46
|
+
base
|
|
47
|
+
root
|
|
48
|
+
log
|
|
49
|
+
socket
|
|
50
|
+
download
|
|
51
|
+
].freeze
|
|
52
|
+
|
|
53
|
+
# Build the route helper lookup map from Rails named routes.
|
|
54
|
+
# Call this once in your extractor's initialize method.
|
|
55
|
+
#
|
|
56
|
+
# Resilient to partial test doubles: any StandardError raised while
|
|
57
|
+
# traversing Rails routes (unstubbed `application` on a double,
|
|
58
|
+
# missing `named_routes`, etc.) is swallowed and leaves the map
|
|
59
|
+
# empty — extractors fall back to returning the helper name
|
|
60
|
+
# literal as the dependency target.
|
|
61
|
+
def build_route_helper_map
|
|
62
|
+
@route_helper_map = {}
|
|
63
|
+
return unless defined?(Rails)
|
|
64
|
+
|
|
65
|
+
routes = safe_rails_application_routes
|
|
66
|
+
return unless routes
|
|
67
|
+
|
|
68
|
+
routes.named_routes.each do |name, route|
|
|
69
|
+
controller = route.defaults[:controller]
|
|
70
|
+
action = route.defaults[:action]
|
|
71
|
+
next unless controller && action
|
|
72
|
+
|
|
73
|
+
@route_helper_map[name.to_s] = {
|
|
74
|
+
controller: "#{controller.camelize}Controller",
|
|
75
|
+
action: action,
|
|
76
|
+
path: route.path.spec.to_s.gsub('(.:format)', ''),
|
|
77
|
+
verb: extract_route_verb(route)
|
|
78
|
+
}
|
|
79
|
+
end
|
|
80
|
+
rescue StandardError
|
|
81
|
+
# Leave @route_helper_map empty — navigation-edge extractors will
|
|
82
|
+
# fall back to the helper-name literal.
|
|
83
|
+
@route_helper_map = {}
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
# Returns Rails.application.routes when reachable, nil otherwise. Probes via
|
|
87
|
+
# `respond_to?` first so partial RSpec doubles that haven't
|
|
88
|
+
# stubbed `.application` don't raise MockExpectationError (which
|
|
89
|
+
# descends from Exception, not StandardError — `rescue StandardError`
|
|
90
|
+
# would not catch it).
|
|
91
|
+
def safe_rails_application_routes
|
|
92
|
+
return nil unless Rails.respond_to?(:application)
|
|
93
|
+
|
|
94
|
+
app = Rails.application
|
|
95
|
+
return nil unless app.respond_to?(:routes)
|
|
96
|
+
|
|
97
|
+
app.routes
|
|
98
|
+
rescue StandardError
|
|
99
|
+
nil
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
# Resolve a _path/_url helper to its controller#action target.
|
|
103
|
+
#
|
|
104
|
+
# @param helper_name [String] e.g., "new_post_path", "users_url"
|
|
105
|
+
# @return [Hash, nil] { controller:, action:, path:, verb: } or nil if unresolvable
|
|
106
|
+
def resolve_route_helper(helper_name)
|
|
107
|
+
base = helper_name.sub(/_(path|url)\z/, '')
|
|
108
|
+
return nil if IGNORED_HELPER_PREFIXES.any? { |prefix| base.start_with?("#{prefix}_") || base == prefix }
|
|
109
|
+
|
|
110
|
+
@route_helper_map&.[](base)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
private
|
|
114
|
+
|
|
115
|
+
# Extract the HTTP verb from a route.
|
|
116
|
+
#
|
|
117
|
+
# @param route [ActionDispatch::Journey::Route] The route object
|
|
118
|
+
# @return [String] HTTP verb (GET, POST, etc.)
|
|
119
|
+
def extract_route_verb(route)
|
|
120
|
+
if route.respond_to?(:verb) && route.verb.is_a?(String)
|
|
121
|
+
route.verb
|
|
122
|
+
elsif route.respond_to?(:verb)
|
|
123
|
+
route.verb.to_s.gsub(/[^A-Z|]/, '')
|
|
124
|
+
else
|
|
125
|
+
'GET'
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
end
|