woods 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +169 -0
  3. data/README.md +20 -8
  4. data/exe/woods-console +51 -6
  5. data/exe/woods-console-mcp +24 -4
  6. data/exe/woods-mcp +30 -7
  7. data/exe/woods-mcp-http +47 -6
  8. data/lib/generators/woods/install_generator.rb +13 -4
  9. data/lib/generators/woods/templates/woods.rb.tt +155 -0
  10. data/lib/tasks/woods.rake +15 -50
  11. data/lib/woods/builder.rb +174 -9
  12. data/lib/woods/cache/cache_middleware.rb +360 -31
  13. data/lib/woods/chunking/semantic_chunker.rb +334 -7
  14. data/lib/woods/console/adapters/job_adapter.rb +10 -4
  15. data/lib/woods/console/audit_logger.rb +76 -4
  16. data/lib/woods/console/bridge.rb +48 -15
  17. data/lib/woods/console/bridge_protocol.rb +44 -0
  18. data/lib/woods/console/confirmation.rb +3 -4
  19. data/lib/woods/console/console_response_renderer.rb +56 -18
  20. data/lib/woods/console/credential_index.rb +201 -0
  21. data/lib/woods/console/credential_scanner.rb +302 -0
  22. data/lib/woods/console/dispatch_pipeline.rb +138 -0
  23. data/lib/woods/console/embedded_executor.rb +682 -35
  24. data/lib/woods/console/eval_guard.rb +319 -0
  25. data/lib/woods/console/model_validator.rb +1 -3
  26. data/lib/woods/console/rack_middleware.rb +185 -29
  27. data/lib/woods/console/redactor.rb +161 -0
  28. data/lib/woods/console/response_context.rb +127 -0
  29. data/lib/woods/console/safe_context.rb +220 -23
  30. data/lib/woods/console/scope_predicate_parser.rb +131 -0
  31. data/lib/woods/console/server.rb +417 -486
  32. data/lib/woods/console/sql_noise_stripper.rb +87 -0
  33. data/lib/woods/console/sql_table_scanner.rb +213 -0
  34. data/lib/woods/console/sql_validator.rb +81 -31
  35. data/lib/woods/console/table_gate.rb +93 -0
  36. data/lib/woods/console/tool_specs.rb +552 -0
  37. data/lib/woods/console/tools/tier1.rb +3 -3
  38. data/lib/woods/console/tools/tier4.rb +7 -1
  39. data/lib/woods/dependency_graph.rb +66 -7
  40. data/lib/woods/embedding/indexer.rb +190 -6
  41. data/lib/woods/embedding/openai.rb +40 -4
  42. data/lib/woods/embedding/provider.rb +104 -8
  43. data/lib/woods/embedding/text_preparer.rb +23 -3
  44. data/lib/woods/embedding/token_counter.rb +133 -0
  45. data/lib/woods/evaluation/baseline_runner.rb +20 -2
  46. data/lib/woods/evaluation/metrics.rb +4 -1
  47. data/lib/woods/extracted_unit.rb +1 -0
  48. data/lib/woods/extractor.rb +7 -1
  49. data/lib/woods/extractors/controller_extractor.rb +6 -0
  50. data/lib/woods/extractors/mailer_extractor.rb +16 -2
  51. data/lib/woods/extractors/model_extractor.rb +6 -1
  52. data/lib/woods/extractors/phlex_extractor.rb +13 -4
  53. data/lib/woods/extractors/rails_source_extractor.rb +2 -0
  54. data/lib/woods/extractors/route_helper_resolver.rb +130 -0
  55. data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
  56. data/lib/woods/extractors/view_component_extractor.rb +12 -1
  57. data/lib/woods/extractors/view_engines/base.rb +141 -0
  58. data/lib/woods/extractors/view_engines/erb.rb +145 -0
  59. data/lib/woods/extractors/view_template_extractor.rb +92 -133
  60. data/lib/woods/flow_assembler.rb +23 -15
  61. data/lib/woods/flow_precomputer.rb +21 -2
  62. data/lib/woods/graph_analyzer.rb +3 -4
  63. data/lib/woods/index_artifact.rb +173 -0
  64. data/lib/woods/mcp/bearer_auth.rb +45 -0
  65. data/lib/woods/mcp/bootstrap_state.rb +94 -0
  66. data/lib/woods/mcp/bootstrapper.rb +337 -16
  67. data/lib/woods/mcp/config_resolver.rb +288 -0
  68. data/lib/woods/mcp/errors.rb +134 -0
  69. data/lib/woods/mcp/index_reader.rb +265 -30
  70. data/lib/woods/mcp/origin_guard.rb +132 -0
  71. data/lib/woods/mcp/provider_probe.rb +166 -0
  72. data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
  73. data/lib/woods/mcp/renderers/markdown_renderer.rb +39 -3
  74. data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
  75. data/lib/woods/mcp/server.rb +737 -137
  76. data/lib/woods/model_name_cache.rb +78 -2
  77. data/lib/woods/notion/client.rb +25 -2
  78. data/lib/woods/notion/mappers/model_mapper.rb +36 -2
  79. data/lib/woods/railtie.rb +55 -15
  80. data/lib/woods/resilience/circuit_breaker.rb +9 -2
  81. data/lib/woods/resilience/retryable_provider.rb +40 -3
  82. data/lib/woods/resolved_config.rb +299 -0
  83. data/lib/woods/retrieval/context_assembler.rb +112 -5
  84. data/lib/woods/retrieval/query_classifier.rb +1 -1
  85. data/lib/woods/retrieval/ranker.rb +55 -6
  86. data/lib/woods/retrieval/search_executor.rb +42 -13
  87. data/lib/woods/retriever.rb +330 -24
  88. data/lib/woods/session_tracer/middleware.rb +35 -1
  89. data/lib/woods/storage/graph_store.rb +39 -0
  90. data/lib/woods/storage/inapplicable_backend.rb +14 -0
  91. data/lib/woods/storage/metadata_store.rb +129 -1
  92. data/lib/woods/storage/pgvector.rb +70 -8
  93. data/lib/woods/storage/qdrant.rb +196 -5
  94. data/lib/woods/storage/snapshotter/metadata.rb +172 -0
  95. data/lib/woods/storage/snapshotter/vector.rb +238 -0
  96. data/lib/woods/storage/snapshotter.rb +24 -0
  97. data/lib/woods/storage/vector_store.rb +184 -35
  98. data/lib/woods/tasks.rb +85 -0
  99. data/lib/woods/temporal/snapshot_store.rb +49 -1
  100. data/lib/woods/token_utils.rb +44 -5
  101. data/lib/woods/unblocked/client.rb +1 -1
  102. data/lib/woods/unblocked/document_builder.rb +35 -10
  103. data/lib/woods/unblocked/exporter.rb +1 -1
  104. data/lib/woods/util/host_guard.rb +61 -0
  105. data/lib/woods/version.rb +1 -1
  106. data/lib/woods.rb +126 -6
  107. metadata +69 -4
@@ -1,16 +1,25 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'set'
4
+ require_relative 'route_helper_resolver'
5
+ require_relative 'view_engines/base'
6
+ require_relative 'view_engines/erb'
7
+
3
8
  module Woods
4
9
  module Extractors
5
- # ViewTemplateExtractor handles ERB view template extraction.
10
+ # ViewTemplateExtractor orchestrates view-template extraction across
11
+ # per-engine implementations under {ViewEngines}.
6
12
  #
7
- # Scans `app/views/` for `.html.erb` and `.erb` files and produces
8
- # one ExtractedUnit per template. Extracts render calls (partials),
9
- # instance variables, and helper method usage. Links partials via
10
- # dependencies and infers the owning controller from directory structure.
13
+ # For each configured view directory the orchestrator walks every
14
+ # extension any registered engine handles, finds the first engine
15
+ # whose {ViewEngines::Base#handles?} returns true for a given file,
16
+ # and delegates parsing, scanning, and partial-identifier resolution
17
+ # to that engine. The orchestrator itself owns filesystem walking,
18
+ # identifier construction, controller inference, route-helper edge
19
+ # resolution, and dependency assembly.
11
20
  #
12
- # This is an ERB-only MVP — HAML, Slim, and layout inheritance
13
- # are not yet supported.
21
+ # Engines are registered via {ENGINES}. Add a new engine by creating
22
+ # a {ViewEngines::Base} subclass and appending it to that list.
14
23
  #
15
24
  # @example
16
25
  # extractor = ViewTemplateExtractor.new
@@ -18,67 +27,55 @@ module Woods
18
27
  # index = units.find { |u| u.identifier == "users/index.html.erb" }
19
28
  #
20
29
  class ViewTemplateExtractor
21
- # Directories to scan for view templates
30
+ include RouteHelperResolver
31
+
32
+ # Directories to scan for view templates.
22
33
  VIEW_DIRECTORIES = %w[
23
34
  app/views
24
35
  ].freeze
25
36
 
26
- # Common Rails view helper methods to detect
27
- COMMON_HELPERS = %w[
28
- link_to
29
- button_to
30
- form_for
31
- form_with
32
- form_tag
33
- image_tag
34
- stylesheet_link_tag
35
- javascript_include_tag
36
- content_for
37
- yield
38
- render
39
- redirect_to
40
- truncate
41
- pluralize
42
- number_to_currency
43
- number_to_percentage
44
- number_with_delimiter
45
- time_ago_in_words
46
- distance_of_time_in_words
47
- simple_format
48
- sanitize
49
- raw
50
- safe_join
51
- content_tag
52
- tag
53
- mail_to
54
- url_for
55
- asset_path
56
- asset_url
57
- ].freeze
37
+ # Registered view-template engines, in precedence order. The first
38
+ # engine whose {ViewEngines::Base#handles?} returns true for a file
39
+ # wins — place more specific engines before more general ones if
40
+ # overlap is ever introduced.
41
+ ENGINES = [ViewEngines::Erb].freeze
42
+
43
+ # Template engine names the extraction pipeline currently
44
+ # understands — aggregated from {ENGINES} so the list stays honest
45
+ # as engines are added or removed. Surfaced through the MCP
46
+ # `structure` tool.
47
+ #
48
+ # @return [Array<Symbol>]
49
+ def self.supported_template_engines
50
+ ENGINES.map { |klass| klass.new.name }.uniq.freeze
51
+ end
58
52
 
59
53
  def initialize
60
54
  @directories = VIEW_DIRECTORIES.map { |d| Rails.root.join(d) }
61
55
  .select(&:directory?)
56
+ @engines = self.class::ENGINES.map(&:new)
57
+ build_route_helper_map
62
58
  end
63
59
 
64
- # Extract all ERB view templates
60
+ # Extract all view templates across the registered engines.
65
61
  #
66
62
  # @return [Array<ExtractedUnit>] List of view template units
67
63
  def extract_all
64
+ extensions = @engines.flat_map(&:extensions).uniq
68
65
  @directories.flat_map do |dir|
69
- erb_files = Dir[dir.join('**/*.html.erb')] + Dir[dir.join('**/*.erb')]
70
- erb_files.uniq.filter_map do |file|
71
- extract_view_template_file(file)
72
- end
66
+ files = extensions.flat_map { |ext| Dir[dir.join("**/*#{ext}")] }
67
+ files.uniq.filter_map { |file| extract_view_template_file(file) }
73
68
  end
74
69
  end
75
70
 
76
- # Extract a single view template file
71
+ # Extract a single view template file.
77
72
  #
78
- # @param file_path [String] Path to the ERB template file
79
- # @return [ExtractedUnit, nil] The extracted unit or nil if not ERB
73
+ # @param file_path [String] Path to the template file
74
+ # @return [ExtractedUnit, nil] The extracted unit, or nil if no
75
+ # registered engine handles the file
80
76
  def extract_view_template_file(file_path)
81
- return nil unless file_path.end_with?('.erb')
77
+ engine = engine_for(file_path)
78
+ return nil unless engine
82
79
 
83
80
  source = File.read(file_path)
84
81
  identifier = build_identifier(file_path)
@@ -92,8 +89,9 @@ module Woods
92
89
 
93
90
  unit.namespace = namespace
94
91
  unit.source_code = source
95
- unit.metadata = build_metadata(source, file_path)
96
- unit.dependencies = build_dependencies(source, file_path, identifier)
92
+ partials = engine.scan_partials(source)
93
+ unit.metadata = build_metadata(engine, source, file_path, partials)
94
+ unit.dependencies = build_dependencies(engine, source, file_path, identifier, partials)
97
95
 
98
96
  unit
99
97
  rescue StandardError => e
@@ -103,6 +101,14 @@ module Woods
103
101
 
104
102
  private
105
103
 
104
+ # Find the registered engine that handles the given file, if any.
105
+ #
106
+ # @param file_path [String]
107
+ # @return [ViewEngines::Base, nil]
108
+ def engine_for(file_path)
109
+ @engines.find { |e| e.handles?(file_path) }
110
+ end
111
+
106
112
  # Build a readable identifier from the file path.
107
113
  #
108
114
  # @param file_path [String] Absolute path to the template
@@ -124,16 +130,18 @@ module Woods
124
130
 
125
131
  # Build metadata hash for the template.
126
132
  #
133
+ # @param engine [ViewEngines::Base] Engine that matched this file
127
134
  # @param source [String] Template source code
128
135
  # @param file_path [String] Path to the template
136
+ # @param partials [Array<String>] Pre-extracted partial names
129
137
  # @return [Hash]
130
- def build_metadata(source, file_path)
138
+ def build_metadata(engine, source, file_path, partials)
131
139
  {
132
- template_engine: 'erb',
140
+ template_engine: engine.name.to_s,
133
141
  is_partial: partial?(file_path),
134
- partials_rendered: extract_rendered_partials(source),
135
- instance_variables: extract_instance_variables(source),
136
- helpers_called: extract_helpers(source),
142
+ partials_rendered: partials,
143
+ instance_variables: engine.scan_instance_variables(source),
144
+ helpers_called: engine.scan_helpers(source),
137
145
  loc: source.lines.count { |l| l.strip.length.positive? }
138
146
  }
139
147
  end
@@ -146,98 +154,51 @@ module Woods
146
154
  File.basename(file_path).start_with?('_')
147
155
  end
148
156
 
149
- # Extract partial names from render calls.
150
- #
151
- # Matches:
152
- # - render partial: 'foo/bar'
153
- # - render 'foo/bar'
154
- # - render :foo
155
- #
156
- # @param source [String] Template source code
157
- # @return [Array<String>] Partial names
158
- def extract_rendered_partials(source)
159
- partials = Set.new
160
-
161
- # render partial: 'path/to/partial'
162
- source.scan(/render\s+partial:\s*['"]([^'"]+)['"]/).each do |match|
163
- partials << match[0]
164
- end
165
-
166
- # render 'path/to/partial' (string without keyword)
167
- source.scan(/render\s+['"]([^'"]+)['"]/).each do |match|
168
- partials << match[0]
169
- end
170
-
171
- # render :symbol
172
- source.scan(/render\s+:(\w+)/).each do |match|
173
- partials << match[0]
174
- end
175
-
176
- partials.to_a
177
- end
178
-
179
- # Extract instance variables used in the template.
180
- #
181
- # @param source [String] Template source code
182
- # @return [Array<String>] Instance variable names
183
- def extract_instance_variables(source)
184
- source.scan(/@[a-zA-Z_]\w*/).uniq.sort
185
- end
186
-
187
- # Extract common Rails helper calls from the template.
188
- #
189
- # @param source [String] Template source code
190
- # @return [Array<String>] Helper method names
191
- def extract_helpers(source)
192
- found = Set.new
193
- COMMON_HELPERS.each do |helper|
194
- found << helper if source.match?(/\b#{Regexp.escape(helper)}\b/)
195
- end
196
- found.to_a.sort
197
- end
198
-
199
157
  # Build dependencies for the template.
200
158
  #
159
+ # @param engine [ViewEngines::Base] Engine that matched this file
201
160
  # @param source [String] Template source code
202
161
  # @param file_path [String] Path to the template
203
162
  # @param identifier [String] Template identifier
163
+ # @param partials [Array<String>] Pre-extracted partial names
204
164
  # @return [Array<Hash>]
205
- def build_dependencies(source, file_path, identifier)
165
+ def build_dependencies(engine, source, file_path, identifier, partials)
206
166
  deps = []
207
167
 
208
- # Rendered partials
209
- extract_rendered_partials(source).each do |partial_name|
210
- partial_identifier = resolve_partial_identifier(partial_name, identifier)
168
+ partials.each do |partial_name|
169
+ partial_identifier = engine.resolve_partial_identifier(partial_name, identifier)
211
170
  deps << { type: :view_template, target: partial_identifier, via: :render }
212
171
  end
213
172
 
214
- # Inferred controller
215
173
  controller = infer_controller(file_path)
216
174
  deps << { type: :controller, target: controller, via: :view_render } if controller
217
175
 
218
- deps
176
+ deps.concat(resolve_navigation_candidates(engine, source))
177
+
178
+ deps.uniq { |d| [d[:type], d[:target], d[:via]] }
219
179
  end
220
180
 
221
- # Resolve a partial name to its file identifier.
181
+ # Ask the engine for route-helper candidates and resolve each to a
182
+ # controller target via {RouteHelperResolver}. Gated by
183
+ # +Woods.configuration.extract_navigation_edges+ so the config
184
+ # toggle still applies.
222
185
  #
223
- # Given a render call like `render 'comments/comment'`, resolves to
224
- # `comments/_comment.html.erb`.
225
- #
226
- # @param partial_name [String] The partial name from the render call
227
- # @param current_identifier [String] The current template's identifier
228
- # @return [String] Resolved partial identifier
229
- def resolve_partial_identifier(partial_name, current_identifier)
230
- if partial_name.include?('/')
231
- dir = File.dirname(partial_name)
232
- base = File.basename(partial_name)
233
- "#{dir}/_#{base}.html.erb"
234
- else
235
- dir = File.dirname(current_identifier)
236
- if dir == '.'
237
- "_#{partial_name}.html.erb"
238
- else
239
- "#{dir}/_#{partial_name}.html.erb"
240
- end
186
+ # @param engine [ViewEngines::Base]
187
+ # @param source [String]
188
+ # @return [Array<Hash>]
189
+ def resolve_navigation_candidates(engine, source)
190
+ return [] unless Woods.configuration&.extract_navigation_edges
191
+
192
+ seen = Set.new
193
+ engine.scan_navigation_candidates(source).filter_map do |cand|
194
+ resolved = resolve_route_helper(cand[:helper])
195
+ next unless resolved
196
+
197
+ key = [resolved[:controller], cand[:via]]
198
+ next if seen.include?(key)
199
+
200
+ seen.add(key)
201
+ { type: :controller, target: resolved[:controller], via: cand[:via] }
241
202
  end
242
203
  end
243
204
 
@@ -248,8 +209,6 @@ module Woods
248
209
  def infer_controller(file_path)
249
210
  namespace = extract_view_namespace(file_path)
250
211
  return nil unless namespace
251
-
252
- # Skip layout-only directories
253
212
  return nil if namespace == 'layouts'
254
213
 
255
214
  parts = namespace.split('/')
@@ -253,7 +253,11 @@ module Woods
253
253
 
254
254
  filenames.each do |filename|
255
255
  Dir[File.join(@extracted_dir, '*', filename)].each do |path|
256
- return JSON.parse(File.read(path), symbolize_names: true)
256
+ # Force UTF-8: the extractor writes the routes-comment header in
257
+ # source_code using Unicode box-drawing characters; reading under
258
+ # the platform default (US-ASCII on some CIs) raises
259
+ # InvalidByteSequenceError before JSON parsing.
260
+ return JSON.parse(File.read(path, encoding: 'UTF-8'), symbolize_names: true)
257
261
  rescue JSON::ParserError
258
262
  next
259
263
  end
@@ -263,28 +267,32 @@ module Woods
263
267
  end
264
268
 
265
269
  # Extract route information from controller metadata.
270
+ #
271
+ # Handles two on-disk shapes:
272
+ # - Hash keyed by action (what ControllerExtractor writes):
273
+ # { "create" => [{ verb:, path:, ... }, ...] }
274
+ # - Array of route hashes (older / test fixture shape):
275
+ # [{ action:, verb:, path: }, ...]
266
276
  def extract_route(entry_point)
267
277
  unit_id, method_name = parse_identifier(entry_point)
268
278
  unit_data = load_unit(unit_id)
269
279
  return nil unless unit_data
270
280
 
271
281
  metadata = unit_data[:metadata] || {}
272
- routes = metadata[:routes]
273
- return nil unless routes.is_a?(Array)
274
-
275
- # Find route matching the method name
276
- route = if method_name
277
- routes.find { |r| r[:action]&.to_s == method_name }
278
- else
279
- routes.first
280
- end
281
-
282
+ route = resolve_route_entry(metadata[:routes], method_name)
282
283
  return nil unless route
283
284
 
284
- {
285
- verb: route[:verb],
286
- path: route[:path]
287
- }
285
+ { verb: route[:verb], path: route[:path] }
286
+ end
287
+
288
+ def resolve_route_entry(routes, method_name)
289
+ case routes
290
+ when Hash
291
+ action_routes = method_name ? routes[method_name.to_s] || routes[method_name.to_sym] : routes.values.first
292
+ Array(action_routes).first
293
+ when Array
294
+ method_name ? routes.find { |r| r[:action]&.to_s == method_name } : routes.first
295
+ end
288
296
  end
289
297
  end
290
298
  end
@@ -83,7 +83,7 @@ module Woods
83
83
  filename = "#{controller_id.gsub('::', '__')}_#{action}.json"
84
84
  flow_path = File.join(@flows_dir, filename)
85
85
 
86
- File.write(flow_path, JSON.pretty_generate(flow.to_h))
86
+ File.write(flow_path, canonical_json(flow.to_h))
87
87
 
88
88
  flow_path
89
89
  rescue StandardError => e
@@ -96,7 +96,26 @@ module Woods
96
96
  # @param flow_map [Hash{String => String}]
97
97
  def write_flow_index(flow_map)
98
98
  index_path = File.join(@flows_dir, 'flow_index.json')
99
- File.write(index_path, JSON.pretty_generate(flow_map))
99
+ File.write(index_path, canonical_json(flow_map))
100
+ end
101
+
102
+ # Emit deterministic pretty JSON — keys recursively sorted so two runs
103
+ # over identical input produce byte-identical output. Without this,
104
+ # diff-based tooling (snapshot review, flow-change detection) flags
105
+ # spurious churn from incidental key-order differences in `flow.to_h`.
106
+ #
107
+ # @param value [Hash, Array, Object]
108
+ # @return [String]
109
+ def canonical_json(value)
110
+ JSON.pretty_generate(sort_keys_deep(value))
111
+ end
112
+
113
+ def sort_keys_deep(value)
114
+ case value
115
+ when Hash then value.keys.sort_by(&:to_s).to_h { |k| [k, sort_keys_deep(value[k])] }
116
+ when Array then value.map { |v| sort_keys_deep(v) }
117
+ else value
118
+ end
100
119
  end
101
120
  end
102
121
  end
@@ -307,11 +307,10 @@ module Woods
307
307
  # Find which other cluster this one connects to most
308
308
  target = find_merge_target(cluster, clusters, name)
309
309
 
310
- if target
311
- clusters[target][:members].concat(cluster[:members])
312
- cluster[:members].each { |id| clusters[target][:member_set].add(id) }
313
- end
310
+ break unless target
314
311
 
312
+ clusters[target][:members].concat(cluster[:members])
313
+ cluster[:members].each { |id| clusters[target][:member_set].add(id) }
315
314
  clusters.delete(name)
316
315
  end
317
316
  end
@@ -0,0 +1,173 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+ require 'json'
5
+ require 'fileutils'
6
+ require 'tempfile'
7
+
8
+ module Woods
9
+ # Whole Value for the on-disk artifact layout under +output_dir+.
10
+ #
11
+ # Centralises all path derivation and atomic write operations so that no
12
+ # caller ever assembles paths by hand. The +dumps/latest+ pointer file
13
+ # provides cross-artifact atomicity: consumers always read the pointer
14
+ # first; the pointer is flipped last, after the dump directory is fully
15
+ # fsynced.
16
+ #
17
+ # @example Basic usage
18
+ # artifact = Woods::IndexArtifact.new(output_dir)
19
+ # return if artifact.fresh?
20
+ # config = artifact.read_config
21
+ # dump_dir = artifact.latest_dump_path
22
+ #
23
+ class IndexArtifact
24
+ # @param output_dir [String, Pathname] path to the extraction output directory
25
+ def initialize(output_dir)
26
+ @root = Pathname.new(output_dir.to_s)
27
+ end
28
+
29
+ # The extraction output directory root.
30
+ #
31
+ # @return [Pathname]
32
+ def output_dir
33
+ @root
34
+ end
35
+
36
+ # Path to the resolved config snapshot written by the embed run.
37
+ #
38
+ # @return [Pathname]
39
+ def config_path
40
+ @root.join('woods.json')
41
+ end
42
+
43
+ # Root of the per-run dump directories.
44
+ #
45
+ # @return [Pathname]
46
+ def dumps_root
47
+ @root.join('dumps')
48
+ end
49
+
50
+ # Path to the +dumps/latest+ pointer file (may not exist yet).
51
+ #
52
+ # @return [Pathname]
53
+ def latest_pointer_path
54
+ dumps_root.join('latest')
55
+ end
56
+
57
+ # Returns true when the artifact has never been populated — +woods.json+
58
+ # is absent AND the +dumps/latest+ pointer does not exist.
59
+ #
60
+ # Once either file is present the artifact is considered non-fresh; the
61
+ # Bootstrapper uses this to decide whether to raise {Woods::MCP::MissingArtifact}
62
+ # or proceed with loading.
63
+ #
64
+ # @return [Boolean]
65
+ def fresh?
66
+ !config_path.exist? && !latest_pointer_path.exist?
67
+ end
68
+
69
+ # Path to the latest complete dump directory, or +nil+.
70
+ #
71
+ # Returns +nil+ if the pointer file does not exist, if the pointer content
72
+ # is blank, or if the directory it names no longer exists (stale pointer).
73
+ #
74
+ # @return [Pathname, nil]
75
+ def latest_dump_path
76
+ return nil unless latest_pointer_path.exist?
77
+
78
+ dirname = latest_pointer_path.read.strip
79
+ return nil if dirname.empty?
80
+
81
+ dir = dumps_root.join(dirname)
82
+ dir.exist? ? dir : nil
83
+ end
84
+
85
+ # Reads and parses +woods.json+, returning the raw hash.
86
+ #
87
+ # Returns +nil+ when the file does not exist. Schema-version validation
88
+ # is the caller's responsibility (typically {Woods::ResolvedConfig.from_hash}).
89
+ #
90
+ # @return [Hash, nil]
91
+ def read_config
92
+ return nil unless config_path.exist?
93
+
94
+ JSON.parse(config_path.read)
95
+ end
96
+
97
+ # Creates a new timestamped dump directory and returns its path.
98
+ #
99
+ # The directory is created immediately so callers can begin writing into
100
+ # it. +dumps_root+ is created on demand. Directory names use dashes in
101
+ # place of colons for filesystem compatibility (+%H-%M-%SZ+ not
102
+ # +%H:%M:%SZ+).
103
+ #
104
+ # @param now [Time] timestamp to use for the directory name (default: UTC now)
105
+ # @return [Pathname]
106
+ # @raise [Errno::EEXIST] if the target directory already exists
107
+ def new_dump_dir(now: Time.now.utc)
108
+ dirname = now.strftime('%Y-%m-%dT%H-%M-%SZ')
109
+ dir = dumps_root.join(dirname)
110
+ FileUtils.mkdir_p(dumps_root)
111
+ Dir.mkdir(dir.to_s)
112
+ dir
113
+ end
114
+
115
+ # Atomically flips the +latest+ pointer to the given dump directory.
116
+ #
117
+ # Uses a temp file + +File.rename+ so a crash mid-flip leaves the previous
118
+ # pointer intact. +dump_dir+ must exist and resolve to a path inside
119
+ # +dumps_root+.
120
+ #
121
+ # @param dump_dir [Pathname, String] completed dump directory to promote
122
+ # @return [void]
123
+ # @raise [ArgumentError] if +dump_dir+ does not exist or is outside +dumps_root+
124
+ def promote(dump_dir)
125
+ target = Pathname.new(dump_dir.to_s)
126
+ root_real = dumps_root.expand_path.to_s
127
+ target_real = target.exist? ? target.realpath.to_s : ''
128
+ # Resolve symlinks on both sides before comparing (handles macOS, where /tmp and /var are symlinks into /private)
129
+ root_resolved = Pathname.new(root_real).exist? ? Pathname.new(root_real).realpath.to_s : root_real
130
+ unless target.exist? && target_real.start_with?(root_resolved)
131
+ raise ArgumentError,
132
+ 'dump_dir must exist inside dumps_root. ' \
133
+ "Got: #{dump_dir.inspect}, dumps_root: #{dumps_root}"
134
+ end
135
+
136
+ atomic_write(latest_pointer_path, target.basename.to_s)
137
+ end
138
+
139
+ # Atomically writes a resolved config hash as +woods.json+.
140
+ #
141
+ # Accepts either a +ResolvedConfig+ (responds to +#to_snapshot_json+) or
142
+ # a plain +Hash+. When +#to_snapshot_json+ returns a +Hash+, it is
143
+ # serialized to JSON automatically — callers need not pre-serialize.
144
+ #
145
+ # @param resolved_config_hash [#to_snapshot_json, Hash]
146
+ # @return [void]
147
+ def write_config(resolved_config_hash)
148
+ raw = if resolved_config_hash.respond_to?(:to_snapshot_json)
149
+ resolved_config_hash.to_snapshot_json
150
+ else
151
+ resolved_config_hash
152
+ end
153
+ json = raw.is_a?(String) ? raw : JSON.pretty_generate(raw)
154
+ atomic_write(config_path, json)
155
+ end
156
+
157
+ private
158
+
159
+ def atomic_write(path, content)
160
+ FileUtils.mkdir_p(path.dirname)
161
+ tmp = Tempfile.new('.woods-', path.dirname.to_s)
162
+ tmp.write(content)
163
+ tmp.flush
164
+ tmp.fsync
165
+ tmp.close
166
+ File.rename(tmp.path, path.to_s)
167
+ rescue StandardError
168
+ tmp&.close
169
+ tmp&.unlink
170
+ raise
171
+ end
172
+ end
173
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'rack/utils'
5
+
6
+ module Woods
7
+ module MCP
8
+ # Rack middleware that rejects requests lacking a matching bearer token.
9
+ #
10
+ # Uses Rack::Utils.secure_compare for constant-time comparison to avoid
11
+ # leaking token bytes via response-time side channels.
12
+ class BearerAuth
13
+ UNAUTHORIZED_BODY = { jsonrpc: '2.0', error: { code: -32_001, message: 'Unauthorized' }, id: nil }.to_json.freeze
14
+
15
+ # Bearer tokens shorter than this are rejected at construction time.
16
+ # Matches OWASP "session ID entropy" guidance (>= 128 bits ≈ 32 hex chars).
17
+ MIN_TOKEN_LENGTH = 32
18
+
19
+ def initialize(app, token:)
20
+ raise ArgumentError, 'token must be a non-empty string' if token.nil? || token.empty?
21
+ if token.to_s.length < MIN_TOKEN_LENGTH
22
+ raise ArgumentError,
23
+ "bearer token must be at least #{MIN_TOKEN_LENGTH} characters " \
24
+ "(got #{token.to_s.length}); generate with `SecureRandom.hex(32)`"
25
+ end
26
+
27
+ @app = app
28
+ @token = token.to_s
29
+ end
30
+
31
+ def call(env)
32
+ header = env['HTTP_AUTHORIZATION'].to_s
33
+ presented = header.start_with?('Bearer ') ? header.sub(/\ABearer /, '') : nil
34
+
35
+ if presented && Rack::Utils.secure_compare(@token, presented)
36
+ @app.call(env)
37
+ else
38
+ [401,
39
+ { 'content-type' => 'application/json', 'www-authenticate' => 'Bearer realm="woods-mcp-http"' },
40
+ [UNAUTHORIZED_BODY]]
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end