woods 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +186 -0
  3. data/README.md +20 -8
  4. data/exe/woods-console +51 -6
  5. data/exe/woods-console-mcp +24 -4
  6. data/exe/woods-mcp +30 -7
  7. data/exe/woods-mcp-http +47 -6
  8. data/lib/generators/woods/install_generator.rb +13 -4
  9. data/lib/generators/woods/templates/woods.rb.tt +155 -0
  10. data/lib/tasks/woods.rake +69 -50
  11. data/lib/woods/builder.rb +174 -9
  12. data/lib/woods/cache/cache_middleware.rb +360 -31
  13. data/lib/woods/chunking/semantic_chunker.rb +334 -7
  14. data/lib/woods/console/adapters/job_adapter.rb +10 -4
  15. data/lib/woods/console/audit_logger.rb +76 -4
  16. data/lib/woods/console/bridge.rb +48 -15
  17. data/lib/woods/console/bridge_protocol.rb +44 -0
  18. data/lib/woods/console/confirmation.rb +3 -4
  19. data/lib/woods/console/console_response_renderer.rb +56 -18
  20. data/lib/woods/console/credential_index.rb +201 -0
  21. data/lib/woods/console/credential_scanner.rb +302 -0
  22. data/lib/woods/console/dispatch_pipeline.rb +138 -0
  23. data/lib/woods/console/embedded_executor.rb +682 -35
  24. data/lib/woods/console/eval_guard.rb +319 -0
  25. data/lib/woods/console/model_validator.rb +1 -3
  26. data/lib/woods/console/rack_middleware.rb +185 -29
  27. data/lib/woods/console/redactor.rb +161 -0
  28. data/lib/woods/console/response_context.rb +127 -0
  29. data/lib/woods/console/safe_context.rb +220 -23
  30. data/lib/woods/console/scope_predicate_parser.rb +131 -0
  31. data/lib/woods/console/server.rb +417 -486
  32. data/lib/woods/console/sql_noise_stripper.rb +87 -0
  33. data/lib/woods/console/sql_table_scanner.rb +213 -0
  34. data/lib/woods/console/sql_validator.rb +81 -31
  35. data/lib/woods/console/table_gate.rb +93 -0
  36. data/lib/woods/console/tool_specs.rb +552 -0
  37. data/lib/woods/console/tools/tier1.rb +3 -3
  38. data/lib/woods/console/tools/tier4.rb +7 -1
  39. data/lib/woods/dependency_graph.rb +66 -7
  40. data/lib/woods/embedding/indexer.rb +190 -6
  41. data/lib/woods/embedding/openai.rb +40 -4
  42. data/lib/woods/embedding/provider.rb +104 -8
  43. data/lib/woods/embedding/text_preparer.rb +23 -3
  44. data/lib/woods/embedding/token_counter.rb +133 -0
  45. data/lib/woods/evaluation/baseline_runner.rb +20 -2
  46. data/lib/woods/evaluation/metrics.rb +4 -1
  47. data/lib/woods/extracted_unit.rb +1 -0
  48. data/lib/woods/extractor.rb +7 -1
  49. data/lib/woods/extractors/controller_extractor.rb +6 -0
  50. data/lib/woods/extractors/mailer_extractor.rb +16 -2
  51. data/lib/woods/extractors/model_extractor.rb +6 -1
  52. data/lib/woods/extractors/phlex_extractor.rb +13 -4
  53. data/lib/woods/extractors/rails_source_extractor.rb +2 -0
  54. data/lib/woods/extractors/route_helper_resolver.rb +130 -0
  55. data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
  56. data/lib/woods/extractors/view_component_extractor.rb +12 -1
  57. data/lib/woods/extractors/view_engines/base.rb +141 -0
  58. data/lib/woods/extractors/view_engines/erb.rb +145 -0
  59. data/lib/woods/extractors/view_template_extractor.rb +92 -133
  60. data/lib/woods/flow_assembler.rb +23 -15
  61. data/lib/woods/flow_precomputer.rb +21 -2
  62. data/lib/woods/graph_analyzer.rb +210 -0
  63. data/lib/woods/index_artifact.rb +173 -0
  64. data/lib/woods/mcp/bearer_auth.rb +45 -0
  65. data/lib/woods/mcp/bootstrap_state.rb +94 -0
  66. data/lib/woods/mcp/bootstrapper.rb +337 -16
  67. data/lib/woods/mcp/config_resolver.rb +288 -0
  68. data/lib/woods/mcp/errors.rb +134 -0
  69. data/lib/woods/mcp/index_reader.rb +265 -30
  70. data/lib/woods/mcp/origin_guard.rb +132 -0
  71. data/lib/woods/mcp/provider_probe.rb +166 -0
  72. data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
  73. data/lib/woods/mcp/renderers/markdown_renderer.rb +100 -3
  74. data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
  75. data/lib/woods/mcp/server.rb +771 -137
  76. data/lib/woods/model_name_cache.rb +78 -2
  77. data/lib/woods/notion/client.rb +25 -2
  78. data/lib/woods/notion/mappers/model_mapper.rb +36 -2
  79. data/lib/woods/railtie.rb +55 -15
  80. data/lib/woods/resilience/circuit_breaker.rb +9 -2
  81. data/lib/woods/resilience/retryable_provider.rb +40 -3
  82. data/lib/woods/resolved_config.rb +299 -0
  83. data/lib/woods/retrieval/context_assembler.rb +112 -5
  84. data/lib/woods/retrieval/query_classifier.rb +1 -1
  85. data/lib/woods/retrieval/ranker.rb +55 -6
  86. data/lib/woods/retrieval/search_executor.rb +42 -13
  87. data/lib/woods/retriever.rb +330 -24
  88. data/lib/woods/session_tracer/middleware.rb +35 -1
  89. data/lib/woods/storage/graph_store.rb +39 -0
  90. data/lib/woods/storage/inapplicable_backend.rb +14 -0
  91. data/lib/woods/storage/metadata_store.rb +129 -1
  92. data/lib/woods/storage/pgvector.rb +70 -8
  93. data/lib/woods/storage/qdrant.rb +196 -5
  94. data/lib/woods/storage/snapshotter/metadata.rb +172 -0
  95. data/lib/woods/storage/snapshotter/vector.rb +238 -0
  96. data/lib/woods/storage/snapshotter.rb +24 -0
  97. data/lib/woods/storage/vector_store.rb +184 -35
  98. data/lib/woods/tasks.rb +85 -0
  99. data/lib/woods/temporal/snapshot_store.rb +49 -1
  100. data/lib/woods/token_utils.rb +44 -5
  101. data/lib/woods/unblocked/client.rb +163 -0
  102. data/lib/woods/unblocked/document_builder.rb +326 -0
  103. data/lib/woods/unblocked/exporter.rb +201 -0
  104. data/lib/woods/unblocked/rate_limiter.rb +94 -0
  105. data/lib/woods/util/host_guard.rb +61 -0
  106. data/lib/woods/version.rb +1 -1
  107. data/lib/woods.rb +130 -6
  108. metadata +73 -4
@@ -1,16 +1,25 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'set'
4
+ require_relative 'route_helper_resolver'
5
+ require_relative 'view_engines/base'
6
+ require_relative 'view_engines/erb'
7
+
3
8
  module Woods
4
9
  module Extractors
5
- # ViewTemplateExtractor handles ERB view template extraction.
10
+ # ViewTemplateExtractor orchestrates view-template extraction across
11
+ # per-engine implementations under {ViewEngines}.
6
12
  #
7
- # Scans `app/views/` for `.html.erb` and `.erb` files and produces
8
- # one ExtractedUnit per template. Extracts render calls (partials),
9
- # instance variables, and helper method usage. Links partials via
10
- # dependencies and infers the owning controller from directory structure.
13
+ # For each configured view directory the orchestrator walks every
14
+ # extension any registered engine handles, finds the first engine
15
+ # whose {ViewEngines::Base#handles?} returns true for a given file,
16
+ # and delegates parsing, scanning, and partial-identifier resolution
17
+ # to that engine. The orchestrator itself owns filesystem walking,
18
+ # identifier construction, controller inference, route-helper edge
19
+ # resolution, and dependency assembly.
11
20
  #
12
- # This is an ERB-only MVP HAML, Slim, and layout inheritance
13
- # are not yet supported.
21
+ # Engines are registered via {ENGINES}. Add a new engine by creating
22
+ # a {ViewEngines::Base} subclass and appending it to that list.
14
23
  #
15
24
  # @example
16
25
  # extractor = ViewTemplateExtractor.new
@@ -18,67 +27,55 @@ module Woods
18
27
  # index = units.find { |u| u.identifier == "users/index.html.erb" }
19
28
  #
20
29
  class ViewTemplateExtractor
21
- # Directories to scan for view templates
30
+ include RouteHelperResolver
31
+
32
+ # Directories to scan for view templates.
22
33
  VIEW_DIRECTORIES = %w[
23
34
  app/views
24
35
  ].freeze
25
36
 
26
- # Common Rails view helper methods to detect
27
- COMMON_HELPERS = %w[
28
- link_to
29
- button_to
30
- form_for
31
- form_with
32
- form_tag
33
- image_tag
34
- stylesheet_link_tag
35
- javascript_include_tag
36
- content_for
37
- yield
38
- render
39
- redirect_to
40
- truncate
41
- pluralize
42
- number_to_currency
43
- number_to_percentage
44
- number_with_delimiter
45
- time_ago_in_words
46
- distance_of_time_in_words
47
- simple_format
48
- sanitize
49
- raw
50
- safe_join
51
- content_tag
52
- tag
53
- mail_to
54
- url_for
55
- asset_path
56
- asset_url
57
- ].freeze
37
+ # Registered view-template engines, in precedence order. The first
38
+ # engine whose {ViewEngines::Base#handles?} returns true for a file
39
+ # wins — place more specific engines before more general ones if
40
+ # overlap is ever introduced.
41
+ ENGINES = [ViewEngines::Erb].freeze
42
+
43
+ # Template engine names the extraction pipeline currently
44
+ # understands — aggregated from {ENGINES} so the list stays honest
45
+ # as engines are added or removed. Surfaced through the MCP
46
+ # `structure` tool.
47
+ #
48
+ # @return [Array<Symbol>]
49
+ def self.supported_template_engines
50
+ ENGINES.map { |klass| klass.new.name }.uniq.freeze
51
+ end
58
52
 
59
53
  def initialize
60
54
  @directories = VIEW_DIRECTORIES.map { |d| Rails.root.join(d) }
61
55
  .select(&:directory?)
56
+ @engines = self.class::ENGINES.map(&:new)
57
+ build_route_helper_map
62
58
  end
63
59
 
64
- # Extract all ERB view templates
60
+ # Extract all view templates across the registered engines.
65
61
  #
66
62
  # @return [Array<ExtractedUnit>] List of view template units
67
63
  def extract_all
64
+ extensions = @engines.flat_map(&:extensions).uniq
68
65
  @directories.flat_map do |dir|
69
- erb_files = Dir[dir.join('**/*.html.erb')] + Dir[dir.join('**/*.erb')]
70
- erb_files.uniq.filter_map do |file|
71
- extract_view_template_file(file)
72
- end
66
+ files = extensions.flat_map { |ext| Dir[dir.join("**/*#{ext}")] }
67
+ files.uniq.filter_map { |file| extract_view_template_file(file) }
73
68
  end
74
69
  end
75
70
 
76
- # Extract a single view template file
71
+ # Extract a single view template file.
77
72
  #
78
- # @param file_path [String] Path to the ERB template file
79
- # @return [ExtractedUnit, nil] The extracted unit or nil if not ERB
73
+ # @param file_path [String] Path to the template file
74
+ # @return [ExtractedUnit, nil] The extracted unit, or nil if no
75
+ # registered engine handles the file
80
76
  def extract_view_template_file(file_path)
81
- return nil unless file_path.end_with?('.erb')
77
+ engine = engine_for(file_path)
78
+ return nil unless engine
82
79
 
83
80
  source = File.read(file_path)
84
81
  identifier = build_identifier(file_path)
@@ -92,8 +89,9 @@ module Woods
92
89
 
93
90
  unit.namespace = namespace
94
91
  unit.source_code = source
95
- unit.metadata = build_metadata(source, file_path)
96
- unit.dependencies = build_dependencies(source, file_path, identifier)
92
+ partials = engine.scan_partials(source)
93
+ unit.metadata = build_metadata(engine, source, file_path, partials)
94
+ unit.dependencies = build_dependencies(engine, source, file_path, identifier, partials)
97
95
 
98
96
  unit
99
97
  rescue StandardError => e
@@ -103,6 +101,14 @@ module Woods
103
101
 
104
102
  private
105
103
 
104
+ # Find the registered engine that handles the given file, if any.
105
+ #
106
+ # @param file_path [String]
107
+ # @return [ViewEngines::Base, nil]
108
+ def engine_for(file_path)
109
+ @engines.find { |e| e.handles?(file_path) }
110
+ end
111
+
106
112
  # Build a readable identifier from the file path.
107
113
  #
108
114
  # @param file_path [String] Absolute path to the template
@@ -124,16 +130,18 @@ module Woods
124
130
 
125
131
  # Build metadata hash for the template.
126
132
  #
133
+ # @param engine [ViewEngines::Base] Engine that matched this file
127
134
  # @param source [String] Template source code
128
135
  # @param file_path [String] Path to the template
136
+ # @param partials [Array<String>] Pre-extracted partial names
129
137
  # @return [Hash]
130
- def build_metadata(source, file_path)
138
+ def build_metadata(engine, source, file_path, partials)
131
139
  {
132
- template_engine: 'erb',
140
+ template_engine: engine.name.to_s,
133
141
  is_partial: partial?(file_path),
134
- partials_rendered: extract_rendered_partials(source),
135
- instance_variables: extract_instance_variables(source),
136
- helpers_called: extract_helpers(source),
142
+ partials_rendered: partials,
143
+ instance_variables: engine.scan_instance_variables(source),
144
+ helpers_called: engine.scan_helpers(source),
137
145
  loc: source.lines.count { |l| l.strip.length.positive? }
138
146
  }
139
147
  end
@@ -146,98 +154,51 @@ module Woods
146
154
  File.basename(file_path).start_with?('_')
147
155
  end
148
156
 
149
- # Extract partial names from render calls.
150
- #
151
- # Matches:
152
- # - render partial: 'foo/bar'
153
- # - render 'foo/bar'
154
- # - render :foo
155
- #
156
- # @param source [String] Template source code
157
- # @return [Array<String>] Partial names
158
- def extract_rendered_partials(source)
159
- partials = Set.new
160
-
161
- # render partial: 'path/to/partial'
162
- source.scan(/render\s+partial:\s*['"]([^'"]+)['"]/).each do |match|
163
- partials << match[0]
164
- end
165
-
166
- # render 'path/to/partial' (string without keyword)
167
- source.scan(/render\s+['"]([^'"]+)['"]/).each do |match|
168
- partials << match[0]
169
- end
170
-
171
- # render :symbol
172
- source.scan(/render\s+:(\w+)/).each do |match|
173
- partials << match[0]
174
- end
175
-
176
- partials.to_a
177
- end
178
-
179
- # Extract instance variables used in the template.
180
- #
181
- # @param source [String] Template source code
182
- # @return [Array<String>] Instance variable names
183
- def extract_instance_variables(source)
184
- source.scan(/@[a-zA-Z_]\w*/).uniq.sort
185
- end
186
-
187
- # Extract common Rails helper calls from the template.
188
- #
189
- # @param source [String] Template source code
190
- # @return [Array<String>] Helper method names
191
- def extract_helpers(source)
192
- found = Set.new
193
- COMMON_HELPERS.each do |helper|
194
- found << helper if source.match?(/\b#{Regexp.escape(helper)}\b/)
195
- end
196
- found.to_a.sort
197
- end
198
-
199
157
  # Build dependencies for the template.
200
158
  #
159
+ # @param engine [ViewEngines::Base] Engine that matched this file
201
160
  # @param source [String] Template source code
202
161
  # @param file_path [String] Path to the template
203
162
  # @param identifier [String] Template identifier
163
+ # @param partials [Array<String>] Pre-extracted partial names
204
164
  # @return [Array<Hash>]
205
- def build_dependencies(source, file_path, identifier)
165
+ def build_dependencies(engine, source, file_path, identifier, partials)
206
166
  deps = []
207
167
 
208
- # Rendered partials
209
- extract_rendered_partials(source).each do |partial_name|
210
- partial_identifier = resolve_partial_identifier(partial_name, identifier)
168
+ partials.each do |partial_name|
169
+ partial_identifier = engine.resolve_partial_identifier(partial_name, identifier)
211
170
  deps << { type: :view_template, target: partial_identifier, via: :render }
212
171
  end
213
172
 
214
- # Inferred controller
215
173
  controller = infer_controller(file_path)
216
174
  deps << { type: :controller, target: controller, via: :view_render } if controller
217
175
 
218
- deps
176
+ deps.concat(resolve_navigation_candidates(engine, source))
177
+
178
+ deps.uniq { |d| [d[:type], d[:target], d[:via]] }
219
179
  end
220
180
 
221
- # Resolve a partial name to its file identifier.
181
+ # Ask the engine for route-helper candidates and resolve each to a
182
+ # controller target via {RouteHelperResolver}. Gated by
183
+ # +Woods.configuration.extract_navigation_edges+ so the config
184
+ # toggle still applies.
222
185
  #
223
- # Given a render call like `render 'comments/comment'`, resolves to
224
- # `comments/_comment.html.erb`.
225
- #
226
- # @param partial_name [String] The partial name from the render call
227
- # @param current_identifier [String] The current template's identifier
228
- # @return [String] Resolved partial identifier
229
- def resolve_partial_identifier(partial_name, current_identifier)
230
- if partial_name.include?('/')
231
- dir = File.dirname(partial_name)
232
- base = File.basename(partial_name)
233
- "#{dir}/_#{base}.html.erb"
234
- else
235
- dir = File.dirname(current_identifier)
236
- if dir == '.'
237
- "_#{partial_name}.html.erb"
238
- else
239
- "#{dir}/_#{partial_name}.html.erb"
240
- end
186
+ # @param engine [ViewEngines::Base]
187
+ # @param source [String]
188
+ # @return [Array<Hash>]
189
+ def resolve_navigation_candidates(engine, source)
190
+ return [] unless Woods.configuration&.extract_navigation_edges
191
+
192
+ seen = Set.new
193
+ engine.scan_navigation_candidates(source).filter_map do |cand|
194
+ resolved = resolve_route_helper(cand[:helper])
195
+ next unless resolved
196
+
197
+ key = [resolved[:controller], cand[:via]]
198
+ next if seen.include?(key)
199
+
200
+ seen.add(key)
201
+ { type: :controller, target: resolved[:controller], via: cand[:via] }
241
202
  end
242
203
  end
243
204
 
@@ -248,8 +209,6 @@ module Woods
248
209
  def infer_controller(file_path)
249
210
  namespace = extract_view_namespace(file_path)
250
211
  return nil unless namespace
251
-
252
- # Skip layout-only directories
253
212
  return nil if namespace == 'layouts'
254
213
 
255
214
  parts = namespace.split('/')
@@ -253,7 +253,11 @@ module Woods
253
253
 
254
254
  filenames.each do |filename|
255
255
  Dir[File.join(@extracted_dir, '*', filename)].each do |path|
256
- return JSON.parse(File.read(path), symbolize_names: true)
256
+ # Force UTF-8: the extractor writes the routes-comment header in
257
+ # source_code using Unicode box-drawing characters; reading under
258
+ # the platform default (US-ASCII on some CIs) raises
259
+ # InvalidByteSequenceError before JSON parsing.
260
+ return JSON.parse(File.read(path, encoding: 'UTF-8'), symbolize_names: true)
257
261
  rescue JSON::ParserError
258
262
  next
259
263
  end
@@ -263,28 +267,32 @@ module Woods
263
267
  end
264
268
 
265
269
  # Extract route information from controller metadata.
270
+ #
271
+ # Handles two on-disk shapes:
272
+ # - Hash keyed by action (what ControllerExtractor writes):
273
+ # { "create" => [{ verb:, path:, ... }, ...] }
274
+ # - Array of route hashes (older / test fixture shape):
275
+ # [{ action:, verb:, path: }, ...]
266
276
  def extract_route(entry_point)
267
277
  unit_id, method_name = parse_identifier(entry_point)
268
278
  unit_data = load_unit(unit_id)
269
279
  return nil unless unit_data
270
280
 
271
281
  metadata = unit_data[:metadata] || {}
272
- routes = metadata[:routes]
273
- return nil unless routes.is_a?(Array)
274
-
275
- # Find route matching the method name
276
- route = if method_name
277
- routes.find { |r| r[:action]&.to_s == method_name }
278
- else
279
- routes.first
280
- end
281
-
282
+ route = resolve_route_entry(metadata[:routes], method_name)
282
283
  return nil unless route
283
284
 
284
- {
285
- verb: route[:verb],
286
- path: route[:path]
287
- }
285
+ { verb: route[:verb], path: route[:path] }
286
+ end
287
+
288
+ def resolve_route_entry(routes, method_name)
289
+ case routes
290
+ when Hash
291
+ action_routes = method_name ? routes[method_name.to_s] || routes[method_name.to_sym] : routes.values.first
292
+ Array(action_routes).first
293
+ when Array
294
+ method_name ? routes.find { |r| r[:action]&.to_s == method_name } : routes.first
295
+ end
288
296
  end
289
297
  end
290
298
  end
@@ -83,7 +83,7 @@ module Woods
83
83
  filename = "#{controller_id.gsub('::', '__')}_#{action}.json"
84
84
  flow_path = File.join(@flows_dir, filename)
85
85
 
86
- File.write(flow_path, JSON.pretty_generate(flow.to_h))
86
+ File.write(flow_path, canonical_json(flow.to_h))
87
87
 
88
88
  flow_path
89
89
  rescue StandardError => e
@@ -96,7 +96,26 @@ module Woods
96
96
  # @param flow_map [Hash{String => String}]
97
97
  def write_flow_index(flow_map)
98
98
  index_path = File.join(@flows_dir, 'flow_index.json')
99
- File.write(index_path, JSON.pretty_generate(flow_map))
99
+ File.write(index_path, canonical_json(flow_map))
100
+ end
101
+
102
+ # Emit deterministic pretty JSON — keys recursively sorted so two runs
103
+ # over identical input produce byte-identical output. Without this,
104
+ # diff-based tooling (snapshot review, flow-change detection) flags
105
+ # spurious churn from incidental key-order differences in `flow.to_h`.
106
+ #
107
+ # @param value [Hash, Array, Object]
108
+ # @return [String]
109
+ def canonical_json(value)
110
+ JSON.pretty_generate(sort_keys_deep(value))
111
+ end
112
+
113
+ def sort_keys_deep(value)
114
+ case value
115
+ when Hash then value.keys.sort_by(&:to_s).to_h { |k| [k, sort_keys_deep(value[k])] }
116
+ when Array then value.map { |v| sort_keys_deep(v) }
117
+ else value
118
+ end
100
119
  end
101
120
  end
102
121
  end
@@ -154,6 +154,52 @@ module Woods
154
154
  end
155
155
  end
156
156
 
157
+ # Group units into semantic domains using namespace prefixes and graph connectivity.
158
+ #
159
+ # Strategy:
160
+ # 1. Seed clusters from top-level namespace prefixes (e.g., ShippingProfile::*, Order::*)
161
+ # 2. Assign unnamespaced units to their most-connected cluster
162
+ # 3. Merge small clusters (< min_size) into their most-connected neighbor
163
+ # 4. For each cluster, identify the hub (highest PageRank) and entry points
164
+ # 5. Compute boundary edges between clusters
165
+ #
166
+ # @param min_size [Integer] Minimum units per cluster before merging (default: 3)
167
+ # @param types [Array<String>, nil] Filter to these unit types (default: all)
168
+ # @return [Array<Hash>] Clusters sorted by member count descending.
169
+ # Each hash: { name:, hub:, members:, member_count:, entry_points:, boundary_edges:, types: }
170
+ def domain_clusters(min_size: 3, types: nil)
171
+ nodes = graph_nodes
172
+ return [] if nodes.empty?
173
+
174
+ # Filter by types if specified
175
+ filtered_ids = if types
176
+ type_set = types.map(&:to_s)
177
+ nodes.select { |_, meta| type_set.include?(meta[:type].to_s) }.keys
178
+ else
179
+ nodes.keys
180
+ end
181
+
182
+ return [] if filtered_ids.empty?
183
+
184
+ # Step 1: Seed clusters from namespace prefixes
185
+ clusters = seed_namespace_clusters(filtered_ids, nodes)
186
+
187
+ # Step 2: Assign unnamespaced/root units to most-connected cluster
188
+ assign_orphaned_units(clusters, filtered_ids, nodes)
189
+
190
+ # Step 3: Merge small clusters
191
+ merge_small_clusters(clusters, min_size)
192
+
193
+ # Step 4: Enrich each cluster with hub, entry points, boundary edges
194
+ pagerank_scores = @graph.pagerank
195
+ enrich_clusters(clusters, nodes, pagerank_scores)
196
+
197
+ # Sort by member count descending
198
+ clusters.values
199
+ .select { |c| c[:members].any? }
200
+ .sort_by { |c| -c[:member_count] }
201
+ end
202
+
157
203
  # Full analysis report combining all structural metrics.
158
204
  #
159
205
  # @return [Hash] Complete analysis with :orphans, :dead_ends, :hubs,
@@ -182,6 +228,170 @@ module Woods
182
228
 
183
229
  private
184
230
 
231
+ # ──────────────────────────────────────────────────────────────────────
232
+ # Domain Cluster Helpers
233
+ # ──────────────────────────────────────────────────────────────────────
234
+
235
+ # Extract the top-level namespace prefix for clustering.
236
+ # "ShippingProfile::Setting" => "ShippingProfile"
237
+ # "Order::Transactions::Refund" => "Order"
238
+ # "Account" => nil (no namespace)
239
+ def cluster_prefix(identifier)
240
+ parts = identifier.to_s.split('::')
241
+ parts.size > 1 ? parts.first : nil
242
+ end
243
+
244
+ # Seed initial clusters from namespace prefixes.
245
+ def seed_namespace_clusters(filtered_ids, _nodes)
246
+ clusters = {}
247
+
248
+ filtered_ids.each do |id|
249
+ prefix = cluster_prefix(id)
250
+ next unless prefix
251
+
252
+ clusters[prefix] ||= { name: prefix, members: [], member_set: Set.new }
253
+ clusters[prefix][:members] << id
254
+ clusters[prefix][:member_set].add(id)
255
+ end
256
+
257
+ clusters
258
+ end
259
+
260
+ # Assign units with no namespace prefix to their most-connected cluster.
261
+ def assign_orphaned_units(clusters, filtered_ids, _nodes)
262
+ return if clusters.empty?
263
+
264
+ unassigned = filtered_ids.select { |id| cluster_prefix(id).nil? }
265
+
266
+ unassigned.each do |id|
267
+ best_cluster = find_most_connected_cluster(id, clusters)
268
+ next unless best_cluster
269
+
270
+ clusters[best_cluster][:members] << id
271
+ clusters[best_cluster][:member_set].add(id)
272
+ end
273
+ end
274
+
275
+ # Find which cluster a unit has the most connections to.
276
+ def find_most_connected_cluster(identifier, clusters)
277
+ connections = Hash.new(0)
278
+
279
+ # Check forward edges (dependencies)
280
+ @graph.dependencies_of(identifier).each do |dep|
281
+ clusters.each do |name, cluster|
282
+ connections[name] += 1 if cluster[:member_set].include?(dep)
283
+ end
284
+ end
285
+
286
+ # Check reverse edges (dependents)
287
+ @graph.dependents_of(identifier).each do |dep|
288
+ clusters.each do |name, cluster|
289
+ connections[name] += 1 if cluster[:member_set].include?(dep)
290
+ end
291
+ end
292
+
293
+ return nil if connections.empty?
294
+
295
+ connections.max_by { |_, count| count }.first
296
+ end
297
+
298
+ # Merge clusters smaller than min_size into their most-connected neighbor.
299
+ def merge_small_clusters(clusters, min_size)
300
+ loop do
301
+ small = clusters.select { |_, c| c[:members].size < min_size }
302
+ break if small.empty?
303
+
304
+ # Merge the smallest cluster first
305
+ name, cluster = small.min_by { |_, c| c[:members].size }
306
+
307
+ # Find which other cluster this one connects to most
308
+ target = find_merge_target(cluster, clusters, name)
309
+
310
+ break unless target
311
+
312
+ clusters[target][:members].concat(cluster[:members])
313
+ cluster[:members].each { |id| clusters[target][:member_set].add(id) }
314
+ clusters.delete(name)
315
+ end
316
+ end
317
+
318
+ # Find the best cluster to merge into (most cross-cluster edges).
319
+ def find_merge_target(cluster, all_clusters, exclude_name)
320
+ connections = Hash.new(0)
321
+
322
+ cluster[:members].each do |id|
323
+ (@graph.dependencies_of(id) + @graph.dependents_of(id)).each do |connected|
324
+ all_clusters.each do |name, other|
325
+ next if name == exclude_name
326
+
327
+ connections[name] += 1 if other[:member_set].include?(connected)
328
+ end
329
+ end
330
+ end
331
+
332
+ return nil if connections.empty?
333
+
334
+ connections.max_by { |_, count| count }.first
335
+ end
336
+
337
+ # Enrich clusters with hub, entry points, boundary edges, and type breakdown.
338
+ def enrich_clusters(clusters, nodes, pagerank_scores)
339
+ clusters.each_value do |cluster|
340
+ members = cluster[:members]
341
+ member_set = cluster[:member_set]
342
+
343
+ # Hub: highest PageRank within the cluster
344
+ hub_id = members.max_by { |id| pagerank_scores[id] || 0 }
345
+ cluster[:hub] = hub_id
346
+
347
+ # Entry points: controllers and GraphQL resolvers in the cluster's dependents
348
+ entry_types = %w[controller graphql_resolver graphql_mutation graphql_query]
349
+ entry_points = Set.new
350
+ members.each do |id|
351
+ @graph.dependents_of(id).each do |dep|
352
+ meta = nodes[dep]
353
+ entry_points.add(dep) if meta && entry_types.include?(meta[:type].to_s)
354
+ end
355
+ end
356
+ cluster[:entry_points] = entry_points.to_a
357
+
358
+ # Boundary edges: connections that cross cluster boundaries
359
+ boundary = []
360
+ members.each do |id|
361
+ @graph.dependencies_of(id).each do |dep|
362
+ next if member_set.include?(dep)
363
+
364
+ dep_meta = nodes[dep]
365
+ next unless dep_meta
366
+
367
+ boundary << { from: id, to: dep, via: 'dependency' }
368
+ end
369
+
370
+ @graph.dependents_of(id).each do |dep|
371
+ next if member_set.include?(dep)
372
+
373
+ dep_meta = nodes[dep]
374
+ next unless dep_meta
375
+
376
+ boundary << { from: dep, to: id, via: 'dependent' }
377
+ end
378
+ end
379
+ # Deduplicate and limit boundary edges
380
+ cluster[:boundary_edges] = boundary.uniq { |e| [e[:from], e[:to]] }.first(20)
381
+
382
+ # Type breakdown
383
+ type_counts = members.each_with_object(Hash.new(0)) do |id, counts|
384
+ meta = nodes[id]
385
+ counts[meta[:type].to_s] += 1 if meta
386
+ end
387
+ cluster[:types] = type_counts
388
+
389
+ # Final shape
390
+ cluster[:member_count] = members.size
391
+ cluster.delete(:member_set) # Internal tracking, not part of output
392
+ end
393
+ end
394
+
185
395
  # ──────────────────────────────────────────────────────────────────────
186
396
  # Graph Accessors
187
397
  # ──────────────────────────────────────────────────────────────────────